Add new attributes to help auto-vectorization

author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2015-07-06 12:08:32 +0300
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2015-07-06 12:08:32 +0300
commit: fbcfb21e9de85b6443848c721523d3793ae668ff (patch)
tree: ed8666765ee25a2dd6dbbf1783374c8b2ff36e1f
parent: ceb8e6aef7f0e406ff4724896a8138bf72911a68 (diff)
download: ffts-fbcfb21e9de85b6443848c721523d3793ae668ff.zip ffts-fbcfb21e9de85b6443848c721523d3793ae668ff.tar.gz
2 files changed, 54 insertions, 19 deletions
diff --git a/src/ffts_attributes.h b/src/ffts_attributes.h
index 6ac2ac3..763a6af 100644
--- a/src/ffts_attributes.h
+++ b/src/ffts_attributes.h
@@ -68,10 +68,32 @@
 #define FFTS_INLINE inline
 #endif
 
-#if defined(_MSC_VER)
+#if defined(__GNUC__)
+#define FFTS_RESTRICT __restrict
+#elif defined(_MSC_VER)
+#define FFTS_RESTRICT __restrict
+#else
 #define FFTS_RESTRICT
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,5)
+#define FFTS_ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
+#elif defined(_MSC_VER)
+#define FFTS_ASSUME(cond) __assume(cond)
 #else
-#define FFTS_RESTRICT __restrict
+#define FFTS_ASSUME(cond)
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,7)
+#define FFTS_ASSUME_ALIGNED_16(x) __builtin_assume_aligned(x, 16)
+#else
+#define FFTS_ASSUME_ALIGNED_16(x) x
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,7)
+#define FFTS_ASSUME_ALIGNED_32(x) __builtin_assume_aligned(x, 32)
+#else
+#define FFTS_ASSUME_ALIGNED_32(x) x
 #endif
 
 #endif /* FFTS_ATTRIBUTES_H */
diff --git a/src/ffts_real.c b/src/ffts_real.c
index 5522f6b..82a9e79 100644
--- a/src/ffts_real.c
+++ b/src/ffts_real.c
@@ -63,13 +63,19 @@ ffts_free_1d_real(ffts_plan_t *p)
     free(p);
 }
 
-static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout)
+static void
+ffts_execute_1d_real(ffts_plan_t *p, const void *input, void *output)
 {
-    float *out = (float*) vout;
-    float *buf = (float*) p->buf;
-    float *A = p->A;
-    float *B = p->B;
-    size_t N = p->N;
+    float *const FFTS_RESTRICT out =
+        (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(output);
+    float *const FFTS_RESTRICT buf =
+        (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf);
+    const float *const FFTS_RESTRICT A =
+        (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A);
+    const float *const FFTS_RESTRICT B =
+        (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B);
+    const int N = (const int) p->N;
+    int i;
 
 #ifdef __ARM_NEON__
     float *p_buf0 = buf;
@@ -77,9 +83,10 @@ static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout)
     float *p_out = out;
 #endif
 
-    size_t i;
+    /* we know this */
+    FFTS_ASSUME(N/2 > 0);
 
-    p->plans[0]->transform(p->plans[0], vin, buf);
+    p->plans[0]->transform(p->plans[0], input, buf);
 
     buf[N + 0] = buf[0];
     buf[N + 1] = buf[1];
@@ -138,14 +145,19 @@ static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout)
     out[N + 1] = 0.0f;
 }
 
-static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout)
+static void
+ffts_execute_1d_real_inv(ffts_plan_t *p, const void *input, void *output)
 {
-    float *out = (float*) vout;
-    float *in = (float*) vin;
-    float *buf = (float*) p->buf;
-    float *A = p->A;
-    float *B = p->B;
-    size_t N = p->N;
+    float *const FFTS_RESTRICT in =
+        (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(input);
+    float *const FFTS_RESTRICT buf =
+        (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf);
+    const float *const FFTS_RESTRICT A =
+        (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A);
+    const float *const FFTS_RESTRICT B =
+        (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B);
+    const int N = (const int) p->N;
+    int i;
 
 #ifdef __ARM_NEON__
     float *p_buf0 = in;
@@ -153,7 +165,8 @@ static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout
     float *p_out = buf;
 #endif
 
-    size_t i;
+    /* we know this */
+    FFTS_ASSUME(N/2 > 0);
 
 #ifdef __ARM_NEON__
     for (i = 0; i < N/2; i += 2) {
@@ -205,7 +218,7 @@ static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout
     }
 #endif
 
-    p->plans[0]->transform(p->plans[0], buf, out);
+    p->plans[0]->transform(p->plans[0], buf, output);
 }
 
 ffts_plan_t*
author	Jukka Ojanen <jukka.ojanen@linkotec.net>	2015-07-06 12:08:32 +0300
committer	Jukka Ojanen <jukka.ojanen@linkotec.net>	2015-07-06 12:08:32 +0300
commit	fbcfb21e9de85b6443848c721523d3793ae668ff (patch)
tree	ed8666765ee25a2dd6dbbf1783374c8b2ff36e1f
parent	ceb8e6aef7f0e406ff4724896a8138bf72911a68 (diff)
download	ffts-fbcfb21e9de85b6443848c721523d3793ae668ff.zip ffts-fbcfb21e9de85b6443848c721523d3793ae668ff.tar.gz