summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net>, 2015-07-06 12:08:32 +0300
committer: Jukka Ojanen <jukka.ojanen@linkotec.net>, 2015-07-06 12:08:32 +0300
commit: fbcfb21e9de85b6443848c721523d3793ae668ff (patch)
tree: ed8666765ee25a2dd6dbbf1783374c8b2ff36e1f
parent: ceb8e6aef7f0e406ff4724896a8138bf72911a68 (diff)
download: ffts-fbcfb21e9de85b6443848c721523d3793ae668ff.zip
          ffts-fbcfb21e9de85b6443848c721523d3793ae668ff.tar.gz
Add new attributes to help auto-vectorization
-rw-r--r-- src/ffts_attributes.h | 26
-rw-r--r-- src/ffts_real.c | 47
2 files changed, 54 insertions, 19 deletions
diff --git a/src/ffts_attributes.h b/src/ffts_attributes.h
index 6ac2ac3..763a6af 100644
--- a/src/ffts_attributes.h
+++ b/src/ffts_attributes.h
@@ -68,10 +68,32 @@
#define FFTS_INLINE inline
#endif
-#if defined(_MSC_VER)
+#if defined(__GNUC__)
+#define FFTS_RESTRICT __restrict
+#elif defined(_MSC_VER)
+#define FFTS_RESTRICT __restrict
+#else
#define FFTS_RESTRICT
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,5)
+#define FFTS_ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
+#elif defined(_MSC_VER)
+#define FFTS_ASSUME(cond) __assume(cond)
#else
-#define FFTS_RESTRICT __restrict
+#define FFTS_ASSUME(cond)
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,7)
+#define FFTS_ASSUME_ALIGNED_16(x) __builtin_assume_aligned(x, 16)
+#else
+#define FFTS_ASSUME_ALIGNED_16(x) x
+#endif
+
+#if GCC_VERSION_AT_LEAST(4,7)
+#define FFTS_ASSUME_ALIGNED_32(x) __builtin_assume_aligned(x, 32)
+#else
+#define FFTS_ASSUME_ALIGNED_32(x) x
#endif
#endif /* FFTS_ATTRIBUTES_H */
diff --git a/src/ffts_real.c b/src/ffts_real.c
index 5522f6b..82a9e79 100644
--- a/src/ffts_real.c
+++ b/src/ffts_real.c
@@ -63,13 +63,19 @@ ffts_free_1d_real(ffts_plan_t *p)
free(p);
}
-static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout)
+static void
+ffts_execute_1d_real(ffts_plan_t *p, const void *input, void *output)
{
- float *out = (float*) vout;
- float *buf = (float*) p->buf;
- float *A = p->A;
- float *B = p->B;
- size_t N = p->N;
+ float *const FFTS_RESTRICT out =
+ (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(output);
+ float *const FFTS_RESTRICT buf =
+ (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf);
+ const float *const FFTS_RESTRICT A =
+ (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A);
+ const float *const FFTS_RESTRICT B =
+ (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B);
+ const int N = (const int) p->N;
+ int i;
#ifdef __ARM_NEON__
float *p_buf0 = buf;
@@ -77,9 +83,10 @@ static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout)
float *p_out = out;
#endif
- size_t i;
+ /* we know this */
+ FFTS_ASSUME(N/2 > 0);
- p->plans[0]->transform(p->plans[0], vin, buf);
+ p->plans[0]->transform(p->plans[0], input, buf);
buf[N + 0] = buf[0];
buf[N + 1] = buf[1];
@@ -138,14 +145,19 @@ static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout)
out[N + 1] = 0.0f;
}
-static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout)
+static void
+ffts_execute_1d_real_inv(ffts_plan_t *p, const void *input, void *output)
{
- float *out = (float*) vout;
- float *in = (float*) vin;
- float *buf = (float*) p->buf;
- float *A = p->A;
- float *B = p->B;
- size_t N = p->N;
+ float *const FFTS_RESTRICT in =
+ (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(input);
+ float *const FFTS_RESTRICT buf =
+ (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf);
+ const float *const FFTS_RESTRICT A =
+ (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A);
+ const float *const FFTS_RESTRICT B =
+ (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B);
+ const int N = (const int) p->N;
+ int i;
#ifdef __ARM_NEON__
float *p_buf0 = in;
@@ -153,7 +165,8 @@ static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout
float *p_out = buf;
#endif
- size_t i;
+ /* we know this */
+ FFTS_ASSUME(N/2 > 0);
#ifdef __ARM_NEON__
for (i = 0; i < N/2; i += 2) {
@@ -205,7 +218,7 @@ static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout
}
#endif
- p->plans[0]->transform(p->plans[0], buf, out);
+ p->plans[0]->transform(p->plans[0], buf, output);
}
ffts_plan_t*
OpenPOWER on IntegriCloud