diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2015-07-06 12:08:32 +0300 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2015-07-06 12:08:32 +0300 |
commit | fbcfb21e9de85b6443848c721523d3793ae668ff (patch) | |
tree | ed8666765ee25a2dd6dbbf1783374c8b2ff36e1f | |
parent | ceb8e6aef7f0e406ff4724896a8138bf72911a68 (diff) | |
download | ffts-fbcfb21e9de85b6443848c721523d3793ae668ff.zip ffts-fbcfb21e9de85b6443848c721523d3793ae668ff.tar.gz |
Add new attributes to help auto-vectorization
-rw-r--r-- | src/ffts_attributes.h | 26 | ||||
-rw-r--r-- | src/ffts_real.c | 47 |
2 files changed, 54 insertions, 19 deletions
diff --git a/src/ffts_attributes.h b/src/ffts_attributes.h index 6ac2ac3..763a6af 100644 --- a/src/ffts_attributes.h +++ b/src/ffts_attributes.h @@ -68,10 +68,32 @@ #define FFTS_INLINE inline #endif -#if defined(_MSC_VER) +#if defined(__GNUC__) +#define FFTS_RESTRICT __restrict +#elif defined(_MSC_VER) +#define FFTS_RESTRICT __restrict +#else #define FFTS_RESTRICT +#endif + +#if GCC_VERSION_AT_LEAST(4,5) +#define FFTS_ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0) +#elif defined(_MSC_VER) +#define FFTS_ASSUME(cond) __assume(cond) #else -#define FFTS_RESTRICT __restrict +#define FFTS_ASSUME(cond) +#endif + +#if GCC_VERSION_AT_LEAST(4,7) +#define FFTS_ASSUME_ALIGNED_16(x) __builtin_assume_aligned(x, 16) +#else +#define FFTS_ASSUME_ALIGNED_16(x) x +#endif + +#if GCC_VERSION_AT_LEAST(4,7) +#define FFTS_ASSUME_ALIGNED_32(x) __builtin_assume_aligned(x, 32) +#else +#define FFTS_ASSUME_ALIGNED_32(x) x #endif #endif /* FFTS_ATTRIBUTES_H */ diff --git a/src/ffts_real.c b/src/ffts_real.c index 5522f6b..82a9e79 100644 --- a/src/ffts_real.c +++ b/src/ffts_real.c @@ -63,13 +63,19 @@ ffts_free_1d_real(ffts_plan_t *p) free(p); } -static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout) +static void +ffts_execute_1d_real(ffts_plan_t *p, const void *input, void *output) { - float *out = (float*) vout; - float *buf = (float*) p->buf; - float *A = p->A; - float *B = p->B; - size_t N = p->N; + float *const FFTS_RESTRICT out = + (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(output); + float *const FFTS_RESTRICT buf = + (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf); + const float *const FFTS_RESTRICT A = + (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A); + const float *const FFTS_RESTRICT B = + (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B); + const int N = (const int) p->N; + int i; #ifdef __ARM_NEON__ float *p_buf0 = buf; @@ -77,9 +83,10 @@ static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout) float *p_out = out; #endif - size_t i; + /* we know this */ + FFTS_ASSUME(N/2 > 0); - p->plans[0]->transform(p->plans[0], vin, buf); + p->plans[0]->transform(p->plans[0], input, buf); buf[N + 0] = buf[0]; buf[N + 1] = buf[1]; @@ -138,14 +145,19 @@ static void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout) out[N + 1] = 0.0f; } -static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout) +static void +ffts_execute_1d_real_inv(ffts_plan_t *p, const void *input, void *output) { - float *out = (float*) vout; - float *in = (float*) vin; - float *buf = (float*) p->buf; - float *A = p->A; - float *B = p->B; - size_t N = p->N; + float *const FFTS_RESTRICT in = + (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(input); + float *const FFTS_RESTRICT buf = + (float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf); + const float *const FFTS_RESTRICT A = + (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A); + const float *const FFTS_RESTRICT B = + (const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B); + const int N = (const int) p->N; + int i; #ifdef __ARM_NEON__ float *p_buf0 = in; @@ -153,7 +165,8 @@ static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout float *p_out = buf; #endif - size_t i; + /* we know this */ + FFTS_ASSUME(N/2 > 0); #ifdef __ARM_NEON__ for (i = 0; i < N/2; i += 2) { @@ -205,7 +218,7 @@ static void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout } #endif - p->plans[0]->transform(p->plans[0], buf, out); + p->plans[0]->transform(p->plans[0], buf, output); } ffts_plan_t* |