diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2016-03-11 14:32:22 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2016-03-11 14:32:22 +0200 |
commit | e667ca5e4304b31cd7093eaead481b032092b985 (patch) | |
tree | df057e6fa4502d1924eddb9bf496e5e9d338a417 /src/ffts_static.c | |
parent | 2051c214d591be08e40fdba623ccefabbba11b29 (diff) | |
download | ffts-e667ca5e4304b31cd7093eaead481b032092b985.zip ffts-e667ca5e4304b31cd7093eaead481b032092b985.tar.gz |
Restore ARM NEON optimized recursive version
Diffstat (limited to 'src/ffts_static.c')
-rw-r--r-- | src/ffts_static.c | 84 |
1 file changed, 78 insertions, 6 deletions
diff --git a/src/ffts_static.c b/src/ffts_static.c index 701cca8..7747de0 100644 --- a/src/ffts_static.c +++ b/src/ffts_static.c @@ -36,6 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "ffts_internal.h" #include "macros.h" +#if defined(HAVE_NEON) +#include "neon.h" +#endif + #include <assert.h> static const FFTS_ALIGN(16) float ffts_constants_small_32f[24] = { @@ -945,6 +949,28 @@ ffts_static_firstpass_even_32f(float *FFTS_RESTRICT out, static void ffts_static_rec_f_32f(ffts_plan_t *p, float *data, size_t N) { +#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED) + if (N > 16) { + size_t N1 = N >> 1; + size_t N2 = N >> 2; + size_t N3 = N >> 3; + float *ws = ((float *)(p->ws)) + (p->ws_is[ffts_ctzl(N)-4] << 1); + + ffts_static_rec_f_32f(p, data, N2); + ffts_static_rec_f_32f(p, data + N1, N3); + ffts_static_rec_f_32f(p, data + N1 + N2, N3); + ffts_static_rec_f_32f(p, data + N, N2); + ffts_static_rec_f_32f(p, data + N + N1, N2); + + if (N == p->N) { + neon_static_x8_t_f(data, N, ws); + } else { + neon_static_x8_f(data, N, ws); + } + } else if (N == 16) { + neon_static_x4_f(data, N, p->ws); + } +#else const float *ws = (float*) p->ws; if (N > 128) { @@ -983,11 +1009,34 @@ ffts_static_rec_f_32f(ffts_plan_t *p, float *data, size_t N) assert(N == 16); V4SF_X_4(0, data, N, ws); } +#endif } static void ffts_static_rec_i_32f(ffts_plan_t *p, float *data, size_t N) { +#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED) + if (N > 16) { + size_t N1 = N >> 1; + size_t N2 = N >> 2; + size_t N3 = N >> 3; + float *ws = ((float *)(p->ws)) + (p->ws_is[ffts_ctzl(N)-4] << 1); + + ffts_static_rec_i_32f(p, data, N2); + ffts_static_rec_i_32f(p, data + N1, N3); + ffts_static_rec_i_32f(p, data + N1 + N2, N3); + ffts_static_rec_i_32f(p, data + N, N2); + ffts_static_rec_i_32f(p, data + N + N1, N2); + + if (N == p->N) { + neon_static_x8_t_i(data, N, ws); + } else { + neon_static_x8_i(data, N, ws); + } + } else if(N==16) { + neon_static_x4_i(data, N, p->ws); + } 
+#else float *ws = (float*) p->ws; if (N > 128) { @@ -1026,28 +1075,51 @@ ffts_static_rec_i_32f(ffts_plan_t *p, float *data, size_t N) assert(N == 16); V4SF_X_4(1, data, N, ws); } +#endif } void ffts_static_transform_f_32f(ffts_plan_t *p, const void *in, void *out) { + const float *din = (const float*) in; + float *dout = (float*) out; + +#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED) if (ffts_ctzl(p->N) & 1) { - ffts_static_firstpass_odd_32f((float*) out, (const float*) in, p, 0); + neon_static_o_f(p, din, dout); } else { - ffts_static_firstpass_even_32f((float*) out, (const float*) in, p, 0); + neon_static_e_f(p, din, dout); } +#else + if (ffts_ctzl(p->N) & 1) { + ffts_static_firstpass_odd_32f(dout, din, p, 0); + } else { + ffts_static_firstpass_even_32f(dout, din, p, 0); + } +#endif - ffts_static_rec_f_32f(p, (float*) out, p->N); + ffts_static_rec_f_32f(p, dout, p->N); } void ffts_static_transform_i_32f(ffts_plan_t *p, const void *in, void *out) { + const float *din = (const float*) in; + float *dout = (float*) out; + +#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED) + if (ffts_ctzl(p->N) & 1) { + neon_static_o_i(p, din, dout); + } else { + neon_static_e_i(p, din, dout); + } +#else if (ffts_ctzl(p->N) & 1) { - ffts_static_firstpass_odd_32f((float*) out, (const float*) in, p, 1); + ffts_static_firstpass_odd_32f(dout, din, p, 1); } else { - ffts_static_firstpass_even_32f((float*) out, (const float*) in, p, 1); + ffts_static_firstpass_even_32f(dout, din, p, 1); } +#endif - ffts_static_rec_i_32f(p, (float*) out, p->N); + ffts_static_rec_i_32f(p, dout, p->N); }
\ No newline at end of file