summary | refs | log | tree | commit | diff | stats
path: root/src/ffts_static.c
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2016-03-14 11:35:32 +0200
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2016-03-14 11:35:32 +0200
commit: 61166019c3aa54a26e6e9baeb5af769402e0b616 (patch)
tree: 99713c5c49afc589f704a4f7396e2fd50d9ed01a /src/ffts_static.c
parent: 6296905ad0b45f02a67359370a42168e2d3f1656 (diff)
download: ffts-61166019c3aa54a26e6e9baeb5af769402e0b616.zip
ffts-61166019c3aa54a26e6e9baeb5af769402e0b616.tar.gz
Peel off the top-level-only if-case from the ARM NEON recursive implementation
Diffstat (limited to 'src/ffts_static.c')
-rw-r--r-- src/ffts_static.c 134
1 files changed, 79 insertions, 55 deletions
diff --git a/src/ffts_static.c b/src/ffts_static.c
index 7747de0..483b5e2 100644
--- a/src/ffts_static.c
+++ b/src/ffts_static.c
@@ -947,36 +947,31 @@ ffts_static_firstpass_even_32f(float *FFTS_RESTRICT out,
}
static void
-ffts_static_rec_f_32f(ffts_plan_t *p, float *data, size_t N)
+ffts_static_rec_f_32f(const ffts_plan_t *p, float *data, size_t N)
{
+ const float *ws = (const float*) p->ws;
+
#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED)
if (N > 16) {
- size_t N1 = N >> 1;
- size_t N2 = N >> 2;
- size_t N3 = N >> 3;
- float *ws = ((float *)(p->ws)) + (p->ws_is[ffts_ctzl(N)-4] << 1);
-
- ffts_static_rec_f_32f(p, data, N2);
- ffts_static_rec_f_32f(p, data + N1, N3);
- ffts_static_rec_f_32f(p, data + N1 + N2, N3);
- ffts_static_rec_f_32f(p, data + N, N2);
- ffts_static_rec_f_32f(p, data + N + N1, N2);
-
- if (N == p->N) {
- neon_static_x8_t_f(data, N, ws);
- } else {
- neon_static_x8_f(data, N, ws);
- }
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
+
+ ffts_static_rec_f_32f(p, data , N2);
+ ffts_static_rec_f_32f(p, data + N1 , N3);
+ ffts_static_rec_f_32f(p, data + N1 + N2, N3);
+ ffts_static_rec_f_32f(p, data + N , N2);
+ ffts_static_rec_f_32f(p, data + N + N1 , N2);
+
+ neon_static_x8_f(data, N, ws + (p->ws_is[ffts_ctzl(N) - 4] << 1));
} else if (N == 16) {
- neon_static_x4_f(data, N, p->ws);
+ neon_static_x4_f(data, N, ws);
}
#else
- const float *ws = (float*) p->ws;
-
if (N > 128) {
- size_t N1 = N >> 1;
- size_t N2 = N >> 2;
- size_t N3 = N >> 3;
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
ffts_static_rec_f_32f(p, data , N2);
ffts_static_rec_f_32f(p, data + N1 , N3);
@@ -1013,36 +1008,31 @@ ffts_static_rec_f_32f(ffts_plan_t *p, float *data, size_t N)
}
static void
-ffts_static_rec_i_32f(ffts_plan_t *p, float *data, size_t N)
+ffts_static_rec_i_32f(const ffts_plan_t *p, float *data, size_t N)
{
+ const float *ws = (const float*) p->ws;
+
#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED)
if (N > 16) {
- size_t N1 = N >> 1;
- size_t N2 = N >> 2;
- size_t N3 = N >> 3;
- float *ws = ((float *)(p->ws)) + (p->ws_is[ffts_ctzl(N)-4] << 1);
-
- ffts_static_rec_i_32f(p, data, N2);
- ffts_static_rec_i_32f(p, data + N1, N3);
- ffts_static_rec_i_32f(p, data + N1 + N2, N3);
- ffts_static_rec_i_32f(p, data + N, N2);
- ffts_static_rec_i_32f(p, data + N + N1, N2);
-
- if (N == p->N) {
- neon_static_x8_t_i(data, N, ws);
- } else {
- neon_static_x8_i(data, N, ws);
- }
- } else if(N==16) {
- neon_static_x4_i(data, N, p->ws);
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
+
+ ffts_static_rec_i_32f(p, data , N2);
+ ffts_static_rec_i_32f(p, data + N1 , N3);
+ ffts_static_rec_i_32f(p, data + N1 + N2, N3);
+ ffts_static_rec_i_32f(p, data + N , N2);
+ ffts_static_rec_i_32f(p, data + N + N1 , N2);
+
+ neon_static_x8_i(data, N, ws + (p->ws_is[ffts_ctzl(N) - 4] << 1));
+ } else if (N == 16) {
+ neon_static_x4_i(data, N, ws);
}
#else
- float *ws = (float*) p->ws;
-
if (N > 128) {
- size_t N1 = N >> 1;
- size_t N2 = N >> 2;
- size_t N3 = N >> 3;
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
ffts_static_rec_i_32f(p, data , N2);
ffts_static_rec_i_32f(p, data + N1 , N3);
@@ -1084,21 +1074,38 @@ ffts_static_transform_f_32f(ffts_plan_t *p, const void *in, void *out)
const float *din = (const float*) in;
float *dout = (float*) out;
+ const size_t N = p->N;
+ const int N_log_2 = ffts_ctzl(N);
+
#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED)
- if (ffts_ctzl(p->N) & 1) {
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
+
+ const float *ws = ((const float*) p->ws) + (p->ws_is[N_log_2 - 4] << 1);
+
+ if (N_log_2 & 1) {
neon_static_o_f(p, din, dout);
} else {
neon_static_e_f(p, din, dout);
}
+
+ ffts_static_rec_f_32f(p, dout , N2);
+ ffts_static_rec_f_32f(p, dout + N1 , N3);
+ ffts_static_rec_f_32f(p, dout + N1 + N2, N3);
+ ffts_static_rec_f_32f(p, dout + N , N2);
+ ffts_static_rec_f_32f(p, dout + N + N1 , N2);
+
+ neon_static_x8_t_f(dout, N, ws);
#else
- if (ffts_ctzl(p->N) & 1) {
+ if (N_log_2 & 1) {
ffts_static_firstpass_odd_32f(dout, din, p, 0);
} else {
ffts_static_firstpass_even_32f(dout, din, p, 0);
}
-#endif
- ffts_static_rec_f_32f(p, dout, p->N);
+ ffts_static_rec_f_32f(p, dout, N);
+#endif
}
void
@@ -1107,19 +1114,36 @@ ffts_static_transform_i_32f(ffts_plan_t *p, const void *in, void *out)
const float *din = (const float*) in;
float *dout = (float*) out;
+ const size_t N = p->N;
+ const int N_log_2 = ffts_ctzl(N);
+
#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED)
- if (ffts_ctzl(p->N) & 1) {
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
+
+ const float *ws = ((const float*) p->ws) + (p->ws_is[N_log_2 - 4] << 1);
+
+ if (N_log_2 & 1) {
neon_static_o_i(p, din, dout);
} else {
neon_static_e_i(p, din, dout);
}
+
+ ffts_static_rec_i_32f(p, dout , N2);
+ ffts_static_rec_i_32f(p, dout + N1 , N3);
+ ffts_static_rec_i_32f(p, dout + N1 + N2, N3);
+ ffts_static_rec_i_32f(p, dout + N , N2);
+ ffts_static_rec_i_32f(p, dout + N + N1 , N2);
+
+ neon_static_x8_t_i(dout, N, ws);
#else
- if (ffts_ctzl(p->N) & 1) {
+ if (N_log_2 & 1) {
ffts_static_firstpass_odd_32f(dout, din, p, 1);
} else {
ffts_static_firstpass_even_32f(dout, din, p, 1);
}
-#endif
- ffts_static_rec_i_32f(p, dout, p->N);
+ ffts_static_rec_i_32f(p, dout, N);
+#endif
}
\ No newline at end of file
OpenPOWER on IntegriCloud