summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2016-03-14 11:35:32 +0200
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2016-03-14 11:35:32 +0200
commit: 61166019c3aa54a26e6e9baeb5af769402e0b616 (patch)
tree: 99713c5c49afc589f704a4f7396e2fd50d9ed01a
parent: 6296905ad0b45f02a67359370a42168e2d3f1656 (diff)
download: ffts-61166019c3aa54a26e6e9baeb5af769402e0b616.zip
download: ffts-61166019c3aa54a26e6e9baeb5af769402e0b616.tar.gz
Peel off top-level only if-case from ARM NEON recursive implementation
-rw-r--r-- src/ffts_static.c 134
-rw-r--r-- src/neon.h 85
2 files changed, 120 insertions, 99 deletions
diff --git a/src/ffts_static.c b/src/ffts_static.c
index 7747de0..483b5e2 100644
--- a/src/ffts_static.c
+++ b/src/ffts_static.c
@@ -947,36 +947,31 @@ ffts_static_firstpass_even_32f(float *FFTS_RESTRICT out,
}
static void
-ffts_static_rec_f_32f(ffts_plan_t *p, float *data, size_t N)
+ffts_static_rec_f_32f(const ffts_plan_t *p, float *data, size_t N)
{
+ const float *ws = (const float*) p->ws;
+
#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED)
if (N > 16) {
- size_t N1 = N >> 1;
- size_t N2 = N >> 2;
- size_t N3 = N >> 3;
- float *ws = ((float *)(p->ws)) + (p->ws_is[ffts_ctzl(N)-4] << 1);
-
- ffts_static_rec_f_32f(p, data, N2);
- ffts_static_rec_f_32f(p, data + N1, N3);
- ffts_static_rec_f_32f(p, data + N1 + N2, N3);
- ffts_static_rec_f_32f(p, data + N, N2);
- ffts_static_rec_f_32f(p, data + N + N1, N2);
-
- if (N == p->N) {
- neon_static_x8_t_f(data, N, ws);
- } else {
- neon_static_x8_f(data, N, ws);
- }
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
+
+ ffts_static_rec_f_32f(p, data , N2);
+ ffts_static_rec_f_32f(p, data + N1 , N3);
+ ffts_static_rec_f_32f(p, data + N1 + N2, N3);
+ ffts_static_rec_f_32f(p, data + N , N2);
+ ffts_static_rec_f_32f(p, data + N + N1 , N2);
+
+ neon_static_x8_f(data, N, ws + (p->ws_is[ffts_ctzl(N) - 4] << 1));
} else if (N == 16) {
- neon_static_x4_f(data, N, p->ws);
+ neon_static_x4_f(data, N, ws);
}
#else
- const float *ws = (float*) p->ws;
-
if (N > 128) {
- size_t N1 = N >> 1;
- size_t N2 = N >> 2;
- size_t N3 = N >> 3;
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
ffts_static_rec_f_32f(p, data , N2);
ffts_static_rec_f_32f(p, data + N1 , N3);
@@ -1013,36 +1008,31 @@ ffts_static_rec_f_32f(ffts_plan_t *p, float *data, size_t N)
}
static void
-ffts_static_rec_i_32f(ffts_plan_t *p, float *data, size_t N)
+ffts_static_rec_i_32f(const ffts_plan_t *p, float *data, size_t N)
{
+ const float *ws = (const float*) p->ws;
+
#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED)
if (N > 16) {
- size_t N1 = N >> 1;
- size_t N2 = N >> 2;
- size_t N3 = N >> 3;
- float *ws = ((float *)(p->ws)) + (p->ws_is[ffts_ctzl(N)-4] << 1);
-
- ffts_static_rec_i_32f(p, data, N2);
- ffts_static_rec_i_32f(p, data + N1, N3);
- ffts_static_rec_i_32f(p, data + N1 + N2, N3);
- ffts_static_rec_i_32f(p, data + N, N2);
- ffts_static_rec_i_32f(p, data + N + N1, N2);
-
- if (N == p->N) {
- neon_static_x8_t_i(data, N, ws);
- } else {
- neon_static_x8_i(data, N, ws);
- }
- } else if(N==16) {
- neon_static_x4_i(data, N, p->ws);
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
+
+ ffts_static_rec_i_32f(p, data , N2);
+ ffts_static_rec_i_32f(p, data + N1 , N3);
+ ffts_static_rec_i_32f(p, data + N1 + N2, N3);
+ ffts_static_rec_i_32f(p, data + N , N2);
+ ffts_static_rec_i_32f(p, data + N + N1 , N2);
+
+ neon_static_x8_i(data, N, ws + (p->ws_is[ffts_ctzl(N) - 4] << 1));
+ } else if (N == 16) {
+ neon_static_x4_i(data, N, ws);
}
#else
- float *ws = (float*) p->ws;
-
if (N > 128) {
- size_t N1 = N >> 1;
- size_t N2 = N >> 2;
- size_t N3 = N >> 3;
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
ffts_static_rec_i_32f(p, data , N2);
ffts_static_rec_i_32f(p, data + N1 , N3);
@@ -1084,21 +1074,38 @@ ffts_static_transform_f_32f(ffts_plan_t *p, const void *in, void *out)
const float *din = (const float*) in;
float *dout = (float*) out;
+ const size_t N = p->N;
+ const int N_log_2 = ffts_ctzl(N);
+
#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED)
- if (ffts_ctzl(p->N) & 1) {
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
+
+ const float *ws = ((const float*) p->ws) + (p->ws_is[N_log_2 - 4] << 1);
+
+ if (N_log_2 & 1) {
neon_static_o_f(p, din, dout);
} else {
neon_static_e_f(p, din, dout);
}
+
+ ffts_static_rec_f_32f(p, dout , N2);
+ ffts_static_rec_f_32f(p, dout + N1 , N3);
+ ffts_static_rec_f_32f(p, dout + N1 + N2, N3);
+ ffts_static_rec_f_32f(p, dout + N , N2);
+ ffts_static_rec_f_32f(p, dout + N + N1 , N2);
+
+ neon_static_x8_t_f(dout, N, ws);
#else
- if (ffts_ctzl(p->N) & 1) {
+ if (N_log_2 & 1) {
ffts_static_firstpass_odd_32f(dout, din, p, 0);
} else {
ffts_static_firstpass_even_32f(dout, din, p, 0);
}
-#endif
- ffts_static_rec_f_32f(p, dout, p->N);
+ ffts_static_rec_f_32f(p, dout, N);
+#endif
}
void
@@ -1107,19 +1114,36 @@ ffts_static_transform_i_32f(ffts_plan_t *p, const void *in, void *out)
const float *din = (const float*) in;
float *dout = (float*) out;
+ const size_t N = p->N;
+ const int N_log_2 = ffts_ctzl(N);
+
#if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED)
- if (ffts_ctzl(p->N) & 1) {
+ const size_t N1 = N >> 1;
+ const size_t N2 = N >> 2;
+ const size_t N3 = N >> 3;
+
+ const float *ws = ((const float*) p->ws) + (p->ws_is[N_log_2 - 4] << 1);
+
+ if (N_log_2 & 1) {
neon_static_o_i(p, din, dout);
} else {
neon_static_e_i(p, din, dout);
}
+
+ ffts_static_rec_i_32f(p, dout , N2);
+ ffts_static_rec_i_32f(p, dout + N1 , N3);
+ ffts_static_rec_i_32f(p, dout + N1 + N2, N3);
+ ffts_static_rec_i_32f(p, dout + N , N2);
+ ffts_static_rec_i_32f(p, dout + N + N1 , N2);
+
+ neon_static_x8_t_i(dout, N, ws);
#else
- if (ffts_ctzl(p->N) & 1) {
+ if (N_log_2 & 1) {
ffts_static_firstpass_odd_32f(dout, din, p, 1);
} else {
ffts_static_firstpass_even_32f(dout, din, p, 1);
}
-#endif
- ffts_static_rec_i_32f(p, dout, p->N);
+ ffts_static_rec_i_32f(p, dout, N);
+#endif
}
\ No newline at end of file
diff --git a/src/neon.h b/src/neon.h
index 2f51995..b40623b 100644
--- a/src/neon.h
+++ b/src/neon.h
@@ -1,38 +1,38 @@
/*
-
- This file is part of FFTS -- The Fastest Fourier Transform in the South
-
- Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
- Copyright (c) 2012, The University of Waikato
-
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the organization nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+This file is part of FFTS -- The Fastest Fourier Transform in the South
+
+Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
+Copyright (c) 2012, The University of Waikato
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of the organization nor the
+names of its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef __NEON_H__
-#define __NEON_H__
+#ifndef FFTS_NEON_H
+#define FFTS_NEON_H
#include "ffts.h"
@@ -48,19 +48,16 @@ void neon_end();
void neon_transpose(uint64_t *in, uint64_t *out, int w, int h);
void neon_transpose_to_buf(uint64_t *in, uint64_t *out, int w);
-//typedef struct _ffts_plan_t ffts_plan_t;
-
-void neon_static_e_f(ffts_plan_t * , const void * , void * );
-void neon_static_o_f(ffts_plan_t * , const void * , void * );
-void neon_static_x4_f(float *, size_t, float *);
-void neon_static_x8_f(float *, size_t, float *);
-void neon_static_x8_t_f(float *, size_t, float *);
+void neon_static_e_f(ffts_plan_t*, const void*, void*);
+void neon_static_o_f(ffts_plan_t*, const void*, void*);
+void neon_static_x4_f(float*, size_t, const float*);
+void neon_static_x8_f(float*, size_t, const float*);
+void neon_static_x8_t_f(float*, size_t, const float*);
-void neon_static_e_i(ffts_plan_t * , const void * , void * );
-void neon_static_o_i(ffts_plan_t * , const void * , void * );
-void neon_static_x4_i(float *, size_t, float *);
-void neon_static_x8_i(float *, size_t, float *);
-void neon_static_x8_t_i(float *, size_t, float *);
+void neon_static_e_i(ffts_plan_t*, const void*, void*);
+void neon_static_o_i(ffts_plan_t*, const void*, void*);
+void neon_static_x4_i(float*, size_t, const float*);
+void neon_static_x8_i(float*, size_t, const float*);
+void neon_static_x8_t_i(float*, size_t, const float*);
-#endif
-// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3:
+#endif /* FFTS_NEON_H */
OpenPOWER on IntegriCloud