From 61166019c3aa54a26e6e9baeb5af769402e0b616 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Mon, 14 Mar 2016 11:35:32 +0200 Subject: Peel off top-level only if-case from ARM NEON recursive implementation --- src/ffts_static.c | 134 ++++++++++++++++++++++++++++++++---------------------- src/neon.h | 85 +++++++++++++++++----------------- 2 files changed, 120 insertions(+), 99 deletions(-) diff --git a/src/ffts_static.c b/src/ffts_static.c index 7747de0..483b5e2 100644 --- a/src/ffts_static.c +++ b/src/ffts_static.c @@ -947,36 +947,31 @@ ffts_static_firstpass_even_32f(float *FFTS_RESTRICT out, } static void -ffts_static_rec_f_32f(ffts_plan_t *p, float *data, size_t N) +ffts_static_rec_f_32f(const ffts_plan_t *p, float *data, size_t N) { + const float *ws = (const float*) p->ws; + #if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED) if (N > 16) { - size_t N1 = N >> 1; - size_t N2 = N >> 2; - size_t N3 = N >> 3; - float *ws = ((float *)(p->ws)) + (p->ws_is[ffts_ctzl(N)-4] << 1); - - ffts_static_rec_f_32f(p, data, N2); - ffts_static_rec_f_32f(p, data + N1, N3); - ffts_static_rec_f_32f(p, data + N1 + N2, N3); - ffts_static_rec_f_32f(p, data + N, N2); - ffts_static_rec_f_32f(p, data + N + N1, N2); - - if (N == p->N) { - neon_static_x8_t_f(data, N, ws); - } else { - neon_static_x8_f(data, N, ws); - } + const size_t N1 = N >> 1; + const size_t N2 = N >> 2; + const size_t N3 = N >> 3; + + ffts_static_rec_f_32f(p, data , N2); + ffts_static_rec_f_32f(p, data + N1 , N3); + ffts_static_rec_f_32f(p, data + N1 + N2, N3); + ffts_static_rec_f_32f(p, data + N , N2); + ffts_static_rec_f_32f(p, data + N + N1 , N2); + + neon_static_x8_f(data, N, ws + (p->ws_is[ffts_ctzl(N) - 4] << 1)); } else if (N == 16) { - neon_static_x4_f(data, N, p->ws); + neon_static_x4_f(data, N, ws); } #else - const float *ws = (float*) p->ws; - if (N > 128) { - size_t N1 = N >> 1; - size_t N2 = N >> 2; - size_t N3 = N >> 3; + const size_t N1 = N >> 1; + const size_t N2 = N >> 2; + const size_t N3 = N >> 3; ffts_static_rec_f_32f(p, data , N2); ffts_static_rec_f_32f(p, data + N1 , N3); @@ -1013,36 +1008,31 @@ ffts_static_rec_f_32f(ffts_plan_t *p, float *data, size_t N) } static void -ffts_static_rec_i_32f(ffts_plan_t *p, float *data, size_t N) +ffts_static_rec_i_32f(const ffts_plan_t *p, float *data, size_t N) { + const float *ws = (const float*) p->ws; + #if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED) if (N > 16) { - size_t N1 = N >> 1; - size_t N2 = N >> 2; - size_t N3 = N >> 3; - float *ws = ((float *)(p->ws)) + (p->ws_is[ffts_ctzl(N)-4] << 1); - - ffts_static_rec_i_32f(p, data, N2); - ffts_static_rec_i_32f(p, data + N1, N3); - ffts_static_rec_i_32f(p, data + N1 + N2, N3); - ffts_static_rec_i_32f(p, data + N, N2); - ffts_static_rec_i_32f(p, data + N + N1, N2); - - if (N == p->N) { - neon_static_x8_t_i(data, N, ws); - } else { - neon_static_x8_i(data, N, ws); - } - } else if(N==16) { - neon_static_x4_i(data, N, p->ws); + const size_t N1 = N >> 1; + const size_t N2 = N >> 2; + const size_t N3 = N >> 3; + + ffts_static_rec_i_32f(p, data , N2); + ffts_static_rec_i_32f(p, data + N1 , N3); + ffts_static_rec_i_32f(p, data + N1 + N2, N3); + ffts_static_rec_i_32f(p, data + N , N2); + ffts_static_rec_i_32f(p, data + N + N1 , N2); + + neon_static_x8_i(data, N, ws + (p->ws_is[ffts_ctzl(N) - 4] << 1)); + } else if (N == 16) { + neon_static_x4_i(data, N, ws); } #else - float *ws = (float*) p->ws; - if (N > 128) { - size_t N1 = N >> 1; - size_t N2 = N >> 2; - size_t N3 = N >> 3; + const size_t N1 = N >> 1; + const size_t N2 = N >> 2; + const size_t N3 = N >> 3; ffts_static_rec_i_32f(p, data , N2); ffts_static_rec_i_32f(p, data + N1 , N3); @@ -1084,21 +1074,38 @@ ffts_static_transform_f_32f(ffts_plan_t *p, const void *in, void *out) const float *din = (const float*) in; float *dout = (float*) out; + const size_t N = p->N; + const int N_log_2 = ffts_ctzl(N); + #if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED) - if (ffts_ctzl(p->N) & 1) { + const size_t N1 = N >> 1; + const size_t N2 = N >> 2; + const size_t N3 = N >> 3; + + const float *ws = ((const float*) p->ws) + (p->ws_is[N_log_2 - 4] << 1); + + if (N_log_2 & 1) { neon_static_o_f(p, din, dout); } else { neon_static_e_f(p, din, dout); } + + ffts_static_rec_f_32f(p, dout , N2); + ffts_static_rec_f_32f(p, dout + N1 , N3); + ffts_static_rec_f_32f(p, dout + N1 + N2, N3); + ffts_static_rec_f_32f(p, dout + N , N2); + ffts_static_rec_f_32f(p, dout + N + N1 , N2); + + neon_static_x8_t_f(dout, N, ws); #else - if (ffts_ctzl(p->N) & 1) { + if (N_log_2 & 1) { ffts_static_firstpass_odd_32f(dout, din, p, 0); } else { ffts_static_firstpass_even_32f(dout, din, p, 0); } -#endif - ffts_static_rec_f_32f(p, dout, p->N); + ffts_static_rec_f_32f(p, dout, N); +#endif } void @@ -1107,19 +1114,36 @@ ffts_static_transform_i_32f(ffts_plan_t *p, const void *in, void *out) const float *din = (const float*) in; float *dout = (float*) out; + const size_t N = p->N; + const int N_log_2 = ffts_ctzl(N); + #if defined(HAVE_NEON) && defined(DYNAMIC_DISABLED) - if (ffts_ctzl(p->N) & 1) { + const size_t N1 = N >> 1; + const size_t N2 = N >> 2; + const size_t N3 = N >> 3; + + const float *ws = ((const float*) p->ws) + (p->ws_is[N_log_2 - 4] << 1); + + if (N_log_2 & 1) { neon_static_o_i(p, din, dout); } else { neon_static_e_i(p, din, dout); } + + ffts_static_rec_i_32f(p, dout , N2); + ffts_static_rec_i_32f(p, dout + N1 , N3); + ffts_static_rec_i_32f(p, dout + N1 + N2, N3); + ffts_static_rec_i_32f(p, dout + N , N2); + ffts_static_rec_i_32f(p, dout + N + N1 , N2); + + neon_static_x8_t_i(dout, N, ws); #else - if (ffts_ctzl(p->N) & 1) { + if (N_log_2 & 1) { ffts_static_firstpass_odd_32f(dout, din, p, 1); } else { ffts_static_firstpass_even_32f(dout, din, p, 1); } -#endif - ffts_static_rec_i_32f(p, dout, p->N); + ffts_static_rec_i_32f(p, dout, N); +#endif } \ No newline at end of file diff --git a/src/neon.h b/src/neon.h index 2f51995..b40623b 100644 --- a/src/neon.h +++ b/src/neon.h @@ -1,38 +1,38 @@ /* - - This file is part of FFTS -- The Fastest Fourier Transform in the South - - Copyright (c) 2012, Anthony M. Blake - Copyright (c) 2012, The University of Waikato - - All rights reserved. - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the organization nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +This file is part of FFTS -- The Fastest Fourier Transform in the South + +Copyright (c) 2012, Anthony M. Blake +Copyright (c) 2012, The University of Waikato + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the organization nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __NEON_H__ -#define __NEON_H__ +#ifndef FFTS_NEON_H +#define FFTS_NEON_H #include "ffts.h" @@ -48,19 +48,16 @@ void neon_end(); void neon_transpose(uint64_t *in, uint64_t *out, int w, int h); void neon_transpose_to_buf(uint64_t *in, uint64_t *out, int w); -//typedef struct _ffts_plan_t ffts_plan_t; - -void neon_static_e_f(ffts_plan_t * , const void * , void * ); -void neon_static_o_f(ffts_plan_t * , const void * , void * ); -void neon_static_x4_f(float *, size_t, float *); -void neon_static_x8_f(float *, size_t, float *); -void neon_static_x8_t_f(float *, size_t, float *); +void neon_static_e_f(ffts_plan_t*, const void*, void*); +void neon_static_o_f(ffts_plan_t*, const void*, void*); +void neon_static_x4_f(float*, size_t, const float*); +void neon_static_x8_f(float*, size_t, const float*); +void neon_static_x8_t_f(float*, size_t, const float*); -void neon_static_e_i(ffts_plan_t * , const void * , void * ); -void neon_static_o_i(ffts_plan_t * , const void * , void * ); -void neon_static_x4_i(float *, size_t, float *); -void neon_static_x8_i(float *, size_t, float *); -void neon_static_x8_t_i(float *, size_t, float *); +void neon_static_e_i(ffts_plan_t*, const void*, void*); +void neon_static_o_i(ffts_plan_t*, const void*, void*); +void neon_static_x4_i(float*, size_t, const float*); +void neon_static_x8_i(float*, size_t, const float*); +void neon_static_x8_t_i(float*, size_t, const float*); -#endif -// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3: +#endif /* FFTS_NEON_H */ -- cgit v1.1