From c602cee1b51e8c532e4817d41d973deea8ab257a Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Wed, 29 Oct 2014 16:13:33 +0200 Subject: Cleaning and reorganizing --- src/ffts_small.c | 208 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 117 insertions(+), 91 deletions(-) (limited to 'src/ffts_small.c') diff --git a/src/ffts_small.c b/src/ffts_small.c index e53493c..6f700c6 100644 --- a/src/ffts_small.c +++ b/src/ffts_small.c @@ -1,10 +1,10 @@ /* - + This file is part of FFTS -- The Fastest Fourier Transform in the South - - Copyright (c) 2013, Michael J. Cree + + Copyright (c) 2013, Michael J. Cree Copyright (c) 2012, 2013, Anthony M. Blake - + All rights reserved. Redistribution and use in source and binary forms, with or without @@ -31,127 +31,153 @@ */ -#include "ffts.h" +#include "ffts_small.h" #include "macros.h" #include -#define DEBUG(x) - -#include "ffts_small.h" - - void firstpass_16_f(ffts_plan_t * p, const void * in, void * out) +void ffts_firstpass_16_f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t *)in; - data_t *dout = (data_t *)out; - V r0_1,r2_3,r4_5,r6_7,r8_9,r10_11,r12_13,r14_15; - float *LUT8 = p->ws; - - L_4_4(0, din+0,din+16,din+8,din+24,&r0_1,&r2_3,&r8_9,&r10_11); - L_2_4(0, din+4,din+20,din+28,din+12,&r4_5,&r6_7,&r14_15,&r12_13); - K_N(0, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7); - K_N(0, VLD(LUT8+8),VLD(LUT8+12),&r0_1,&r4_5,&r8_9,&r12_13); - S_4(r0_1,r4_5,r8_9,r12_13,dout+0,dout+8,dout+16,dout+24); - K_N(0, VLD(LUT8+16),VLD(LUT8+20),&r2_3,&r6_7,&r10_11,&r14_15); - S_4(r2_3,r6_7,r10_11,r14_15,dout+4,dout+12,dout+20,dout+28); + const data_t *din = (const data_t*) in; + data_t *dout = (data_t*) out; + float *LUT8 = (float*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + + L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); } - void firstpass_16_b(ffts_plan_t * p, const void * in, void * out) +void ffts_firstpass_16_b(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t *)in; - data_t *dout = (data_t *)out; - V r0_1,r2_3,r4_5,r6_7,r8_9,r10_11,r12_13,r14_15; - float *LUT8 = p->ws; - - L_4_4(1, din+0,din+16,din+8,din+24,&r0_1,&r2_3,&r8_9,&r10_11); - L_2_4(1, din+4,din+20,din+28,din+12,&r4_5,&r6_7,&r14_15,&r12_13); - K_N(1, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7); - K_N(1, VLD(LUT8+8),VLD(LUT8+12),&r0_1,&r4_5,&r8_9,&r12_13); - S_4(r0_1,r4_5,r8_9,r12_13,dout+0,dout+8,dout+16,dout+24); - K_N(1, VLD(LUT8+16),VLD(LUT8+20),&r2_3,&r6_7,&r10_11,&r14_15); - S_4(r2_3,r6_7,r10_11,r14_15,dout+4,dout+12,dout+20,dout+28); -} + const data_t *din = (const data_t*) in; + data_t *dout = (data_t*) out; + float *LUT8 = (float*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +} - void firstpass_8_f(ffts_plan_t *p, const void *in, void *out) +void ffts_firstpass_8_f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t *)in; - data_t *dout = (data_t *)out; + const data_t *din = (const data_t*) in; + data_t *dout = (data_t*) out; V r0_1, r2_3, r4_5, r6_7; - float *LUT8 = p->ws + p->ws_is[0]; + float *LUT8 = (float*) p->ws + p->ws_is[0]; L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1,r2_3,r4_5,r6_7,dout+0,dout+4,dout+8,dout+12); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); } - void firstpass_8_b(ffts_plan_t *p, const void *in, void *out) +void ffts_firstpass_8_b(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t *)in; - data_t *dout = (data_t *)out; + const data_t *din = (const data_t*) in; + data_t *dout = (data_t*) out; V r0_1, r2_3, r4_5, r6_7; - float *LUT8 = p->ws + p->ws_is[0]; + float *LUT8 = (float*) p->ws + p->ws_is[0]; L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1,r2_3,r4_5,r6_7,dout+0,dout+4,dout+8,dout+12); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); } - - void firstpass_4_f(ffts_plan_t *p, const void *in, void *out) +void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t *)in; - data_t *dout = (data_t *)out; + const data_t *din = (const data_t*) in; + data_t *dout = (data_t*) out; cdata_t t0, t1, t2, t3, t4, t5, t6, t7; - t0[0] = din[0]; t0[1] = din[1]; - t1[0] = din[4]; t1[1] = din[5]; - t2[0] = din[2]; t2[1] = din[3]; - t3[0] = din[6]; t3[1] = din[7]; - - t4[0] = t0[0] + t1[0]; t4[1] = t0[1] + t1[1]; - t5[0] = t0[0] - t1[0]; t5[1] = t0[1] - t1[1]; - t6[0] = t2[0] + t3[0]; t6[1] = t2[1] + t3[1]; - t7[0] = t2[0] - t3[0]; t7[1] = t2[1] - t3[1]; - - dout[0] = t4[0] + t6[0]; dout[1] = t4[1] + t6[1]; - dout[4] = t4[0] - t6[0]; dout[5] = t4[1] - t6[1]; - dout[2] = t5[0] + t7[1]; dout[3] = t5[1] - t7[0]; - dout[6] = t5[0] - t7[1]; dout[7] = t5[1] + t7[0]; + + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[4]; + t1[1] = din[5]; + t2[0] = din[2]; + t2[1] = din[3]; + t3[0] = din[6]; + t3[1] = din[7]; + + t4[0] = t0[0] + t1[0]; + t4[1] = t0[1] + t1[1]; + t5[0] = t0[0] - t1[0]; + t5[1] = t0[1] - t1[1]; + t6[0] = t2[0] + t3[0]; + t6[1] = t2[1] + t3[1]; + t7[0] = t2[0] - t3[0]; + t7[1] = t2[1] - t3[1]; + + dout[0] = t4[0] + t6[0]; + dout[1] = t4[1] + t6[1]; + dout[4] = t4[0] - t6[0]; + dout[5] = t4[1] - t6[1]; + dout[2] = t5[0] + t7[1]; + dout[3] = t5[1] - t7[0]; + dout[6] = t5[0] - t7[1]; + dout[7] = t5[1] + t7[0]; } - void firstpass_4_b(ffts_plan_t *p, const void *in, void *out) +void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t *)in; - data_t *dout = (data_t *)out; + const data_t *din = (const data_t*) in; + data_t *dout = (data_t*) out; cdata_t t0, t1, t2, t3, t4, t5, t6, t7; - t0[0] = din[0]; t0[1] = din[1]; - t1[0] = din[4]; t1[1] = din[5]; - t2[0] = din[2]; t2[1] = din[3]; - t3[0] = din[6]; t3[1] = din[7]; - - t4[0] = t0[0] + t1[0]; t4[1] = t0[1] + t1[1]; - t5[0] = t0[0] - t1[0]; t5[1] = t0[1] - t1[1]; - t6[0] = t2[0] + t3[0]; t6[1] = t2[1] + t3[1]; - t7[0] = t2[0] - t3[0]; t7[1] = t2[1] - t3[1]; - - dout[0] = t4[0] + t6[0]; dout[1] = t4[1] + t6[1]; - dout[4] = t4[0] - t6[0]; dout[5] = t4[1] - t6[1]; - dout[2] = t5[0] - t7[1]; dout[3] = t5[1] + t7[0]; - dout[6] = t5[0] + t7[1]; dout[7] = t5[1] - t7[0]; + + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[4]; + t1[1] = din[5]; + t2[0] = din[2]; + t2[1] = din[3]; + t3[0] = din[6]; + t3[1] = din[7]; + + t4[0] = t0[0] + t1[0]; + t4[1] = t0[1] + t1[1]; + t5[0] = t0[0] - t1[0]; + t5[1] = t0[1] - t1[1]; + t6[0] = t2[0] + t3[0]; + t6[1] = t2[1] + t3[1]; + t7[0] = t2[0] - t3[0]; + t7[1] = t2[1] - t3[1]; + + dout[0] = t4[0] + t6[0]; + dout[1] = t4[1] + t6[1]; + dout[4] = t4[0] - t6[0]; + dout[5] = t4[1] - t6[1]; + dout[2] = t5[0] - t7[1]; + dout[3] = t5[1] + t7[0]; + dout[6] = t5[0] + t7[1]; + dout[7] = t5[1] - t7[0]; } - void firstpass_2(ffts_plan_t *p, const void *in, void *out) +void ffts_firstpass_2(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t *)in; - data_t *dout = (data_t *)out; - cdata_t t0, t1, r0,r1; - t0[0] = din[0]; t0[1] = din[1]; - t1[0] = din[2]; t1[1] = din[3]; + const data_t *din = (const data_t*) in; + data_t *dout = (data_t*) out; + cdata_t t0, t1, r0, r1; + + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[2]; + t1[1] = din[3]; + r0[0] = t0[0] + t1[0]; r0[1] = t0[1] + t1[1]; r1[0] = t0[0] - t1[0]; r1[1] = t0[1] - t1[1]; - dout[0] = r0[0]; dout[1] = r0[1]; - dout[2] = r1[0]; dout[3] = r1[1]; -} -// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3: + + dout[0] = r0[0]; + dout[1] = r0[1]; + dout[2] = r1[0]; + dout[3] = r1[1]; +} \ No newline at end of file -- cgit v1.1 From db6d95e7d30566d879253a09c7c318975689107d Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Fri, 14 Nov 2014 18:04:54 +0200 Subject: Take care of unreferenced parameters --- src/ffts_small.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/ffts_small.c') diff --git a/src/ffts_small.c b/src/ffts_small.c index 6f700c6..429991e 100644 --- a/src/ffts_small.c +++ b/src/ffts_small.c @@ -98,6 +98,9 @@ void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out) data_t *dout = (data_t*) out; cdata_t t0, t1, t2, t3, t4, t5, t6, t7; + /* unreferenced parameter */ + (void) p; + t0[0] = din[0]; t0[1] = din[1]; t1[0] = din[4]; @@ -132,6 +135,9 @@ void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out) data_t *dout = (data_t*) out; cdata_t t0, t1, t2, t3, t4, t5, t6, t7; + /* unreferenced parameter */ + (void) p; + t0[0] = din[0]; t0[1] = din[1]; t1[0] = din[4]; @@ -166,6 +172,9 @@ void ffts_firstpass_2(ffts_plan_t *p, const void *in, void *out) data_t *dout = (data_t*) out; cdata_t t0, t1, r0, r1; + /* unreferenced parameter */ + (void) p; + t0[0] = din[0]; t0[1] = din[1]; t1[0] = din[2]; -- cgit v1.1 From 71f1f4dae77c2f6b335c3e06c13a3ecedf73ccda Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Mon, 17 Nov 2014 15:39:46 +0200 Subject: Fix redefinition of ffts_plan_t --- src/ffts_small.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/ffts_small.c') diff --git a/src/ffts_small.c b/src/ffts_small.c index 429991e..8fa373f 100644 --- a/src/ffts_small.c +++ b/src/ffts_small.c @@ -32,6 +32,7 @@ */ #include "ffts_small.h" +#include "ffts_internal.h" #include "macros.h" #include -- cgit v1.1 From ebae52c72d3488d123c6ca9e31dfd95872d0575c Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Tue, 10 Mar 2015 12:19:38 +0200 Subject: Check existence of various headers and add guards for them --- src/ffts_small.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/ffts_small.c') diff --git a/src/ffts_small.c b/src/ffts_small.c index 8fa373f..ccc3ab0 100644 --- a/src/ffts_small.c +++ b/src/ffts_small.c @@ -35,8 +35,6 @@ #include "ffts_internal.h" #include "macros.h" -#include - void ffts_firstpass_16_f(ffts_plan_t *p, const void *in, void *out) { const data_t *din = (const data_t*) in; -- cgit v1.1 From 4cfaf45051e43c00ea9ac5ac996da246817e4c10 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Thu, 12 Mar 2015 13:06:57 +0200 Subject: Initial steps to support double precision. Replace data_t with float, and cdata_t with ffts_cpx_32f. --- src/ffts_small.c | 374 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 276 insertions(+), 98 deletions(-) (limited to 'src/ffts_small.c') diff --git a/src/ffts_small.c b/src/ffts_small.c index ccc3ab0..34be7af 100644 --- a/src/ffts_small.c +++ b/src/ffts_small.c @@ -1,104 +1,140 @@ /* - This file is part of FFTS -- The Fastest Fourier Transform in the South - - Copyright (c) 2013, Michael J. Cree - Copyright (c) 2012, 2013, Anthony M. Blake - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the organization nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +This file is part of FFTS -- The Fastest Fourier Transform in the South + +Copyright (c) 2013, Michael J. Cree +Copyright (c) 2012, 2013, Anthony M. Blake + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the organization nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "ffts_small.h" + #include "ffts_internal.h" #include "macros.h" -void ffts_firstpass_16_f(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - float *LUT8 = (float*) p->ws; - V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + const float *din = (const float*) in; + float *dout = (float*) out; + ffts_cpx_32f t0, t1, r0, r1; - L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); - S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); -} + /* unreferenced parameter */ + (void) p; -void ffts_firstpass_16_b(ffts_plan_t *p, const void *in, void *out) -{ - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - float *LUT8 = (float*) p->ws; - V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[2]; + t1[1] = din[3]; - L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); - S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); + r0[0] = t0[0] + t1[0]; + r0[1] = t0[1] + t1[1]; + r1[0] = t0[0] - t1[0]; + r1[1] = t0[1] - t1[1]; + + dout[0] = r0[0]; + dout[1] = r0[1]; + dout[2] = r1[0]; + dout[3] = r1[1]; } -void ffts_firstpass_8_f(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - V r0_1, r2_3, r4_5, r6_7; - float *LUT8 = (float*) p->ws + p->ws_is[0]; + const double *din = (const double*) in; + double *dout = (double*) out; + ffts_cpx_64f t0, t1, r0, r1; - L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); + /* unreferenced parameter */ + (void) p; + + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[2]; + t1[1] = din[3]; + + r0[0] = t0[0] + t1[0]; + r0[1] = t0[1] + t1[1]; + r1[0] = t0[0] - t1[0]; + r1[1] = t0[1] - t1[1]; + + dout[0] = r0[0]; + dout[1] = r0[1]; + dout[2] = r1[0]; + dout[3] = r1[1]; } -void ffts_firstpass_8_b(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - V r0_1, r2_3, r4_5, r6_7; - float *LUT8 = (float*) p->ws + p->ws_is[0]; + const float *din = (const float*) in; + float *dout = (float*) out; + ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7; - L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); + /* unreferenced parameter */ + (void) p; + + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[4]; + t1[1] = din[5]; + t2[0] = din[2]; + t2[1] = din[3]; + t3[0] = din[6]; + t3[1] = din[7]; + + t4[0] = t0[0] + t1[0]; + t4[1] = t0[1] + t1[1]; + t5[0] = t0[0] - t1[0]; + t5[1] = t0[1] - t1[1]; + t6[0] = t2[0] + t3[0]; + t6[1] = t2[1] + t3[1]; + t7[0] = t2[0] - t3[0]; + t7[1] = t2[1] - t3[1]; + + dout[0] = t4[0] + t6[0]; + dout[1] = t4[1] + t6[1]; + dout[4] = t4[0] - t6[0]; + dout[5] = t4[1] - t6[1]; + dout[2] = t5[0] + t7[1]; + dout[3] = t5[1] - t7[0]; + dout[6] = t5[0] - t7[1]; + dout[7] = t5[1] + t7[0]; } -void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - cdata_t t0, t1, t2, t3, t4, t5, t6, t7; + const double *din = (const double*) in; + double *dout = (double*) out; + ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7; - /* unreferenced parameter */ - (void) p; + /* unreferenced parameter */ + (void) p; t0[0] = din[0]; t0[1] = din[1]; @@ -128,14 +164,15 @@ void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out) dout[7] = t5[1] + t7[0]; } -void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - cdata_t t0, t1, t2, t3, t4, t5, t6, t7; + const float *din = (const float*) in; + float *dout = (float*) out; + ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7; - /* unreferenced parameter */ - (void) p; + /* unreferenced parameter */ + (void) p; t0[0] = din[0]; t0[1] = din[1]; @@ -165,27 +202,168 @@ void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out) dout[7] = t5[1] - t7[0]; } -void ffts_firstpass_2(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - cdata_t t0, t1, r0, r1; + const double *din = (const double*) in; + double *dout = (double*) out; + ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7; - /* unreferenced parameter */ - (void) p; + /* unreferenced parameter */ + (void) p; t0[0] = din[0]; t0[1] = din[1]; - t1[0] = din[2]; - t1[1] = din[3]; + t1[0] = din[4]; + t1[1] = din[5]; + t2[0] = din[2]; + t2[1] = din[3]; + t3[0] = din[6]; + t3[1] = din[7]; - r0[0] = t0[0] + t1[0]; - r0[1] = t0[1] + t1[1]; - r1[0] = t0[0] - t1[0]; - r1[1] = t0[1] - t1[1]; + t4[0] = t0[0] + t1[0]; + t4[1] = t0[1] + t1[1]; + t5[0] = t0[0] - t1[0]; + t5[1] = t0[1] - t1[1]; + t6[0] = t2[0] + t3[0]; + t6[1] = t2[1] + t3[1]; + t7[0] = t2[0] - t3[0]; + t7[1] = t2[1] - t3[1]; - dout[0] = r0[0]; - dout[1] = r0[1]; - dout[2] = r1[0]; - dout[3] = r1[1]; + dout[0] = t4[0] + t6[0]; + dout[1] = t4[1] + t6[1]; + dout[4] = t4[0] - t6[0]; + dout[5] = t4[1] - t6[1]; + dout[2] = t5[0] - t7[1]; + dout[3] = t5[1] + t7[0]; + dout[6] = t5[0] + t7[1]; + dout[7] = t5[1] - t7[0]; +} + +void +ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out) +{ + const float *din = (const float*) in; + float *dout = (float*) out; + V r0_1, r2_3, r4_5, r6_7; + float *LUT8 = (float*) p->ws + p->ws_is[0]; + + L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); +} + +void +ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out) +{ + const double *din = (const double*) in; + double *dout = (double*) out; + V r0_1, r2_3, r4_5, r6_7; + double *LUT8 = (double*) p->ws + p->ws_is[0]; + +#if MACROS_READY + L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); +#endif +} + +void +ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out) +{ + const float *din = (const float*) in; + float *dout = (float*) out; + V r0_1, r2_3, r4_5, r6_7; + float *LUT8 = (float*) p->ws + p->ws_is[0]; + + L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); +} + +void +ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out) +{ + const double *din = (const double*) in; + double *dout = (double*) out; + V r0_1, r2_3, r4_5, r6_7; + double *LUT8 = (double*) p->ws + p->ws_is[0]; + +#if MACROS_READY + L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); +#endif +} + +void +ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out) +{ + const float *din = (const float*) in; + float *dout = (float*) out; + float *LUT8 = (float*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + + L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +} + +void +ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out) +{ + const double *din = (const double*) in; + double *dout = (double*) out; + double *LUT8 = (double*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + +#ifdef MACROS_READY + L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +#endif +} + +void +ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out) +{ + const float *din = (const float*) in; + float *dout = (float*) out; + float *LUT8 = (float*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + + L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +} + +void +ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out) +{ + const double *din = (const double*) in; + double *dout = (double*) out; + double *LUT8 = (double*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + +#ifdef MACROS_READY + L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +#endif } \ No newline at end of file -- cgit v1.1 From 835c5ab5b3d9f3104959dc6722b4bad600eae8fe Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Thu, 12 Mar 2015 18:03:00 +0200 Subject: Rename vector V as V4SF; vector of 4 single precision floats. Rename all vector V macros accordingly. Redefine ffts_constants as ffts_constants_32f and ffts_constants_64f. --- src/ffts_small.c | 60 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'src/ffts_small.c') diff --git a/src/ffts_small.c b/src/ffts_small.c index 34be7af..5bcbfc6 100644 --- a/src/ffts_small.c +++ b/src/ffts_small.c @@ -245,12 +245,12 @@ ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out) { const float *din = (const float*) in; float *dout = (float*) out; - V r0_1, r2_3, r4_5, r6_7; + V4SF r0_1, r2_3, r4_5, r6_7; float *LUT8 = (float*) p->ws + p->ws_is[0]; - L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); + V4SF_L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + V4SF_K_N(0, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + V4SF_S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); } void @@ -258,7 +258,7 @@ ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out) { const double *din = (const double*) in; double *dout = (double*) out; - V r0_1, r2_3, r4_5, r6_7; + V4SF r0_1, r2_3, r4_5, r6_7; double *LUT8 = (double*) p->ws + p->ws_is[0]; #if MACROS_READY @@ -273,12 +273,12 @@ ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out) { const float *din = (const float*) in; float *dout = (float*) out; - V r0_1, r2_3, r4_5, r6_7; + V4SF r0_1, r2_3, r4_5, r6_7; float *LUT8 = (float*) p->ws + p->ws_is[0]; - L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); + V4SF_L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + V4SF_K_N(1, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + V4SF_S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); } void @@ -286,7 +286,7 @@ ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out) { const double *din = (const double*) in; double *dout = (double*) out; - V r0_1, r2_3, r4_5, r6_7; + V4SF r0_1, r2_3, r4_5, r6_7; double *LUT8 = (double*) p->ws + p->ws_is[0]; #if MACROS_READY @@ -302,15 +302,15 @@ ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out) const float *din = (const float*) in; float *dout = (float*) out; float *LUT8 = (float*) p->ws; - V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; - - L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); - S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); + V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + + V4SF_L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + V4SF_L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + V4SF_K_N(0, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + V4SF_K_N(0, V4SF_LD(LUT8+8), V4SF_LD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); + V4SF_S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + V4SF_K_N(0, V4SF_LD(LUT8+16), V4SF_LD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + V4SF_S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); } void @@ -319,7 +319,7 @@ ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out) const double *din = (const double*) in; double *dout = (double*) out; double *LUT8 = (double*) p->ws; - V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; #ifdef MACROS_READY L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); @@ -338,15 +338,15 @@ ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out) const float *din = (const float*) in; float *dout = (float*) out; float *LUT8 = (float*) p->ws; - V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; - - L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); - S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); + V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + + V4SF_L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + V4SF_L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + V4SF_K_N(1, V4SF_LD(LUT8+ 0), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + V4SF_K_N(1, V4SF_LD(LUT8+ 8), V4SF_LD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); + V4SF_S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + V4SF_K_N(1, V4SF_LD(LUT8+16), V4SF_LD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + V4SF_S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); } void @@ -355,7 +355,7 @@ ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out) const double *din = (const double*) in; double *dout = (double*) out; double *LUT8 = (double*) p->ws; - V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; #ifdef MACROS_READY L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); -- cgit v1.1 From 1ea951f98b7f35c42c49394a75ae3b8cf3e62dfe Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Mon, 16 Mar 2015 13:21:23 +0200 Subject: Merge ffts_small with ffts_static, and define small transforms "fully" constant --- src/ffts_small.c | 369 ------------------------------------------------------- 1 file changed, 369 deletions(-) delete mode 100644 src/ffts_small.c (limited to 'src/ffts_small.c') diff --git a/src/ffts_small.c b/src/ffts_small.c deleted file mode 100644 index 5bcbfc6..0000000 --- a/src/ffts_small.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - -This file is part of FFTS -- The Fastest Fourier Transform in the South - -Copyright (c) 2013, Michael J. Cree -Copyright (c) 2012, 2013, Anthony M. Blake - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -* Neither the name of the organization nor the -names of its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "ffts_small.h" - -#include "ffts_internal.h" -#include "macros.h" - -void -ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out) -{ - const float *din = (const float*) in; - float *dout = (float*) out; - ffts_cpx_32f t0, t1, r0, r1; - - /* unreferenced parameter */ - (void) p; - - t0[0] = din[0]; - t0[1] = din[1]; - t1[0] = din[2]; - t1[1] = din[3]; - - r0[0] = t0[0] + t1[0]; - r0[1] = t0[1] + t1[1]; - r1[0] = t0[0] - t1[0]; - r1[1] = t0[1] - t1[1]; - - dout[0] = r0[0]; - dout[1] = r0[1]; - dout[2] = r1[0]; - dout[3] = r1[1]; -} - -void -ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out) -{ - const double *din = (const double*) in; - double *dout = (double*) out; - ffts_cpx_64f t0, t1, r0, r1; - - /* unreferenced parameter */ - (void) p; - - t0[0] = din[0]; - t0[1] = din[1]; - t1[0] = din[2]; - t1[1] = din[3]; - - r0[0] = t0[0] + t1[0]; - r0[1] = t0[1] + t1[1]; - r1[0] = t0[0] - t1[0]; - r1[1] = t0[1] - t1[1]; - - dout[0] = r0[0]; - dout[1] = r0[1]; - dout[2] = r1[0]; - dout[3] = r1[1]; -} - -void -ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out) -{ - const float *din = (const float*) in; - float *dout = (float*) out; - ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7; - - /* unreferenced parameter */ - (void) p; - - t0[0] = din[0]; - t0[1] = din[1]; - t1[0] = din[4]; - t1[1] = din[5]; - t2[0] = din[2]; - t2[1] = din[3]; - t3[0] = din[6]; - t3[1] = din[7]; - - t4[0] = t0[0] + t1[0]; - t4[1] = t0[1] + t1[1]; - t5[0] = t0[0] - t1[0]; - t5[1] = t0[1] - t1[1]; - t6[0] = t2[0] + t3[0]; - t6[1] = t2[1] + t3[1]; - t7[0] = t2[0] - t3[0]; - t7[1] = t2[1] - t3[1]; - - dout[0] = t4[0] + t6[0]; - dout[1] = t4[1] + t6[1]; - dout[4] = t4[0] - t6[0]; - dout[5] = t4[1] - t6[1]; - dout[2] = t5[0] + t7[1]; - dout[3] = t5[1] - t7[0]; - dout[6] = t5[0] - t7[1]; - dout[7] = t5[1] + t7[0]; -} - -void -ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out) -{ - const double *din = (const double*) in; - double *dout = (double*) out; - ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7; - - /* unreferenced parameter */ - (void) p; - - t0[0] = din[0]; - t0[1] = din[1]; - t1[0] = din[4]; - t1[1] = din[5]; - t2[0] = din[2]; - t2[1] = din[3]; - t3[0] = din[6]; - t3[1] = din[7]; - - t4[0] = t0[0] + t1[0]; - t4[1] = t0[1] + t1[1]; - t5[0] = t0[0] - t1[0]; - t5[1] = t0[1] - t1[1]; - t6[0] = t2[0] + t3[0]; - t6[1] = t2[1] + t3[1]; - t7[0] = t2[0] - t3[0]; - t7[1] = t2[1] - t3[1]; - - dout[0] = t4[0] + t6[0]; - dout[1] = t4[1] + t6[1]; - dout[4] = t4[0] - t6[0]; - dout[5] = t4[1] - t6[1]; - dout[2] = t5[0] + t7[1]; - dout[3] = t5[1] - t7[0]; - dout[6] = t5[0] - t7[1]; - dout[7] = t5[1] + t7[0]; -} - -void -ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out) -{ - const float *din = (const float*) in; - float *dout = (float*) out; - ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7; - - /* unreferenced parameter */ - (void) p; - - t0[0] = din[0]; - t0[1] = din[1]; - t1[0] = din[4]; - t1[1] = din[5]; - t2[0] = din[2]; - t2[1] = din[3]; - t3[0] = din[6]; - t3[1] = din[7]; - - t4[0] = t0[0] + t1[0]; - t4[1] = t0[1] + t1[1]; - t5[0] = t0[0] - t1[0]; - t5[1] = t0[1] - t1[1]; - t6[0] = t2[0] + t3[0]; - t6[1] = t2[1] + t3[1]; - t7[0] = t2[0] - t3[0]; - t7[1] = t2[1] - t3[1]; - - dout[0] = t4[0] + t6[0]; - dout[1] = t4[1] + t6[1]; - dout[4] = t4[0] - t6[0]; - dout[5] = t4[1] - t6[1]; - dout[2] = t5[0] - t7[1]; - dout[3] = t5[1] + t7[0]; - dout[6] = t5[0] + t7[1]; - dout[7] = t5[1] - t7[0]; -} - -void -ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out) -{ - const double *din = (const double*) in; - double *dout = (double*) out; - ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7; - - /* unreferenced parameter */ - (void) p; - - t0[0] = din[0]; - t0[1] = din[1]; - t1[0] = din[4]; - t1[1] = din[5]; - t2[0] = din[2]; - t2[1] = din[3]; - t3[0] = din[6]; - t3[1] = din[7]; - - t4[0] = t0[0] + t1[0]; - t4[1] = t0[1] + t1[1]; - t5[0] = t0[0] - t1[0]; - t5[1] = t0[1] - t1[1]; - t6[0] = t2[0] + t3[0]; - t6[1] = t2[1] + t3[1]; - t7[0] = t2[0] - t3[0]; - t7[1] = t2[1] - t3[1]; - - dout[0] = t4[0] + t6[0]; - dout[1] = t4[1] + t6[1]; - dout[4] = t4[0] - t6[0]; - dout[5] = t4[1] - t6[1]; - dout[2] = t5[0] - t7[1]; - dout[3] = t5[1] + t7[0]; - dout[6] = t5[0] + t7[1]; - dout[7] = t5[1] - t7[0]; -} - -void -ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out) -{ - const float *din = (const float*) in; - float *dout = (float*) out; - V4SF r0_1, r2_3, r4_5, r6_7; - float *LUT8 = (float*) p->ws + p->ws_is[0]; - - V4SF_L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - V4SF_K_N(0, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - V4SF_S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); -} - -void -ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out) -{ - const double *din = (const double*) in; - double *dout = (double*) out; - V4SF r0_1, r2_3, r4_5, r6_7; - double *LUT8 = (double*) p->ws + p->ws_is[0]; - -#if MACROS_READY - L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); -#endif -} - -void -ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out) -{ - const float *din = (const float*) in; - float *dout = (float*) out; - V4SF r0_1, r2_3, r4_5, r6_7; - float *LUT8 = (float*) p->ws + p->ws_is[0]; - - V4SF_L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - V4SF_K_N(1, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - V4SF_S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); -} - -void -ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out) -{ - const double *din = (const double*) in; - double *dout = (double*) out; - V4SF r0_1, r2_3, r4_5, r6_7; - double *LUT8 = (double*) p->ws + p->ws_is[0]; - -#if MACROS_READY - L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); -#endif -} - -void -ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out) -{ - const float *din = (const float*) in; - float *dout = (float*) out; - float *LUT8 = (float*) p->ws; - V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; - - V4SF_L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - V4SF_L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - V4SF_K_N(0, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - V4SF_K_N(0, V4SF_LD(LUT8+8), V4SF_LD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); - V4SF_S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - V4SF_K_N(0, V4SF_LD(LUT8+16), V4SF_LD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - V4SF_S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); -} - -void -ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out) -{ - const double *din = (const double*) in; - double *dout = (double*) out; - double *LUT8 = (double*) p->ws; - V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; - -#ifdef MACROS_READY - L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); - S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); -#endif -} - -void -ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out) -{ - const float *din = (const float*) in; - float *dout = (float*) out; - float *LUT8 = (float*) p->ws; - V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; - - V4SF_L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - V4SF_L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - V4SF_K_N(1, V4SF_LD(LUT8+ 0), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - V4SF_K_N(1, V4SF_LD(LUT8+ 8), V4SF_LD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); - V4SF_S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - V4SF_K_N(1, V4SF_LD(LUT8+16), V4SF_LD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - V4SF_S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); -} - -void -ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out) -{ - const double *din = (const double*) in; - double *dout = (double*) out; - double *LUT8 = (double*) p->ws; - V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; - -#ifdef MACROS_READY - L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); - S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); -#endif -} \ No newline at end of file -- cgit v1.1