diff options
-rw-r--r-- | src/ffts.c | 44 | ||||
-rw-r--r-- | src/ffts_small.c | 374 | ||||
-rw-r--r-- | src/ffts_small.h | 85 | ||||
-rw-r--r-- | src/macros.h | 45 | ||||
-rw-r--r-- | src/types.h | 10 |
5 files changed, 413 insertions, 145 deletions
@@ -207,7 +207,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) int hardcoded; size_t lut_size; size_t n_luts; - cdata_t *w; + ffts_cpx_32f *w; size_t i; size_t n; @@ -243,19 +243,19 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) if (!i || hardcoded) { #if defined(__arm__) && !defined(DYNAMIC_DISABLED) if (N <= 32) { - lut_size += n/4 * 2 * sizeof(cdata_t); + lut_size += n/4 * 2 * sizeof(ffts_cpx_32f); } else { - lut_size += n/4 * sizeof(cdata_t); + lut_size += n/4 * sizeof(ffts_cpx_32f); } #else - lut_size += n/4 * 2 * sizeof(cdata_t); + lut_size += n/4 * 2 * sizeof(ffts_cpx_32f); #endif n *= 2; } else { #if defined(__arm__) && !defined(DYNAMIC_DISABLED) - lut_size += n/8 * 3 * sizeof(cdata_t); + lut_size += n/8 * 3 * sizeof(ffts_cpx_32f); #else - lut_size += n/8 * 3 * 2 * sizeof(cdata_t); + lut_size += n/8 * 3 * 2 * sizeof(ffts_cpx_32f); #endif } n *= 2; @@ -289,11 +289,11 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) #endif for (i = 0; i < n_luts; i++) { - p->ws_is[i] = w - (cdata_t *)p->ws; + p->ws_is[i] = w - (ffts_cpx_32f*) p->ws; //fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]); if(!i || hardcoded) { - cdata_t *w0 = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32); + ffts_cpx_32f *w0 = FFTS_MALLOC(n/4 * sizeof(ffts_cpx_32f), 32); float *fw0 = (float*) w0; float *fw = (float *)w; @@ -305,7 +305,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) #if defined(__arm__) && !defined(DYNAMIC_DISABLED) if (N < 32) { - // w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32); + // w = FFTS_MALLOC(n/4 * 2 * sizeof(ffts_cpx_32f), 32); float *fw = (float *)w; V temp0, temp1, temp2; for (j=0; j<n/4; j+=2) { @@ -326,7 +326,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) } w += n/4 * 2; } else { - //w = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32); + //w = FFTS_MALLOC(n/4 * sizeof(ffts_cpx_32f), 32); float *fw = (float *)w; #ifdef HAVE_NEON { @@ -346,7 +346,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) w += n/4; } #else - //w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32); + //w = FFTS_MALLOC(n/4 * 2 * sizeof(ffts_cpx_32f), 32); for (j = 0; j < n/4; j += 2) { V re, im, temp0; temp0 = VLD(fw0 + j*2); @@ -362,9 +362,9 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) FFTS_FREE(w0); } else { - cdata_t *w0 = (cdata_t*) FFTS_MALLOC(n/8 * sizeof(cdata_t), 32); - cdata_t *w1 = (cdata_t*) FFTS_MALLOC(n/8 * sizeof(cdata_t), 32); - cdata_t *w2 = (cdata_t*) FFTS_MALLOC(n/8 * sizeof(cdata_t), 32); + ffts_cpx_32f *w0 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32); + ffts_cpx_32f *w1 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32); + ffts_cpx_32f *w2 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32); float *fw0 = (float*) w0; float *fw1 = (float*) w1; @@ -411,7 +411,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) #endif w += n/8 * 3; #else - //w = FFTS_MALLOC(n/8 * 3 * 2 * sizeof(cdata_t), 32); + //w = FFTS_MALLOC(n/8 * 3 * 2 * sizeof(ffts_cpx_32f), 32); for (j = 0; j < n/8; j += 2) { temp0 = VLD(fw0 + j*2); re = VDUPRE(temp0); @@ -560,28 +560,28 @@ ffts_plan_t *ffts_init_1d(size_t N, int sign) } else { switch (N) { case 2: - p->transform = &ffts_firstpass_2; + p->transform = &ffts_small_2_32f; break; case 4: if (sign == -1) { - p->transform = &ffts_firstpass_4_f; + p->transform = &ffts_small_forward4_32f; } else if (sign == 1) { - p->transform = &ffts_firstpass_4_b; + p->transform = &ffts_small_backward4_32f; } break; case 8: if (sign == -1) { - p->transform = &ffts_firstpass_8_f; + p->transform = &ffts_small_forward8_32f; } else if (sign == 1) { - p->transform = &ffts_firstpass_8_b; + p->transform = &ffts_small_backward8_32f; } break; case 16: default: if (sign == -1) { - p->transform = &ffts_firstpass_16_f; + p->transform = &ffts_small_forward16_32f; } else { - p->transform = &ffts_firstpass_16_b; + p->transform = &ffts_small_backward16_32f; } break; } diff --git a/src/ffts_small.c b/src/ffts_small.c index ccc3ab0..34be7af 100644 --- a/src/ffts_small.c +++ b/src/ffts_small.c @@ -1,104 +1,140 @@ /* - This file is part of FFTS -- The Fastest Fourier Transform in the South - - Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz> - Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com> - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the organization nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +This file is part of FFTS -- The Fastest Fourier Transform in the South + +Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz> +Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com> + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the organization nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "ffts_small.h" + #include "ffts_internal.h" #include "macros.h" -void ffts_firstpass_16_f(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - float *LUT8 = (float*) p->ws; - V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + const float *din = (const float*) in; + float *dout = (float*) out; + ffts_cpx_32f t0, t1, r0, r1; - L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); - S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); -} + /* unreferenced parameter */ + (void) p; -void ffts_firstpass_16_b(ffts_plan_t *p, const void *in, void *out) -{ - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - float *LUT8 = (float*) p->ws; - V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[2]; + t1[1] = din[3]; - L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); - L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); - K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); - S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); - K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); - S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); + r0[0] = t0[0] + t1[0]; + r0[1] = t0[1] + t1[1]; + r1[0] = t0[0] - t1[0]; + r1[1] = t0[1] - t1[1]; + + dout[0] = r0[0]; + dout[1] = r0[1]; + dout[2] = r1[0]; + dout[3] = r1[1]; } -void ffts_firstpass_8_f(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - V r0_1, r2_3, r4_5, r6_7; - float *LUT8 = (float*) p->ws + p->ws_is[0]; + const double *din = (const double*) in; + double *dout = (double*) out; + ffts_cpx_64f t0, t1, r0, r1; - L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); + /* unreferenced parameter */ + (void) p; + + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[2]; + t1[1] = din[3]; + + r0[0] = t0[0] + t1[0]; + r0[1] = t0[1] + t1[1]; + r1[0] = t0[0] - t1[0]; + r1[1] = t0[1] - t1[1]; + + dout[0] = r0[0]; + dout[1] = r0[1]; + dout[2] = r1[0]; + dout[3] = r1[1]; } -void ffts_firstpass_8_b(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - V r0_1, r2_3, r4_5, r6_7; - float *LUT8 = (float*) p->ws + p->ws_is[0]; + const float *din = (const float*) in; + float *dout = (float*) out; + ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7; - L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); - K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); - S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); + /* unreferenced parameter */ + (void) p; + + t0[0] = din[0]; + t0[1] = din[1]; + t1[0] = din[4]; + t1[1] = din[5]; + t2[0] = din[2]; + t2[1] = din[3]; + t3[0] = din[6]; + t3[1] = din[7]; + + t4[0] = t0[0] + t1[0]; + t4[1] = t0[1] + t1[1]; + t5[0] = t0[0] - t1[0]; + t5[1] = t0[1] - t1[1]; + t6[0] = t2[0] + t3[0]; + t6[1] = t2[1] + t3[1]; + t7[0] = t2[0] - t3[0]; + t7[1] = t2[1] - t3[1]; + + dout[0] = t4[0] + t6[0]; + dout[1] = t4[1] + t6[1]; + dout[4] = t4[0] - t6[0]; + dout[5] = t4[1] - t6[1]; + dout[2] = t5[0] + t7[1]; + dout[3] = t5[1] - t7[0]; + dout[6] = t5[0] - t7[1]; + dout[7] = t5[1] + t7[0]; } -void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - cdata_t t0, t1, t2, t3, t4, t5, t6, t7; + const double *din = (const double*) in; + double *dout = (double*) out; + ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7; - /* unreferenced parameter */ - (void) p; + /* unreferenced parameter */ + (void) p; t0[0] = din[0]; t0[1] = din[1]; @@ -128,14 +164,15 @@ void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out) dout[7] = t5[1] + t7[0]; } -void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - cdata_t t0, t1, t2, t3, t4, t5, t6, t7; + const float *din = (const float*) in; + float *dout = (float*) out; + ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7; - /* unreferenced parameter */ - (void) p; + /* unreferenced parameter */ + (void) p; t0[0] = din[0]; t0[1] = din[1]; @@ -165,27 +202,168 @@ void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out) dout[7] = t5[1] - t7[0]; } -void ffts_firstpass_2(ffts_plan_t *p, const void *in, void *out) +void +ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out) { - const data_t *din = (const data_t*) in; - data_t *dout = (data_t*) out; - cdata_t t0, t1, r0, r1; + const double *din = (const double*) in; + double *dout = (double*) out; + ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7; - /* unreferenced parameter */ - (void) p; + /* unreferenced parameter */ + (void) p; t0[0] = din[0]; t0[1] = din[1]; - t1[0] = din[2]; - t1[1] = din[3]; + t1[0] = din[4]; + t1[1] = din[5]; + t2[0] = din[2]; + t2[1] = din[3]; + t3[0] = din[6]; + t3[1] = din[7]; - r0[0] = t0[0] + t1[0]; - r0[1] = t0[1] + t1[1]; - r1[0] = t0[0] - t1[0]; - r1[1] = t0[1] - t1[1]; + t4[0] = t0[0] + t1[0]; + t4[1] = t0[1] + t1[1]; + t5[0] = t0[0] - t1[0]; + t5[1] = t0[1] - t1[1]; + t6[0] = t2[0] + t3[0]; + t6[1] = t2[1] + t3[1]; + t7[0] = t2[0] - t3[0]; + t7[1] = t2[1] - t3[1]; - dout[0] = r0[0]; - dout[1] = r0[1]; - dout[2] = r1[0]; - dout[3] = r1[1]; + dout[0] = t4[0] + t6[0]; + dout[1] = t4[1] + t6[1]; + dout[4] = t4[0] - t6[0]; + dout[5] = t4[1] - t6[1]; + dout[2] = t5[0] - t7[1]; + dout[3] = t5[1] + t7[0]; + dout[6] = t5[0] + t7[1]; + dout[7] = t5[1] - t7[0]; +} + +void +ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out) +{ + const float *din = (const float*) in; + float *dout = (float*) out; + V r0_1, r2_3, r4_5, r6_7; + float *LUT8 = (float*) p->ws + p->ws_is[0]; + + L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); +} + +void +ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out) +{ + const double *din = (const double*) in; + double *dout = (double*) out; + V r0_1, r2_3, r4_5, r6_7; + double *LUT8 = (double*) p->ws + p->ws_is[0]; + +#if MACROS_READY + L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); +#endif +} + +void +ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out) +{ + const float *din = (const float*) in; + float *dout = (float*) out; + V r0_1, r2_3, r4_5, r6_7; + float *LUT8 = (float*) p->ws + p->ws_is[0]; + + L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); +} + +void +ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out) +{ + const double *din = (const double*) in; + double *dout = (double*) out; + V r0_1, r2_3, r4_5, r6_7; + double *LUT8 = (double*) p->ws + p->ws_is[0]; + +#if MACROS_READY + L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12); +#endif +} + +void +ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out) +{ + const float *din = (const float*) in; + float *dout = (float*) out; + float *LUT8 = (float*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + + L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +} + +void +ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out) +{ + const double *din = (const double*) in; + double *dout = (double*) out; + double *LUT8 = (double*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + +#ifdef MACROS_READY + L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +#endif +} + +void +ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out) +{ + const float *din = (const float*) in; + float *dout = (float*) out; + float *LUT8 = (float*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + + L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +} + +void +ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out) +{ + const double *din = (const double*) in; + double *dout = (double*) out; + double *LUT8 = (double*) p->ws; + V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15; + +#ifdef MACROS_READY + L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11); + L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13); + K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7); + K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13); + S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24); + K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15); + S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28); +#endif }
\ No newline at end of file diff --git a/src/ffts_small.h b/src/ffts_small.h index 5ae48cc..249dcc9 100644 --- a/src/ffts_small.h +++ b/src/ffts_small.h @@ -1,14 +1,85 @@ +/* + +This file is part of FFTS -- The Fastest Fourier Transform in the South + +Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz> +Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com> + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the organization nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + #ifndef FFTS_SMALL_H #define FFTS_SMALL_H +#if defined (_MSC_VER) && (_MSC_VER >= 1020) +#pragma once +#endif + #include "ffts.h" -void ffts_firstpass_16_f(ffts_plan_t *p, const void *in, void *out); -void ffts_firstpass_16_b(ffts_plan_t *p, const void *in, void *out); -void ffts_firstpass_8_f(ffts_plan_t *p, const void *in, void *out); -void ffts_firstpass_8_b(ffts_plan_t *p, const void *in, void *out); -void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out); -void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out); -void ffts_firstpass_2(ffts_plan_t *p, const void *in, void *out); +void +ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out); + +void +ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out); #endif /* FFTS_SMALL_H */ diff --git a/src/macros.h b/src/macros.h index b4a6a5a..fc53ae4 100644 --- a/src/macros.h +++ b/src/macros.h @@ -48,7 +48,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "macros-alpha.h" #endif -static FFTS_INLINE void TX2(V *a, V *b) +static FFTS_INLINE void +TX2(V *a, V *b) { V TX2_t0 = VUNPACKLO(*a, *b); V TX2_t1 = VUNPACKHI(*a, *b); @@ -56,7 +57,8 @@ static FFTS_INLINE void TX2(V *a, V *b) *b = TX2_t1; } -static FFTS_INLINE void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3) +static FFTS_INLINE void +K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3) { V uk, uk2, zk_p, zk_n, zk, zk_d; @@ -75,9 +77,16 @@ static FFTS_INLINE void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3) *r1 = VSUB(uk2, zk_d); } -static FFTS_INLINE void L_2_4(int inv, const data_t* FFTS_RESTRICT i0, const data_t* FFTS_RESTRICT i1, - const data_t* FFTS_RESTRICT i2, const data_t* FFTS_RESTRICT i3, - V *r0, V *r1, V *r2, V *r3) +static FFTS_INLINE void +L_2_4(int inv, + const float *FFTS_RESTRICT i0, + const float *FFTS_RESTRICT i1, + const float *FFTS_RESTRICT i2, + const float *FFTS_RESTRICT i3, + V *r0, + V *r1, + V *r2, + V *r3) { V t0, t1, t2, t3, t4, t5, t6, t7; @@ -105,9 +114,16 @@ static FFTS_INLINE void L_2_4(int inv, const data_t* FFTS_RESTRICT i0, const dat *r2 = VUNPACKHI(t2, t3); } -static FFTS_INLINE void L_4_4(int inv, const data_t* FFTS_RESTRICT i0, const data_t* FFTS_RESTRICT i1, - const data_t* FFTS_RESTRICT i2, const data_t* FFTS_RESTRICT i3, - V *r0, V *r1, V *r2, V *r3) +static FFTS_INLINE void +L_4_4(int inv, + const float *FFTS_RESTRICT i0, + const float *FFTS_RESTRICT i1, + const float *FFTS_RESTRICT i2, + const float *FFTS_RESTRICT i3, + V *r0, + V *r1, + V *r2, + V *r3) { V t0, t1, t2, t3, t4, t5, t6, t7; @@ -136,9 +152,16 @@ static FFTS_INLINE void L_4_4(int inv, const data_t* FFTS_RESTRICT i0, const dat *r3 = t3; } -static FFTS_INLINE void L_4_2(int inv, const data_t * FFTS_RESTRICT i0, const data_t * FFTS_RESTRICT i1, - const data_t * FFTS_RESTRICT i2, const data_t * FFTS_RESTRICT i3, - V *r0, V *r1, V *r2, V *r3) +static FFTS_INLINE void +L_4_2(int inv, + const float *FFTS_RESTRICT i0, + const float *FFTS_RESTRICT i1, + const float *FFTS_RESTRICT i2, + const float *FFTS_RESTRICT i3, + V *r0, + V *r1, + V *r2, + V *r3) { V t0, t1, t2, t3, t4, t5, t6, t7; diff --git a/src/types.h b/src/types.h index 749d387..f8997ce 100644 --- a/src/types.h +++ b/src/types.h @@ -38,12 +38,8 @@ #pragma once #endif -#if defined(_Complex_I) && defined(complex) && defined(I) -typedef complex float cdata_t; -#else -typedef float cdata_t[2]; -#endif - -typedef float data_t; +/* Define complex number as two element array */ +typedef float ffts_cpx_32f[2]; +typedef double ffts_cpx_64f[2]; #endif /* FFTS_TYPES_H */ |