summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/ffts.c 44
-rw-r--r-- src/ffts_small.c 374
-rw-r--r-- src/ffts_small.h 85
-rw-r--r-- src/macros.h 45
-rw-r--r-- src/types.h 10
5 files changed, 413 insertions, 145 deletions
diff --git a/src/ffts.c b/src/ffts.c
index 4474a9f..94d6f1b 100644
--- a/src/ffts.c
+++ b/src/ffts.c
@@ -207,7 +207,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
int hardcoded;
size_t lut_size;
size_t n_luts;
- cdata_t *w;
+ ffts_cpx_32f *w;
size_t i;
size_t n;
@@ -243,19 +243,19 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
if (!i || hardcoded) {
#if defined(__arm__) && !defined(DYNAMIC_DISABLED)
if (N <= 32) {
- lut_size += n/4 * 2 * sizeof(cdata_t);
+ lut_size += n/4 * 2 * sizeof(ffts_cpx_32f);
} else {
- lut_size += n/4 * sizeof(cdata_t);
+ lut_size += n/4 * sizeof(ffts_cpx_32f);
}
#else
- lut_size += n/4 * 2 * sizeof(cdata_t);
+ lut_size += n/4 * 2 * sizeof(ffts_cpx_32f);
#endif
n *= 2;
} else {
#if defined(__arm__) && !defined(DYNAMIC_DISABLED)
- lut_size += n/8 * 3 * sizeof(cdata_t);
+ lut_size += n/8 * 3 * sizeof(ffts_cpx_32f);
#else
- lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
+ lut_size += n/8 * 3 * 2 * sizeof(ffts_cpx_32f);
#endif
}
n *= 2;
@@ -289,11 +289,11 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
#endif
for (i = 0; i < n_luts; i++) {
- p->ws_is[i] = w - (cdata_t *)p->ws;
+ p->ws_is[i] = w - (ffts_cpx_32f*) p->ws;
//fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]);
if(!i || hardcoded) {
- cdata_t *w0 = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
+ ffts_cpx_32f *w0 = FFTS_MALLOC(n/4 * sizeof(ffts_cpx_32f), 32);
float *fw0 = (float*) w0;
float *fw = (float *)w;
@@ -305,7 +305,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
#if defined(__arm__) && !defined(DYNAMIC_DISABLED)
if (N < 32) {
- // w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
+ // w = FFTS_MALLOC(n/4 * 2 * sizeof(ffts_cpx_32f), 32);
float *fw = (float *)w;
V temp0, temp1, temp2;
for (j=0; j<n/4; j+=2) {
@@ -326,7 +326,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
}
w += n/4 * 2;
} else {
- //w = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
+ //w = FFTS_MALLOC(n/4 * sizeof(ffts_cpx_32f), 32);
float *fw = (float *)w;
#ifdef HAVE_NEON
{
@@ -346,7 +346,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
w += n/4;
}
#else
- //w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
+ //w = FFTS_MALLOC(n/4 * 2 * sizeof(ffts_cpx_32f), 32);
for (j = 0; j < n/4; j += 2) {
V re, im, temp0;
temp0 = VLD(fw0 + j*2);
@@ -362,9 +362,9 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
FFTS_FREE(w0);
} else {
- cdata_t *w0 = (cdata_t*) FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
- cdata_t *w1 = (cdata_t*) FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
- cdata_t *w2 = (cdata_t*) FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
+ ffts_cpx_32f *w0 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
+ ffts_cpx_32f *w1 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
+ ffts_cpx_32f *w2 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
float *fw0 = (float*) w0;
float *fw1 = (float*) w1;
@@ -411,7 +411,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
#endif
w += n/8 * 3;
#else
- //w = FFTS_MALLOC(n/8 * 3 * 2 * sizeof(cdata_t), 32);
+ //w = FFTS_MALLOC(n/8 * 3 * 2 * sizeof(ffts_cpx_32f), 32);
for (j = 0; j < n/8; j += 2) {
temp0 = VLD(fw0 + j*2);
re = VDUPRE(temp0);
@@ -560,28 +560,28 @@ ffts_plan_t *ffts_init_1d(size_t N, int sign)
} else {
switch (N) {
case 2:
- p->transform = &ffts_firstpass_2;
+ p->transform = &ffts_small_2_32f;
break;
case 4:
if (sign == -1) {
- p->transform = &ffts_firstpass_4_f;
+ p->transform = &ffts_small_forward4_32f;
} else if (sign == 1) {
- p->transform = &ffts_firstpass_4_b;
+ p->transform = &ffts_small_backward4_32f;
}
break;
case 8:
if (sign == -1) {
- p->transform = &ffts_firstpass_8_f;
+ p->transform = &ffts_small_forward8_32f;
} else if (sign == 1) {
- p->transform = &ffts_firstpass_8_b;
+ p->transform = &ffts_small_backward8_32f;
}
break;
case 16:
default:
if (sign == -1) {
- p->transform = &ffts_firstpass_16_f;
+ p->transform = &ffts_small_forward16_32f;
} else {
- p->transform = &ffts_firstpass_16_b;
+ p->transform = &ffts_small_backward16_32f;
}
break;
}
diff --git a/src/ffts_small.c b/src/ffts_small.c
index ccc3ab0..34be7af 100644
--- a/src/ffts_small.c
+++ b/src/ffts_small.c
@@ -1,104 +1,140 @@
/*
- This file is part of FFTS -- The Fastest Fourier Transform in the South
-
- Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
- Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
-
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the organization nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+This file is part of FFTS -- The Fastest Fourier Transform in the South
+
+Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
+Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of the organization nor the
+names of its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_small.h"
+
#include "ffts_internal.h"
#include "macros.h"
-void ffts_firstpass_16_f(ffts_plan_t *p, const void *in, void *out)
+void
+ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out)
{
- const data_t *din = (const data_t*) in;
- data_t *dout = (data_t*) out;
- float *LUT8 = (float*) p->ws;
- V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
+ const float *din = (const float*) in;
+ float *dout = (float*) out;
+ ffts_cpx_32f t0, t1, r0, r1;
- L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
- L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
- K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
- S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
- K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
- S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
-}
+ /* unreferenced parameter */
+ (void) p;
-void ffts_firstpass_16_b(ffts_plan_t *p, const void *in, void *out)
-{
- const data_t *din = (const data_t*) in;
- data_t *dout = (data_t*) out;
- float *LUT8 = (float*) p->ws;
- V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
+ t0[0] = din[0];
+ t0[1] = din[1];
+ t1[0] = din[2];
+ t1[1] = din[3];
- L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
- L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
- K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13);
- S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
- K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
- S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
+ r0[0] = t0[0] + t1[0];
+ r0[1] = t0[1] + t1[1];
+ r1[0] = t0[0] - t1[0];
+ r1[1] = t0[1] - t1[1];
+
+ dout[0] = r0[0];
+ dout[1] = r0[1];
+ dout[2] = r1[0];
+ dout[3] = r1[1];
}
-void ffts_firstpass_8_f(ffts_plan_t *p, const void *in, void *out)
+void
+ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out)
{
- const data_t *din = (const data_t*) in;
- data_t *dout = (data_t*) out;
- V r0_1, r2_3, r4_5, r6_7;
- float *LUT8 = (float*) p->ws + p->ws_is[0];
+ const double *din = (const double*) in;
+ double *dout = (double*) out;
+ ffts_cpx_64f t0, t1, r0, r1;
- L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
- K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
+ /* unreferenced parameter */
+ (void) p;
+
+ t0[0] = din[0];
+ t0[1] = din[1];
+ t1[0] = din[2];
+ t1[1] = din[3];
+
+ r0[0] = t0[0] + t1[0];
+ r0[1] = t0[1] + t1[1];
+ r1[0] = t0[0] - t1[0];
+ r1[1] = t0[1] - t1[1];
+
+ dout[0] = r0[0];
+ dout[1] = r0[1];
+ dout[2] = r1[0];
+ dout[3] = r1[1];
}
-void ffts_firstpass_8_b(ffts_plan_t *p, const void *in, void *out)
+void
+ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out)
{
- const data_t *din = (const data_t*) in;
- data_t *dout = (data_t*) out;
- V r0_1, r2_3, r4_5, r6_7;
- float *LUT8 = (float*) p->ws + p->ws_is[0];
+ const float *din = (const float*) in;
+ float *dout = (float*) out;
+ ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7;
- L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
- K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
+ /* unreferenced parameter */
+ (void) p;
+
+ t0[0] = din[0];
+ t0[1] = din[1];
+ t1[0] = din[4];
+ t1[1] = din[5];
+ t2[0] = din[2];
+ t2[1] = din[3];
+ t3[0] = din[6];
+ t3[1] = din[7];
+
+ t4[0] = t0[0] + t1[0];
+ t4[1] = t0[1] + t1[1];
+ t5[0] = t0[0] - t1[0];
+ t5[1] = t0[1] - t1[1];
+ t6[0] = t2[0] + t3[0];
+ t6[1] = t2[1] + t3[1];
+ t7[0] = t2[0] - t3[0];
+ t7[1] = t2[1] - t3[1];
+
+ dout[0] = t4[0] + t6[0];
+ dout[1] = t4[1] + t6[1];
+ dout[4] = t4[0] - t6[0];
+ dout[5] = t4[1] - t6[1];
+ dout[2] = t5[0] + t7[1];
+ dout[3] = t5[1] - t7[0];
+ dout[6] = t5[0] - t7[1];
+ dout[7] = t5[1] + t7[0];
}
-void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out)
+void
+ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out)
{
- const data_t *din = (const data_t*) in;
- data_t *dout = (data_t*) out;
- cdata_t t0, t1, t2, t3, t4, t5, t6, t7;
+ const double *din = (const double*) in;
+ double *dout = (double*) out;
+ ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7;
- /* unreferenced parameter */
- (void) p;
+ /* unreferenced parameter */
+ (void) p;
t0[0] = din[0];
t0[1] = din[1];
@@ -128,14 +164,15 @@ void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out)
dout[7] = t5[1] + t7[0];
}
-void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out)
+void
+ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out)
{
- const data_t *din = (const data_t*) in;
- data_t *dout = (data_t*) out;
- cdata_t t0, t1, t2, t3, t4, t5, t6, t7;
+ const float *din = (const float*) in;
+ float *dout = (float*) out;
+ ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7;
- /* unreferenced parameter */
- (void) p;
+ /* unreferenced parameter */
+ (void) p;
t0[0] = din[0];
t0[1] = din[1];
@@ -165,27 +202,168 @@ void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out)
dout[7] = t5[1] - t7[0];
}
-void ffts_firstpass_2(ffts_plan_t *p, const void *in, void *out)
+void
+ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out)
{
- const data_t *din = (const data_t*) in;
- data_t *dout = (data_t*) out;
- cdata_t t0, t1, r0, r1;
+ const double *din = (const double*) in;
+ double *dout = (double*) out;
+ ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7;
- /* unreferenced parameter */
- (void) p;
+ /* unreferenced parameter */
+ (void) p;
t0[0] = din[0];
t0[1] = din[1];
- t1[0] = din[2];
- t1[1] = din[3];
+ t1[0] = din[4];
+ t1[1] = din[5];
+ t2[0] = din[2];
+ t2[1] = din[3];
+ t3[0] = din[6];
+ t3[1] = din[7];
- r0[0] = t0[0] + t1[0];
- r0[1] = t0[1] + t1[1];
- r1[0] = t0[0] - t1[0];
- r1[1] = t0[1] - t1[1];
+ t4[0] = t0[0] + t1[0];
+ t4[1] = t0[1] + t1[1];
+ t5[0] = t0[0] - t1[0];
+ t5[1] = t0[1] - t1[1];
+ t6[0] = t2[0] + t3[0];
+ t6[1] = t2[1] + t3[1];
+ t7[0] = t2[0] - t3[0];
+ t7[1] = t2[1] - t3[1];
- dout[0] = r0[0];
- dout[1] = r0[1];
- dout[2] = r1[0];
- dout[3] = r1[1];
+ dout[0] = t4[0] + t6[0];
+ dout[1] = t4[1] + t6[1];
+ dout[4] = t4[0] - t6[0];
+ dout[5] = t4[1] - t6[1];
+ dout[2] = t5[0] - t7[1];
+ dout[3] = t5[1] + t7[0];
+ dout[6] = t5[0] + t7[1];
+ dout[7] = t5[1] - t7[0];
+}
+
+void
+ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out)
+{
+ const float *din = (const float*) in;
+ float *dout = (float*) out;
+ V r0_1, r2_3, r4_5, r6_7;
+ float *LUT8 = (float*) p->ws + p->ws_is[0];
+
+ L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
+ K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+ S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
+}
+
+void
+ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out)
+{
+ const double *din = (const double*) in;
+ double *dout = (double*) out;
+ V r0_1, r2_3, r4_5, r6_7;
+ double *LUT8 = (double*) p->ws + p->ws_is[0];
+
+#if MACROS_READY
+ L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
+ K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+ S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
+#endif
+}
+
+void
+ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out)
+{
+ const float *din = (const float*) in;
+ float *dout = (float*) out;
+ V r0_1, r2_3, r4_5, r6_7;
+ float *LUT8 = (float*) p->ws + p->ws_is[0];
+
+ L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
+ K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+ S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
+}
+
+void
+ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out)
+{
+ const double *din = (const double*) in;
+ double *dout = (double*) out;
+ V r0_1, r2_3, r4_5, r6_7;
+ double *LUT8 = (double*) p->ws + p->ws_is[0];
+
+#if MACROS_READY
+ L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
+ K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+ S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
+#endif
+}
+
+void
+ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out)
+{
+ const float *din = (const float*) in;
+ float *dout = (float*) out;
+ float *LUT8 = (float*) p->ws;
+ V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
+
+ L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
+ L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
+ K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+ K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
+ S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
+ K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
+ S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
+}
+
+void
+ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out)
+{
+ const double *din = (const double*) in;
+ double *dout = (double*) out;
+ double *LUT8 = (double*) p->ws;
+ V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
+
+#ifdef MACROS_READY
+ L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
+ L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
+ K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+ K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
+ S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
+ K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
+ S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
+#endif
+}
+
+void
+ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out)
+{
+ const float *din = (const float*) in;
+ float *dout = (float*) out;
+ float *LUT8 = (float*) p->ws;
+ V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
+
+ L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
+ L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
+ K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+ K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13);
+ S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
+ K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
+ S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
+}
+
+void
+ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out)
+{
+ const double *din = (const double*) in;
+ double *dout = (double*) out;
+ double *LUT8 = (double*) p->ws;
+ V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
+
+#ifdef MACROS_READY
+ L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
+ L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
+ K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+ K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13);
+ S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
+ K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
+ S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
+#endif
}
\ No newline at end of file
diff --git a/src/ffts_small.h b/src/ffts_small.h
index 5ae48cc..249dcc9 100644
--- a/src/ffts_small.h
+++ b/src/ffts_small.h
@@ -1,14 +1,85 @@
+/*
+
+This file is part of FFTS -- The Fastest Fourier Transform in the South
+
+Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
+Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of the organization nor the
+names of its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
#ifndef FFTS_SMALL_H
#define FFTS_SMALL_H
+#if defined (_MSC_VER) && (_MSC_VER >= 1020)
+#pragma once
+#endif
+
#include "ffts.h"
-void ffts_firstpass_16_f(ffts_plan_t *p, const void *in, void *out);
-void ffts_firstpass_16_b(ffts_plan_t *p, const void *in, void *out);
-void ffts_firstpass_8_f(ffts_plan_t *p, const void *in, void *out);
-void ffts_firstpass_8_b(ffts_plan_t *p, const void *in, void *out);
-void ffts_firstpass_4_f(ffts_plan_t *p, const void *in, void *out);
-void ffts_firstpass_4_b(ffts_plan_t *p, const void *in, void *out);
-void ffts_firstpass_2(ffts_plan_t *p, const void *in, void *out);
+void
+ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out);
#endif /* FFTS_SMALL_H */
diff --git a/src/macros.h b/src/macros.h
index b4a6a5a..fc53ae4 100644
--- a/src/macros.h
+++ b/src/macros.h
@@ -48,7 +48,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "macros-alpha.h"
#endif
-static FFTS_INLINE void TX2(V *a, V *b)
+static FFTS_INLINE void
+TX2(V *a, V *b)
{
V TX2_t0 = VUNPACKLO(*a, *b);
V TX2_t1 = VUNPACKHI(*a, *b);
@@ -56,7 +57,8 @@ static FFTS_INLINE void TX2(V *a, V *b)
*b = TX2_t1;
}
-static FFTS_INLINE void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3)
+static FFTS_INLINE void
+K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3)
{
V uk, uk2, zk_p, zk_n, zk, zk_d;
@@ -75,9 +77,16 @@ static FFTS_INLINE void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3)
*r1 = VSUB(uk2, zk_d);
}
-static FFTS_INLINE void L_2_4(int inv, const data_t* FFTS_RESTRICT i0, const data_t* FFTS_RESTRICT i1,
- const data_t* FFTS_RESTRICT i2, const data_t* FFTS_RESTRICT i3,
- V *r0, V *r1, V *r2, V *r3)
+static FFTS_INLINE void
+L_2_4(int inv,
+ const float *FFTS_RESTRICT i0,
+ const float *FFTS_RESTRICT i1,
+ const float *FFTS_RESTRICT i2,
+ const float *FFTS_RESTRICT i3,
+ V *r0,
+ V *r1,
+ V *r2,
+ V *r3)
{
V t0, t1, t2, t3, t4, t5, t6, t7;
@@ -105,9 +114,16 @@ static FFTS_INLINE void L_2_4(int inv, const data_t* FFTS_RESTRICT i0, const dat
*r2 = VUNPACKHI(t2, t3);
}
-static FFTS_INLINE void L_4_4(int inv, const data_t* FFTS_RESTRICT i0, const data_t* FFTS_RESTRICT i1,
- const data_t* FFTS_RESTRICT i2, const data_t* FFTS_RESTRICT i3,
- V *r0, V *r1, V *r2, V *r3)
+static FFTS_INLINE void
+L_4_4(int inv,
+ const float *FFTS_RESTRICT i0,
+ const float *FFTS_RESTRICT i1,
+ const float *FFTS_RESTRICT i2,
+ const float *FFTS_RESTRICT i3,
+ V *r0,
+ V *r1,
+ V *r2,
+ V *r3)
{
V t0, t1, t2, t3, t4, t5, t6, t7;
@@ -136,9 +152,16 @@ static FFTS_INLINE void L_4_4(int inv, const data_t* FFTS_RESTRICT i0, const dat
*r3 = t3;
}
-static FFTS_INLINE void L_4_2(int inv, const data_t * FFTS_RESTRICT i0, const data_t * FFTS_RESTRICT i1,
- const data_t * FFTS_RESTRICT i2, const data_t * FFTS_RESTRICT i3,
- V *r0, V *r1, V *r2, V *r3)
+static FFTS_INLINE void
+L_4_2(int inv,
+ const float *FFTS_RESTRICT i0,
+ const float *FFTS_RESTRICT i1,
+ const float *FFTS_RESTRICT i2,
+ const float *FFTS_RESTRICT i3,
+ V *r0,
+ V *r1,
+ V *r2,
+ V *r3)
{
V t0, t1, t2, t3, t4, t5, t6, t7;
diff --git a/src/types.h b/src/types.h
index 749d387..f8997ce 100644
--- a/src/types.h
+++ b/src/types.h
@@ -38,12 +38,8 @@
#pragma once
#endif
-#if defined(_Complex_I) && defined(complex) && defined(I)
-typedef complex float cdata_t;
-#else
-typedef float cdata_t[2];
-#endif
-
-typedef float data_t;
+/* Define a complex number as a two-element array: [0] = real, [1] = imaginary */
+typedef float ffts_cpx_32f[2];
+typedef double ffts_cpx_64f[2];
#endif /* FFTS_TYPES_H */
OpenPOWER on IntegriCloud