summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jukka Ojanen <jukka.ojanen@linkotec.net> 2015-03-16 13:21:23 +0200
committer Jukka Ojanen <jukka.ojanen@linkotec.net> 2015-03-16 13:21:23 +0200
commit 1ea951f98b7f35c42c49394a75ae3b8cf3e62dfe (patch)
tree ec93b68f9c9847a16fd5fc301012ec339fac057b
parent 4bd871bea0f657b1cfc8d55603064df05b58e55c (diff)
download ffts-1ea951f98b7f35c42c49394a75ae3b8cf3e62dfe.zip
ffts-1ea951f98b7f35c42c49394a75ae3b8cf3e62dfe.tar.gz
Merge ffts_small with ffts_static, and define small transforms "fully" constant
-rw-r--r-- CMakeLists.txt 9
-rw-r--r-- src/ffts.c 6
-rw-r--r-- src/ffts_small.c 369
-rw-r--r-- src/ffts_small.h 85
-rw-r--r-- src/ffts_static.c 584
-rw-r--r-- src/ffts_static.h 49
6 files changed, 585 insertions, 517 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12b3bf8..e96218b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -183,8 +183,8 @@ set(FFTS_SOURCES
src/ffts_real.c
src/ffts_real_nd.c
src/ffts_real_nd.h
- src/ffts_small.c
- src/ffts_small.h
+ src/ffts_static.c
+ src/ffts_static.h
src/macros.h
src/patterns.c
src/patterns.h
@@ -268,11 +268,6 @@ elseif(HAVE_XMMINTRIN_H)
endif(ENABLE_NEON)
if(DISABLE_DYNAMIC_CODE)
- list(APPEND FFTS_SOURCES
- src/ffts_static.c
- src/ffts_static.h
- )
-
add_definitions(-DDYNAMIC_DISABLED)
else()
list(APPEND FFTS_SOURCES
diff --git a/src/ffts.c b/src/ffts.c
index 56325cc..8f809db 100644
--- a/src/ffts.c
+++ b/src/ffts.c
@@ -34,13 +34,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ffts.h"
#include "ffts_internal.h"
+#include "ffts_static.h"
#include "macros.h"
#include "patterns.h"
-#include "ffts_small.h"
-#ifdef DYNAMIC_DISABLED
-#include "ffts_static.h"
-#else
+#ifndef DYNAMIC_DISABLED
#include "codegen.h"
#endif
diff --git a/src/ffts_small.c b/src/ffts_small.c
deleted file mode 100644
index 5bcbfc6..0000000
--- a/src/ffts_small.c
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
-
-This file is part of FFTS -- The Fastest Fourier Transform in the South
-
-Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
-Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-* Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-* Neither the name of the organization nor the
-names of its contributors may be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#include "ffts_small.h"
-
-#include "ffts_internal.h"
-#include "macros.h"
-
-void
-ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out)
-{
- const float *din = (const float*) in;
- float *dout = (float*) out;
- ffts_cpx_32f t0, t1, r0, r1;
-
- /* unreferenced parameter */
- (void) p;
-
- t0[0] = din[0];
- t0[1] = din[1];
- t1[0] = din[2];
- t1[1] = din[3];
-
- r0[0] = t0[0] + t1[0];
- r0[1] = t0[1] + t1[1];
- r1[0] = t0[0] - t1[0];
- r1[1] = t0[1] - t1[1];
-
- dout[0] = r0[0];
- dout[1] = r0[1];
- dout[2] = r1[0];
- dout[3] = r1[1];
-}
-
-void
-ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out)
-{
- const double *din = (const double*) in;
- double *dout = (double*) out;
- ffts_cpx_64f t0, t1, r0, r1;
-
- /* unreferenced parameter */
- (void) p;
-
- t0[0] = din[0];
- t0[1] = din[1];
- t1[0] = din[2];
- t1[1] = din[3];
-
- r0[0] = t0[0] + t1[0];
- r0[1] = t0[1] + t1[1];
- r1[0] = t0[0] - t1[0];
- r1[1] = t0[1] - t1[1];
-
- dout[0] = r0[0];
- dout[1] = r0[1];
- dout[2] = r1[0];
- dout[3] = r1[1];
-}
-
-void
-ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out)
-{
- const float *din = (const float*) in;
- float *dout = (float*) out;
- ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7;
-
- /* unreferenced parameter */
- (void) p;
-
- t0[0] = din[0];
- t0[1] = din[1];
- t1[0] = din[4];
- t1[1] = din[5];
- t2[0] = din[2];
- t2[1] = din[3];
- t3[0] = din[6];
- t3[1] = din[7];
-
- t4[0] = t0[0] + t1[0];
- t4[1] = t0[1] + t1[1];
- t5[0] = t0[0] - t1[0];
- t5[1] = t0[1] - t1[1];
- t6[0] = t2[0] + t3[0];
- t6[1] = t2[1] + t3[1];
- t7[0] = t2[0] - t3[0];
- t7[1] = t2[1] - t3[1];
-
- dout[0] = t4[0] + t6[0];
- dout[1] = t4[1] + t6[1];
- dout[4] = t4[0] - t6[0];
- dout[5] = t4[1] - t6[1];
- dout[2] = t5[0] + t7[1];
- dout[3] = t5[1] - t7[0];
- dout[6] = t5[0] - t7[1];
- dout[7] = t5[1] + t7[0];
-}
-
-void
-ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out)
-{
- const double *din = (const double*) in;
- double *dout = (double*) out;
- ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7;
-
- /* unreferenced parameter */
- (void) p;
-
- t0[0] = din[0];
- t0[1] = din[1];
- t1[0] = din[4];
- t1[1] = din[5];
- t2[0] = din[2];
- t2[1] = din[3];
- t3[0] = din[6];
- t3[1] = din[7];
-
- t4[0] = t0[0] + t1[0];
- t4[1] = t0[1] + t1[1];
- t5[0] = t0[0] - t1[0];
- t5[1] = t0[1] - t1[1];
- t6[0] = t2[0] + t3[0];
- t6[1] = t2[1] + t3[1];
- t7[0] = t2[0] - t3[0];
- t7[1] = t2[1] - t3[1];
-
- dout[0] = t4[0] + t6[0];
- dout[1] = t4[1] + t6[1];
- dout[4] = t4[0] - t6[0];
- dout[5] = t4[1] - t6[1];
- dout[2] = t5[0] + t7[1];
- dout[3] = t5[1] - t7[0];
- dout[6] = t5[0] - t7[1];
- dout[7] = t5[1] + t7[0];
-}
-
-void
-ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out)
-{
- const float *din = (const float*) in;
- float *dout = (float*) out;
- ffts_cpx_32f t0, t1, t2, t3, t4, t5, t6, t7;
-
- /* unreferenced parameter */
- (void) p;
-
- t0[0] = din[0];
- t0[1] = din[1];
- t1[0] = din[4];
- t1[1] = din[5];
- t2[0] = din[2];
- t2[1] = din[3];
- t3[0] = din[6];
- t3[1] = din[7];
-
- t4[0] = t0[0] + t1[0];
- t4[1] = t0[1] + t1[1];
- t5[0] = t0[0] - t1[0];
- t5[1] = t0[1] - t1[1];
- t6[0] = t2[0] + t3[0];
- t6[1] = t2[1] + t3[1];
- t7[0] = t2[0] - t3[0];
- t7[1] = t2[1] - t3[1];
-
- dout[0] = t4[0] + t6[0];
- dout[1] = t4[1] + t6[1];
- dout[4] = t4[0] - t6[0];
- dout[5] = t4[1] - t6[1];
- dout[2] = t5[0] - t7[1];
- dout[3] = t5[1] + t7[0];
- dout[6] = t5[0] + t7[1];
- dout[7] = t5[1] - t7[0];
-}
-
-void
-ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out)
-{
- const double *din = (const double*) in;
- double *dout = (double*) out;
- ffts_cpx_64f t0, t1, t2, t3, t4, t5, t6, t7;
-
- /* unreferenced parameter */
- (void) p;
-
- t0[0] = din[0];
- t0[1] = din[1];
- t1[0] = din[4];
- t1[1] = din[5];
- t2[0] = din[2];
- t2[1] = din[3];
- t3[0] = din[6];
- t3[1] = din[7];
-
- t4[0] = t0[0] + t1[0];
- t4[1] = t0[1] + t1[1];
- t5[0] = t0[0] - t1[0];
- t5[1] = t0[1] - t1[1];
- t6[0] = t2[0] + t3[0];
- t6[1] = t2[1] + t3[1];
- t7[0] = t2[0] - t3[0];
- t7[1] = t2[1] - t3[1];
-
- dout[0] = t4[0] + t6[0];
- dout[1] = t4[1] + t6[1];
- dout[4] = t4[0] - t6[0];
- dout[5] = t4[1] - t6[1];
- dout[2] = t5[0] - t7[1];
- dout[3] = t5[1] + t7[0];
- dout[6] = t5[0] + t7[1];
- dout[7] = t5[1] - t7[0];
-}
-
-void
-ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out)
-{
- const float *din = (const float*) in;
- float *dout = (float*) out;
- V4SF r0_1, r2_3, r4_5, r6_7;
- float *LUT8 = (float*) p->ws + p->ws_is[0];
-
- V4SF_L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
- V4SF_K_N(0, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- V4SF_S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
-}
-
-void
-ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out)
-{
- const double *din = (const double*) in;
- double *dout = (double*) out;
- V4SF r0_1, r2_3, r4_5, r6_7;
- double *LUT8 = (double*) p->ws + p->ws_is[0];
-
-#if MACROS_READY
- L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
- K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
-#endif
-}
-
-void
-ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out)
-{
- const float *din = (const float*) in;
- float *dout = (float*) out;
- V4SF r0_1, r2_3, r4_5, r6_7;
- float *LUT8 = (float*) p->ws + p->ws_is[0];
-
- V4SF_L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
- V4SF_K_N(1, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- V4SF_S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
-}
-
-void
-ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out)
-{
- const double *din = (const double*) in;
- double *dout = (double*) out;
- V4SF r0_1, r2_3, r4_5, r6_7;
- double *LUT8 = (double*) p->ws + p->ws_is[0];
-
-#if MACROS_READY
- L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
- K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
-#endif
-}
-
-void
-ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out)
-{
- const float *din = (const float*) in;
- float *dout = (float*) out;
- float *LUT8 = (float*) p->ws;
- V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
-
- V4SF_L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
- V4SF_L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
- V4SF_K_N(0, V4SF_LD(LUT8), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- V4SF_K_N(0, V4SF_LD(LUT8+8), V4SF_LD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
- V4SF_S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
- V4SF_K_N(0, V4SF_LD(LUT8+16), V4SF_LD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
- V4SF_S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
-}
-
-void
-ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out)
-{
- const double *din = (const double*) in;
- double *dout = (double*) out;
- double *LUT8 = (double*) p->ws;
- V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
-
-#ifdef MACROS_READY
- L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
- L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
- K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
- S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
- K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
- S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
-#endif
-}
-
-void
-ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out)
-{
- const float *din = (const float*) in;
- float *dout = (float*) out;
- float *LUT8 = (float*) p->ws;
- V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
-
- V4SF_L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
- V4SF_L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
- V4SF_K_N(1, V4SF_LD(LUT8+ 0), V4SF_LD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- V4SF_K_N(1, V4SF_LD(LUT8+ 8), V4SF_LD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
- V4SF_S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
- V4SF_K_N(1, V4SF_LD(LUT8+16), V4SF_LD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
- V4SF_S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
-}
-
-void
-ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out)
-{
- const double *din = (const double*) in;
- double *dout = (double*) out;
- double *LUT8 = (double*) p->ws;
- V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
-
-#ifdef MACROS_READY
- L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
- L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
- K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
- K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13);
- S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
- K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
- S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
-#endif
-}
\ No newline at end of file
diff --git a/src/ffts_small.h b/src/ffts_small.h
deleted file mode 100644
index 249dcc9..0000000
--- a/src/ffts_small.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
-
-This file is part of FFTS -- The Fastest Fourier Transform in the South
-
-Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
-Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-* Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-* Neither the name of the organization nor the
-names of its contributors may be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#ifndef FFTS_SMALL_H
-#define FFTS_SMALL_H
-
-#if defined (_MSC_VER) && (_MSC_VER >= 1020)
-#pragma once
-#endif
-
-#include "ffts.h"
-
-void
-ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out);
-
-void
-ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out);
-
-#endif /* FFTS_SMALL_H */
diff --git a/src/ffts_static.c b/src/ffts_static.c
index 7a0bf4a..701cca8 100644
--- a/src/ffts_static.c
+++ b/src/ffts_static.c
@@ -38,80 +38,220 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <assert.h>
-static const FFTS_ALIGN(16) float ffts_constants_32f[16] = {
- 0.70710678118654757273731092936941f,
- 0.70710678118654757273731092936941f,
- 0.70710678118654757273731092936941f,
- 0.70710678118654757273731092936941f,
- -0.70710678118654746171500846685376f,
- 0.70710678118654746171500846685376f,
- -0.70710678118654746171500846685376f,
- 0.70710678118654746171500846685376f,
+static const FFTS_ALIGN(16) float ffts_constants_small_32f[24] = {
1.0f,
1.0f,
- 0.70710678118654757273731092936941f,
- 0.70710678118654757273731092936941f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+
+ -0.0f,
0.0f,
+ -0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+
+ 1.0f,
+ 1.0f,
+ 0.9238795325112867561281831893967882868224166258636425f,
+ 0.9238795325112867561281831893967882868224166258636425f,
+
+ -0.0f,
0.0f,
- -0.70710678118654746171500846685376f,
- 0.70710678118654746171500846685376f
+ -0.3826834323650897717284599840303988667613445624856270f,
+ 0.3826834323650897717284599840303988667613445624856270f,
+
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.3826834323650897717284599840303988667613445624856270f,
+ 0.3826834323650897717284599840303988667613445624856270f,
+
+ -0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ -0.9238795325112867561281831893967882868224166258636425f,
+ 0.9238795325112867561281831893967882868224166258636425f
};
-static const FFTS_ALIGN(16) float ffts_constants_inv_32f[16] = {
- 0.70710678118654757273731092936941f,
- 0.70710678118654757273731092936941f,
- 0.70710678118654757273731092936941f,
- 0.70710678118654757273731092936941f,
- 0.70710678118654746171500846685376f,
- -0.70710678118654746171500846685376f,
- 0.70710678118654746171500846685376f,
- -0.70710678118654746171500846685376f,
+static const FFTS_ALIGN(16) double ffts_constants_small_64f[24] = { /* same values and order as ffts_constants_small_32f, stored as double */
+ 1.0, /* elements 0..3 */
+ 1.0,
+ 0.7071067811865475244008443621048490392848359376884740, /* cos(pi/4) */
+ 0.7071067811865475244008443621048490392848359376884740,
+ /* elements 4..7 */
+ -0.0,
+ 0.0,
+ -0.7071067811865475244008443621048490392848359376884740, /* -sin(pi/4) */
+ 0.7071067811865475244008443621048490392848359376884740,
+ /* elements 8..11 */
+ 1.0,
+ 1.0,
+ 0.9238795325112867561281831893967882868224166258636425, /* cos(pi/8) */
+ 0.9238795325112867561281831893967882868224166258636425,
+ /* elements 12..15 */
+ -0.0,
+ 0.0,
+ -0.3826834323650897717284599840303988667613445624856270, /* -sin(pi/8) */
+ 0.3826834323650897717284599840303988667613445624856270,
+ /* elements 16..19 */
+ 0.7071067811865475244008443621048490392848359376884740, /* cos(pi/4) */
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.3826834323650897717284599840303988667613445624856270, /* cos(3*pi/8) */
+ 0.3826834323650897717284599840303988667613445624856270,
+ /* elements 20..23 */
+ -0.7071067811865475244008443621048490392848359376884740, /* -sin(pi/4) */
+ 0.7071067811865475244008443621048490392848359376884740,
+ -0.9238795325112867561281831893967882868224166258636425, /* -sin(3*pi/8) */
+ 0.9238795325112867561281831893967882868224166258636425
+}; /* NOTE(review): no function visible in this diff reads this table; the 64f kernels take twiddles from p->ws - confirm intended use */
+
+static const FFTS_ALIGN(16) float ffts_constants_small_inv_32f[24] = {
1.0f,
1.0f,
- 0.70710678118654757273731092936941f,
- 0.70710678118654757273731092936941f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+
0.0f,
+ -0.0f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ -0.7071067811865475244008443621048490392848359376884740f,
+
+ 1.0f,
+ 1.0f,
+ 0.9238795325112867561281831893967882868224166258636425f,
+ 0.9238795325112867561281831893967882868224166258636425f,
+
0.0f,
- 0.70710678118654746171500846685376f,
- -0.70710678118654746171500846685376f
+ -0.0f,
+ 0.3826834323650897717284599840303988667613445624856270f,
+ -0.3826834323650897717284599840303988667613445624856270f,
+
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.3826834323650897717284599840303988667613445624856270f,
+ 0.3826834323650897717284599840303988667613445624856270f,
+
+ 0.7071067811865475244008443621048490392848359376884740f,
+ -0.7071067811865475244008443621048490392848359376884740f,
+ 0.9238795325112867561281831893967882868224166258636425f,
+ -0.9238795325112867561281831893967882868224166258636425f
+};
+
+static const FFTS_ALIGN(16) double ffts_constants_small_inv_64f[24] = { /* same values and order as ffts_constants_small_inv_32f, stored as double */
+ 1.0, /* elements 0..3 */
+ 1.0,
+ 0.7071067811865475244008443621048490392848359376884740, /* cos(pi/4) */
+ 0.7071067811865475244008443621048490392848359376884740,
+ /* elements 4..7: signs are flipped relative to ffts_constants_small_64f */
+ 0.0,
+ -0.0,
+ 0.7071067811865475244008443621048490392848359376884740, /* sin(pi/4) */
+ -0.7071067811865475244008443621048490392848359376884740,
+ /* elements 8..11 */
+ 1.0,
+ 1.0,
+ 0.9238795325112867561281831893967882868224166258636425, /* cos(pi/8) */
+ 0.9238795325112867561281831893967882868224166258636425,
+ /* elements 12..15: signs are flipped relative to ffts_constants_small_64f */
+ 0.0,
+ -0.0,
+ 0.3826834323650897717284599840303988667613445624856270, /* sin(pi/8) */
+ -0.3826834323650897717284599840303988667613445624856270,
+ /* elements 16..19 */
+ 0.7071067811865475244008443621048490392848359376884740, /* cos(pi/4) */
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.3826834323650897717284599840303988667613445624856270, /* cos(3*pi/8) */
+ 0.3826834323650897717284599840303988667613445624856270,
+ /* elements 20..23: signs are flipped relative to ffts_constants_small_64f */
+ 0.7071067811865475244008443621048490392848359376884740, /* sin(pi/4) */
+ -0.7071067811865475244008443621048490392848359376884740,
+ 0.9238795325112867561281831893967882868224166258636425, /* sin(3*pi/8) */
+ -0.9238795325112867561281831893967882868224166258636425
+}; /* NOTE(review): no function visible in this diff reads this table; the 64f kernels take twiddles from p->ws - confirm intended use */
+
+static const FFTS_ALIGN(16) float ffts_constants_32f[16] = {
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+
+ -0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ -0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+
+ 1.0f,
+ 1.0f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+
+ 0.0f,
+ 0.0f,
+ -0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f
};
static const FFTS_ALIGN(16) double ffts_constants_64f[16] = {
- 0.70710678118654757273731092936941,
- 0.70710678118654757273731092936941,
- 0.70710678118654757273731092936941,
- 0.70710678118654757273731092936941,
- -0.70710678118654746171500846685376,
- 0.70710678118654746171500846685376,
- -0.70710678118654746171500846685376,
- 0.70710678118654746171500846685376,
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+
+ -0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+ -0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+
1.0,
1.0,
- 0.70710678118654757273731092936941,
- 0.70710678118654757273731092936941,
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+
0.0,
0.0,
- -0.70710678118654746171500846685376,
- 0.70710678118654746171500846685376
+ -0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740
+};
+
+static const FFTS_ALIGN(16) float ffts_constants_inv_32f[16] = {
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+
+ 0.7071067811865475244008443621048490392848359376884740f,
+ -0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ -0.7071067811865475244008443621048490392848359376884740f,
+
+ 1.0f,
+ 1.0f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+
+ 0.0f,
+ 0.0f,
+ 0.7071067811865475244008443621048490392848359376884740f,
+ -0.7071067811865475244008443621048490392848359376884740f
};
static const FFTS_ALIGN(16) double ffts_constants_inv_64f[16] = {
- 0.70710678118654757273731092936941,
- 0.70710678118654757273731092936941,
- 0.70710678118654757273731092936941,
- 0.70710678118654757273731092936941,
- 0.70710678118654746171500846685376,
- -0.70710678118654746171500846685376,
- 0.70710678118654746171500846685376,
- -0.70710678118654746171500846685376,
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+
+ 0.7071067811865475244008443621048490392848359376884740,
+ -0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+ -0.7071067811865475244008443621048490392848359376884740,
+
1.0,
1.0,
- 0.70710678118654757273731092936941,
- 0.70710678118654757273731092936941,
+ 0.7071067811865475244008443621048490392848359376884740,
+ 0.7071067811865475244008443621048490392848359376884740,
+
0.0,
0.0,
- 0.70710678118654746171500846685376,
- -0.70710678118654746171500846685376
+ 0.7071067811865475244008443621048490392848359376884740,
+ -0.7071067811865475244008443621048490392848359376884740
};
static FFTS_INLINE void
@@ -425,6 +565,350 @@ ffts_static_firstpass_odd_32f(float *const FFTS_RESTRICT out,
}
}
+/*
+ * 2-point transform on single-precision data.
+ *
+ * Reads four floats from in as two [0]/[1] pairs, then writes the
+ * element-wise sum of the pairs to out[0..1] and the element-wise
+ * difference to out[2..3]. Every load happens before the first store.
+ * The plan argument is ignored.
+ */
+void
+ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out)
+{
+    const float *src = (const float*) in;
+    float *dst = (float*) out;
+    ffts_cpx_32f a, b, sum, diff;
+
+    /* this fixed-size kernel takes nothing from the plan */
+    (void) p;
+
+    a[0] = src[0];
+    a[1] = src[1];
+    b[0] = src[2];
+    b[1] = src[3];
+
+    sum[0] = a[0] + b[0];
+    sum[1] = a[1] + b[1];
+    diff[0] = a[0] - b[0];
+    diff[1] = a[1] - b[1];
+
+    dst[0] = sum[0];
+    dst[1] = sum[1];
+    dst[2] = diff[0];
+    dst[3] = diff[1];
+}
+
+/*
+ * 2-point transform on double-precision data.
+ *
+ * Reads four doubles from in as two [0]/[1] pairs, then writes the
+ * element-wise sum of the pairs to out[0..1] and the element-wise
+ * difference to out[2..3]. Every load happens before the first store.
+ * The plan argument is ignored.
+ */
+void
+ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out)
+{
+    const double *src = (const double*) in;
+    double *dst = (double*) out;
+    ffts_cpx_64f a, b, sum, diff;
+
+    /* this fixed-size kernel takes nothing from the plan */
+    (void) p;
+
+    a[0] = src[0];
+    a[1] = src[1];
+    b[0] = src[2];
+    b[1] = src[3];
+
+    sum[0] = a[0] + b[0];
+    sum[1] = a[1] + b[1];
+    diff[0] = a[0] - b[0];
+    diff[1] = a[1] - b[1];
+
+    dst[0] = sum[0];
+    dst[1] = sum[1];
+    dst[2] = diff[0];
+    dst[3] = diff[1];
+}
+
+/*
+ * Forward 4-point transform on single-precision data.
+ *
+ * Loads four [0]/[1] pairs from in (eight floats), forms the sums and
+ * differences of pairs 0/2 and pairs 1/3, and combines them into four
+ * output pairs. For outputs 1 and 3 the [0] and [1] parts of the 1/3
+ * difference are swapped, with signs chosen for the forward direction.
+ * Every load happens before the first store; the plan is ignored.
+ */
+void
+ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out)
+{
+    const float *src = (const float*) in;
+    float *dst = (float*) out;
+    ffts_cpx_32f x0, x1, x2, x3;
+    ffts_cpx_32f even_sum, even_dif, odd_sum, odd_dif;
+
+    /* this fixed-size kernel takes nothing from the plan */
+    (void) p;
+
+    x0[0] = src[0];
+    x0[1] = src[1];
+    x1[0] = src[2];
+    x1[1] = src[3];
+    x2[0] = src[4];
+    x2[1] = src[5];
+    x3[0] = src[6];
+    x3[1] = src[7];
+
+    /* first stage: pairs 0/2 and pairs 1/3 */
+    even_sum[0] = x0[0] + x2[0];
+    even_sum[1] = x0[1] + x2[1];
+    even_dif[0] = x0[0] - x2[0];
+    even_dif[1] = x0[1] - x2[1];
+    odd_sum[0] = x1[0] + x3[0];
+    odd_sum[1] = x1[1] + x3[1];
+    odd_dif[0] = x1[0] - x3[0];
+    odd_dif[1] = x1[1] - x3[1];
+
+    /* second stage, written out in ascending output order */
+    dst[0] = even_sum[0] + odd_sum[0];
+    dst[1] = even_sum[1] + odd_sum[1];
+    dst[2] = even_dif[0] + odd_dif[1];
+    dst[3] = even_dif[1] - odd_dif[0];
+    dst[4] = even_sum[0] - odd_sum[0];
+    dst[5] = even_sum[1] - odd_sum[1];
+    dst[6] = even_dif[0] - odd_dif[1];
+    dst[7] = even_dif[1] + odd_dif[0];
+}
+
+/*
+ * Forward 4-point transform on double-precision data.
+ *
+ * Loads four [0]/[1] pairs from in (eight doubles), forms the sums and
+ * differences of pairs 0/2 and pairs 1/3, and combines them into four
+ * output pairs. For outputs 1 and 3 the [0] and [1] parts of the 1/3
+ * difference are swapped, with signs chosen for the forward direction.
+ * Every load happens before the first store; the plan is ignored.
+ */
+void
+ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out)
+{
+    const double *src = (const double*) in;
+    double *dst = (double*) out;
+    ffts_cpx_64f x0, x1, x2, x3;
+    ffts_cpx_64f even_sum, even_dif, odd_sum, odd_dif;
+
+    /* this fixed-size kernel takes nothing from the plan */
+    (void) p;
+
+    x0[0] = src[0];
+    x0[1] = src[1];
+    x1[0] = src[2];
+    x1[1] = src[3];
+    x2[0] = src[4];
+    x2[1] = src[5];
+    x3[0] = src[6];
+    x3[1] = src[7];
+
+    /* first stage: pairs 0/2 and pairs 1/3 */
+    even_sum[0] = x0[0] + x2[0];
+    even_sum[1] = x0[1] + x2[1];
+    even_dif[0] = x0[0] - x2[0];
+    even_dif[1] = x0[1] - x2[1];
+    odd_sum[0] = x1[0] + x3[0];
+    odd_sum[1] = x1[1] + x3[1];
+    odd_dif[0] = x1[0] - x3[0];
+    odd_dif[1] = x1[1] - x3[1];
+
+    /* second stage, written out in ascending output order */
+    dst[0] = even_sum[0] + odd_sum[0];
+    dst[1] = even_sum[1] + odd_sum[1];
+    dst[2] = even_dif[0] + odd_dif[1];
+    dst[3] = even_dif[1] - odd_dif[0];
+    dst[4] = even_sum[0] - odd_sum[0];
+    dst[5] = even_sum[1] - odd_sum[1];
+    dst[6] = even_dif[0] - odd_dif[1];
+    dst[7] = even_dif[1] + odd_dif[0];
+}
+
+/*
+ * Backward 4-point transform on single-precision data.
+ *
+ * Same data flow as ffts_small_forward4_32f; only the signs applied to
+ * the swapped 1/3 difference in outputs 1 and 3 are reversed.
+ * Every load happens before the first store; the plan is ignored.
+ */
+void
+ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out)
+{
+    const float *src = (const float*) in;
+    float *dst = (float*) out;
+    ffts_cpx_32f x0, x1, x2, x3;
+    ffts_cpx_32f even_sum, even_dif, odd_sum, odd_dif;
+
+    /* this fixed-size kernel takes nothing from the plan */
+    (void) p;
+
+    x0[0] = src[0];
+    x0[1] = src[1];
+    x1[0] = src[2];
+    x1[1] = src[3];
+    x2[0] = src[4];
+    x2[1] = src[5];
+    x3[0] = src[6];
+    x3[1] = src[7];
+
+    /* first stage: pairs 0/2 and pairs 1/3 */
+    even_sum[0] = x0[0] + x2[0];
+    even_sum[1] = x0[1] + x2[1];
+    even_dif[0] = x0[0] - x2[0];
+    even_dif[1] = x0[1] - x2[1];
+    odd_sum[0] = x1[0] + x3[0];
+    odd_sum[1] = x1[1] + x3[1];
+    odd_dif[0] = x1[0] - x3[0];
+    odd_dif[1] = x1[1] - x3[1];
+
+    /* second stage, written out in ascending output order */
+    dst[0] = even_sum[0] + odd_sum[0];
+    dst[1] = even_sum[1] + odd_sum[1];
+    dst[2] = even_dif[0] - odd_dif[1];
+    dst[3] = even_dif[1] + odd_dif[0];
+    dst[4] = even_sum[0] - odd_sum[0];
+    dst[5] = even_sum[1] - odd_sum[1];
+    dst[6] = even_dif[0] + odd_dif[1];
+    dst[7] = even_dif[1] - odd_dif[0];
+}
+
+/*
+ * Backward 4-point transform on double-precision data.
+ *
+ * Same data flow as ffts_small_forward4_64f; only the signs applied to
+ * the swapped 1/3 difference in outputs 1 and 3 are reversed.
+ * Every load happens before the first store; the plan is ignored.
+ */
+void
+ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out)
+{
+    const double *src = (const double*) in;
+    double *dst = (double*) out;
+    ffts_cpx_64f x0, x1, x2, x3;
+    ffts_cpx_64f even_sum, even_dif, odd_sum, odd_dif;
+
+    /* this fixed-size kernel takes nothing from the plan */
+    (void) p;
+
+    x0[0] = src[0];
+    x0[1] = src[1];
+    x1[0] = src[2];
+    x1[1] = src[3];
+    x2[0] = src[4];
+    x2[1] = src[5];
+    x3[0] = src[6];
+    x3[1] = src[7];
+
+    /* first stage: pairs 0/2 and pairs 1/3 */
+    even_sum[0] = x0[0] + x2[0];
+    even_sum[1] = x0[1] + x2[1];
+    even_dif[0] = x0[0] - x2[0];
+    even_dif[1] = x0[1] - x2[1];
+    odd_sum[0] = x1[0] + x3[0];
+    odd_sum[1] = x1[1] + x3[1];
+    odd_dif[0] = x1[0] - x3[0];
+    odd_dif[1] = x1[1] - x3[1];
+
+    /* second stage, written out in ascending output order */
+    dst[0] = even_sum[0] + odd_sum[0];
+    dst[1] = even_sum[1] + odd_sum[1];
+    dst[2] = even_dif[0] - odd_dif[1];
+    dst[3] = even_dif[1] + odd_dif[0];
+    dst[4] = even_sum[0] - odd_sum[0];
+    dst[5] = even_sum[1] - odd_sum[1];
+    dst[6] = even_dif[0] + odd_dif[1];
+    dst[7] = even_dif[1] - odd_dif[0];
+}
+
+/*
+ * Forward 8-point transform, single precision.
+ *
+ * Loads from in at float offsets 0, 8, 4 and 12, combines the four
+ * vectors using the first eight entries of ffts_constants_small_32f,
+ * and stores to out at float offsets 0, 4, 8 and 12. Twiddles come from
+ * the file-scope table, so the plan is not consulted.
+ */
+void
+ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out)
+{
+    const float *FFTS_RESTRICT twiddles = ffts_constants_small_32f;
+    const float *src = (const float*) in;
+    float *dst = (float*) out;
+    V4SF v0_1, v2_3, v4_5, v6_7;
+
+    /* constants are compiled in, the plan is unused */
+    (void) p;
+
+    /* leading 0 is the direction argument; the backward kernel passes 1 */
+    V4SF_L_4_2(0, src, src+8, src+4, src+12,
+               &v0_1, &v2_3, &v4_5, &v6_7);
+    V4SF_K_N(0, V4SF_LD(twiddles), V4SF_LD(twiddles + 4),
+             &v0_1, &v2_3, &v4_5, &v6_7);
+    V4SF_S_4(v0_1, v2_3, v4_5, v6_7,
+             dst+0, dst+4, dst+8, dst+12);
+}
+
+/*
+ * Forward 8-point transform, double precision.
+ *
+ * The kernel body is compiled only when MACROS_READY is defined. Without
+ * it this function is a stub: it returns without reading in or writing
+ * out, and it does not touch the plan.
+ *
+ * The guard is #ifdef, matching the 16-point 64f kernels in this file.
+ * It was "#if MACROS_READY", which differs only if MACROS_READY is
+ * defined as 0.
+ *
+ * NOTE(review): unlike the 32f kernels, twiddles are still taken from the
+ * plan workspace, and the registers are declared V4SF, the type the 32f
+ * kernels use - confirm both before enabling MACROS_READY.
+ */
+void
+ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out)
+{
+#ifdef MACROS_READY
+    const double *din = (const double*) in;
+    double *dout = (double*) out;
+    V4SF r0_1, r2_3, r4_5, r6_7;
+    double *LUT8 = (double*) p->ws + p->ws_is[0];
+
+    L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
+    K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+    S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
+#else
+    /* no double-precision kernel available: avoid dereferencing the plan
+     * and silence unused-parameter warnings */
+    (void) p;
+    (void) in;
+    (void) out;
+#endif
+}
+
+/*
+ * Backward 8-point transform, single precision.
+ *
+ * Loads from in at float offsets 0, 8, 4 and 12, combines the four
+ * vectors using the first eight entries of ffts_constants_small_inv_32f,
+ * and stores to out at float offsets 0, 4, 8 and 12. Twiddles come from
+ * the file-scope table, so the plan is not consulted.
+ */
+void
+ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out)
+{
+    const float *FFTS_RESTRICT twiddles = ffts_constants_small_inv_32f;
+    const float *src = (const float*) in;
+    float *dst = (float*) out;
+    V4SF v0_1, v2_3, v4_5, v6_7;
+
+    /* constants are compiled in, the plan is unused */
+    (void) p;
+
+    /* leading 1 is the direction argument; the forward kernel passes 0 */
+    V4SF_L_4_2(1, src, src+8, src+4, src+12,
+               &v0_1, &v2_3, &v4_5, &v6_7);
+    V4SF_K_N(1, V4SF_LD(twiddles), V4SF_LD(twiddles+4),
+             &v0_1, &v2_3, &v4_5, &v6_7);
+    V4SF_S_4(v0_1, v2_3, v4_5, v6_7,
+             dst+0, dst+4, dst+8, dst+12);
+}
+
+/*
+ * Backward 8-point transform, double precision.
+ *
+ * The kernel body is compiled only when MACROS_READY is defined. Without
+ * it this function is a stub: it returns without reading in or writing
+ * out, and it does not touch the plan.
+ *
+ * The guard is #ifdef, matching the 16-point 64f kernels in this file.
+ * It was "#if MACROS_READY", which differs only if MACROS_READY is
+ * defined as 0.
+ *
+ * NOTE(review): unlike the 32f kernels, twiddles are still taken from the
+ * plan workspace, and the registers are declared V4SF, the type the 32f
+ * kernels use - confirm both before enabling MACROS_READY.
+ */
+void
+ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out)
+{
+#ifdef MACROS_READY
+    const double *din = (const double*) in;
+    double *dout = (double*) out;
+    V4SF r0_1, r2_3, r4_5, r6_7;
+    double *LUT8 = (double*) p->ws + p->ws_is[0];
+
+    L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
+    K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+    S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
+#else
+    /* no double-precision kernel available: avoid dereferencing the plan
+     * and silence unused-parameter warnings */
+    (void) p;
+    (void) in;
+    (void) out;
+#endif
+}
+
+/*
+ * Forward 16-point transform, single precision.
+ *
+ * Loads eight vectors from in, runs three combine steps using all 24
+ * entries of ffts_constants_small_32f (read at offsets 0/4, 8/12 and
+ * 16/20), and stores eight vectors to out. Twiddles come from the
+ * file-scope table, so the plan is not consulted.
+ */
+void
+ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out)
+{
+    const float *FFTS_RESTRICT twiddles = ffts_constants_small_32f;
+    const float *src = (const float*) in;
+    float *dst = (float*) out;
+    V4SF v0_1, v2_3, v4_5, v6_7;
+    V4SF v8_9, v10_11, v12_13, v14_15;
+
+    /* constants are compiled in, the plan is unused */
+    (void) p;
+
+    /* loads; note the second call fills v14_15 before v12_13 */
+    V4SF_L_4_4(0, src+0, src+16, src+8, src+24,
+               &v0_1, &v2_3, &v8_9, &v10_11);
+    V4SF_L_2_4(0, src+4, src+20, src+28, src+12,
+               &v4_5, &v6_7, &v14_15, &v12_13);
+
+    /* first combine step over the low four vectors */
+    V4SF_K_N(0, V4SF_LD(twiddles), V4SF_LD(twiddles+4),
+             &v0_1, &v2_3, &v4_5, &v6_7);
+
+    /* vectors 0/4/8/12: combine, then store */
+    V4SF_K_N(0, V4SF_LD(twiddles+8), V4SF_LD(twiddles+12),
+             &v0_1, &v4_5, &v8_9, &v12_13);
+    V4SF_S_4(v0_1, v4_5, v8_9, v12_13,
+             dst+0, dst+8, dst+16, dst+24);
+
+    /* vectors 2/6/10/14: combine, then store */
+    V4SF_K_N(0, V4SF_LD(twiddles+16), V4SF_LD(twiddles+20),
+             &v2_3, &v6_7, &v10_11, &v14_15);
+    V4SF_S_4(v2_3, v6_7, v10_11, v14_15,
+             dst+4, dst+12, dst+20, dst+28);
+}
+
+/*
+ * Forward 16-point transform, double precision.
+ *
+ * The kernel body is compiled only when MACROS_READY is defined. Without
+ * it this function is a stub: it returns without reading in or writing
+ * out. The declarations live inside the guard so the stub build neither
+ * dereferences the plan nor leaves unused locals behind.
+ *
+ * NOTE(review): unlike the 32f kernels, twiddles are still taken from the
+ * plan workspace, and the registers are declared V4SF, the type the 32f
+ * kernels use - confirm both before enabling MACROS_READY.
+ */
+void
+ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out)
+{
+#ifdef MACROS_READY
+    const double *din = (const double*) in;
+    double *dout = (double*) out;
+    double *LUT8 = (double*) p->ws;
+    V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
+
+    L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
+    L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
+    K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+    K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
+    S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
+    K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
+    S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
+#else
+    /* no double-precision kernel available: avoid dereferencing the plan
+     * and silence unused-parameter warnings */
+    (void) p;
+    (void) in;
+    (void) out;
+#endif
+}
+
+/*
+ * Backward 16-point transform, single precision.
+ *
+ * Loads eight vectors from in, runs three combine steps using all 24
+ * entries of ffts_constants_small_inv_32f (read at offsets 0/4, 8/12 and
+ * 16/20), and stores eight vectors to out. Twiddles come from the
+ * file-scope table, so the plan is not consulted.
+ */
+void
+ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out)
+{
+    const float *FFTS_RESTRICT twiddles = ffts_constants_small_inv_32f;
+    const float *src = (const float*) in;
+    float *dst = (float*) out;
+    V4SF v0_1, v2_3, v4_5, v6_7;
+    V4SF v8_9, v10_11, v12_13, v14_15;
+
+    /* constants are compiled in, the plan is unused */
+    (void) p;
+
+    /* loads; note the second call fills v14_15 before v12_13 */
+    V4SF_L_4_4(1, src+0, src+16, src+8, src+24,
+               &v0_1, &v2_3, &v8_9, &v10_11);
+    V4SF_L_2_4(1, src+4, src+20, src+28, src+12,
+               &v4_5, &v6_7, &v14_15, &v12_13);
+
+    /* first combine step over the low four vectors */
+    V4SF_K_N(1, V4SF_LD(twiddles), V4SF_LD(twiddles+4),
+             &v0_1, &v2_3, &v4_5, &v6_7);
+
+    /* vectors 0/4/8/12: combine, then store */
+    V4SF_K_N(1, V4SF_LD(twiddles+8), V4SF_LD(twiddles+12),
+             &v0_1, &v4_5, &v8_9, &v12_13);
+    V4SF_S_4(v0_1, v4_5, v8_9, v12_13,
+             dst+0, dst+8, dst+16, dst+24);
+
+    /* vectors 2/6/10/14: combine, then store */
+    V4SF_K_N(1, V4SF_LD(twiddles+16), V4SF_LD(twiddles+20),
+             &v2_3, &v6_7, &v10_11, &v14_15);
+    V4SF_S_4(v2_3, v6_7, v10_11, v14_15,
+             dst+4, dst+12, dst+20, dst+28);
+}
+
+/*
+ * Backward 16-point transform, double precision.
+ *
+ * The kernel body is compiled only when MACROS_READY is defined. Without
+ * it this function is a stub: it returns without reading in or writing
+ * out. The declarations live inside the guard so the stub build neither
+ * dereferences the plan nor leaves unused locals behind.
+ *
+ * NOTE(review): unlike the 32f kernels, twiddles are still taken from the
+ * plan workspace, and the registers are declared V4SF, the type the 32f
+ * kernels use - confirm both before enabling MACROS_READY.
+ */
+void
+ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out)
+{
+#ifdef MACROS_READY
+    const double *din = (const double*) in;
+    double *dout = (double*) out;
+    double *LUT8 = (double*) p->ws;
+    V4SF r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;
+
+    L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
+    L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);
+    K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
+    K_N(1, VLD(LUT8+8), VLD(LUT8+12),&r0_1, &r4_5, &r8_9, &r12_13);
+    S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);
+    K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
+    S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
+#else
+    /* no double-precision kernel available: avoid dereferencing the plan
+     * and silence unused-parameter warnings */
+    (void) p;
+    (void) in;
+    (void) out;
+#endif
+}
+
static FFTS_INLINE void
ffts_static_firstpass_even_32f(float *FFTS_RESTRICT out,
const float *FFTS_RESTRICT in,
diff --git a/src/ffts_static.h b/src/ffts_static.h
index 924c3e1..5a42fc2 100644
--- a/src/ffts_static.h
+++ b/src/ffts_static.h
@@ -40,7 +40,52 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ffts.h"
-void ffts_static_transform_f_32f(ffts_plan_t *p, const void *in, void *out);
-void ffts_static_transform_i_32f(ffts_plan_t *p, const void *in, void *out);
+void
+ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_static_transform_f_32f(ffts_plan_t *p, const void *in, void *out);
+
+void
+ffts_static_transform_i_32f(ffts_plan_t *p, const void *in, void *out);
#endif /* FFTS_STATIC_H */
OpenPOWER on IntegriCloud