diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2015-03-13 11:13:10 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2015-03-13 11:13:10 +0200 |
commit | 13a81490439e7ce823fea11fcbd09cfa7286671a (patch) | |
tree | b29b8362e359e5fc9da0e03cfd689316a9518adb | |
parent | 835c5ab5b3d9f3104959dc6722b4bad600eae8fe (diff) | |
download | ffts-13a81490439e7ce823fea11fcbd09cfa7286671a.zip ffts-13a81490439e7ce823fea11fcbd09cfa7286671a.tar.gz |
Forgot to rename some V macros
-rw-r--r-- | src/codegen_arm.h | 2 | ||||
-rw-r--r-- | src/ffts.c | 46 | ||||
-rw-r--r-- | src/ffts_real_nd.h | 2 | ||||
-rw-r--r-- | src/macros-neon.h | 4 |
4 files changed, 28 insertions, 26 deletions
diff --git a/src/codegen_arm.h b/src/codegen_arm.h index 2948ec3..7508f57 100644 --- a/src/codegen_arm.h +++ b/src/codegen_arm.h @@ -34,6 +34,8 @@ #ifndef FFTS_CODEGEN_ARM_H #define FFTS_CODEGEN_ARM_H +#include "neon.h" + uint32_t BL(void *pos, void *target) { return 0xeb000000 | (((target - pos) / 4) & 0xffffff); } @@ -285,7 +285,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) } #ifdef HAVE_NEON - V neg = (sign < 0) ? VLIT4(0.0f, 0.0f, 0.0f, 0.0f) : VLIT4(-0.0f, -0.0f, -0.0f, -0.0f); + V4SF neg = (sign < 0) ? V4SF_LIT4(0.0f, 0.0f, 0.0f, 0.0f) : V4SF_LIT4(-0.0f, -0.0f, -0.0f, -0.0f); #endif for (i = 0; i < n_luts; i++) { @@ -307,21 +307,21 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) if (N < 32) { // w = FFTS_MALLOC(n/4 * 2 * sizeof(ffts_cpx_32f), 32); float *fw = (float *)w; - V temp0, temp1, temp2; + V4SF temp0, temp1, temp2; for (j=0; j<n/4; j+=2) { // #ifdef HAVE_NEON temp0 = VLD(fw0 + j*2); - V re, im; - re = VDUPRE(temp0); - im = VDUPIM(temp0); + V4SF re, im; + re = V4SF_DUPLICATE_RE(temp0); + im = V4SF_DUPLICATE_IM(temp0); #ifdef HAVE_NEON - im = VXOR(im, MULI_SIGN); + im = V4SF_XOR(im, MULI_SIGN); //im = IMULI(sign>0, im); #else - im = MULI(sign>0, im); + im = V4SF_MULI(sign>0, im); #endif - VST(fw + j*4 , re); - VST(fw + j*4+4, im); + V4SF_ST(fw + j*4 , re); + V4SF_ST(fw + j*4+4, im); // #endif } w += n/4 * 2; @@ -330,11 +330,11 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) float *fw = (float *)w; #ifdef HAVE_NEON { - VS temp0, temp1, temp2; + V4SF2 temp0, temp1, temp2; for (j=0; j<n/4; j+=4) { - temp0 = VLD2(fw0 + j*2); - temp0.val[1] = VXOR(temp0.val[1], neg); - STORESPR(fw + j*2, temp0); + temp0 = V4SF2_LD(fw0 + j*2); + temp0.val[1] = V4SF_XOR(temp0.val[1], neg); + V4SF2_STORE_SPR(fw + j*2, temp0); } } #else @@ -386,17 +386,17 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) #if defined(__arm__) && !defined(DYNAMIC_DISABLED) #ifdef HAVE_NEON { - VS temp0, temp1, temp2; + V4SF2 temp0, temp1, temp2; for (j = 0; j < n/8; j += 4) { - temp0 = VLD2(fw0 + j*2); - temp0.val[1] = VXOR(temp0.val[1], neg); - STORESPR(fw + j*2*3, temp0); - temp1 = VLD2(fw1 + j*2); - temp1.val[1] = VXOR(temp1.val[1], neg); - STORESPR(fw + j*2*3 + 8, temp1); - temp2 = VLD2(fw2 + j*2); - temp2.val[1] = VXOR(temp2.val[1], neg); - STORESPR(fw + j*2*3 + 16, temp2); + temp0 = V4SF2_LD(fw0 + j*2); + temp0.val[1] = V4SF_XOR(temp0.val[1], neg); + V4SF_STORE_SPR(fw + j*2*3, temp0); + temp1 = V4SF2_LD(fw1 + j*2); + temp1.val[1] = V4SF_XOR(temp1.val[1], neg); + V4SF2_STORE_SPR(fw + j*2*3 + 8, temp1); + temp2 = V4SF2_LD(fw2 + j*2); + temp2.val[1] = V4SF_XOR(temp2.val[1], neg); + V4SF2_STORE_SPR(fw + j*2*3 + 16, temp2); } } #else diff --git a/src/ffts_real_nd.h b/src/ffts_real_nd.h index d23a002..22a708d 100644 --- a/src/ffts_real_nd.h +++ b/src/ffts_real_nd.h @@ -1,4 +1,4 @@ -/* + /* This file is part of FFTS -- The Fastest Fourier Transform in the South diff --git a/src/macros-neon.h b/src/macros-neon.h index 4ec92b3..29aa49f 100644 --- a/src/macros-neon.h +++ b/src/macros-neon.h @@ -81,7 +81,7 @@ V4SF_LIT4(float f3, float f2, float f1, float f0) vcombine_f32(vdup_lane_f32(vget_low_f32(r),1), vdup_lane_f32(vget_high_f32(r),1)) static FFTS_ALWAYS_INLINE V4SF -V4SF_IMULI(int inv, V a) +V4SF_IMULI(int inv, V4SF a) { if (inv) { return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f))); @@ -99,7 +99,7 @@ V4SF_IMUL(V4SF d, V4SF re, V4SF im) } static FFTS_ALWAYS_INLINE V4SF -V4SF_IMULJ(V d, V re, V im) +V4SF_IMULJ(V4SF d, V4SF re, V4SF im) { re = V4SF_MUL(re, d); im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d)); |