summary | refs | log | tree | commit | diff | stats
path: root/src/ffts.c
diff options
context:
space:
mode:
author: Anthony Blake <anthonix@me.com>, 2013-04-22 16:45:17 +1200
committer: Anthony Blake <anthonix@me.com>, 2013-04-22 16:45:17 +1200
commit: 7b3907cff81fb82380787e63e4304fb8af807c0c (patch)
tree: 11ae035cd02aab22e1a4069737fb6f422a7531eb /src/ffts.c
parent: a45464980b8de7faef21eb46479f7e09fd056441 (diff)
download: ffts-7b3907cff81fb82380787e63e4304fb8af807c0c.zip
download: ffts-7b3907cff81fb82380787e63e4304fb8af807c0c.tar.gz
Fixed up the smaller VFP transforms. Inverse VFP and real/nd VFP still not working yet.
Diffstat (limited to 'src/ffts.c')
-rw-r--r-- src/ffts.c 20
1 files changed, 13 insertions, 7 deletions
diff --git a/src/ffts.c b/src/ffts.c
index fe4b590..e83bf6a 100644
--- a/src/ffts.c
+++ b/src/ffts.c
@@ -34,6 +34,7 @@
#include "macros.h"
//#include "mini_macros.h"
#include "patterns.h"
+#include "ffts_small.h"
#ifdef DYNAMIC_DISABLED
#include "ffts_static.h"
@@ -89,12 +90,12 @@ ffts_plan_t *ffts_init_1d(size_t N, int sign) {
size_t i;
#ifdef __arm__
-#ifdef HAVE_NEON
+//#ifdef HAVE_NEON
V MULI_SIGN;
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
-#endif
+//#endif
#else
V MULI_SIGN;
@@ -230,22 +231,27 @@ ffts_plan_t *ffts_init_1d(size_t N, int sign) {
float *fw = (float *)w;
V temp0, temp1, temp2;
for(j=0;j<n/4;j+=2) {
- #ifdef HAVE_NEON
+ // #ifdef HAVE_NEON
temp0 = VLD(fw0 + j*2);
V re, im;
re = VDUPRE(temp0);
im = VDUPIM(temp0);
- im = VXOR(im, MULI_SIGN);
+ #ifdef HAVE_NEON
+ im = VXOR(im, MULI_SIGN);
+ //im = IMULI(sign>0, im);
+ #else
+ im = MULI(sign>0, im);
+ #endif
VST(fw + j*4 , re);
VST(fw + j*4+4, im);
- #endif
+ // #endif
}
w += n/4 * 2;
}else{
//w = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
float *fw = (float *)w;
- VS temp0, temp1, temp2;
#ifdef HAVE_NEON
+ VS temp0, temp1, temp2;
for(j=0;j<n/4;j+=4) {
temp0 = VLD2(fw0 + j*2);
temp0.val[1] = VXOR(temp0.val[1], neg);
@@ -299,8 +305,8 @@ ffts_plan_t *ffts_init_1d(size_t N, int sign) {
#ifdef __arm__
//w = FFTS_MALLOC(n/8 * 3 * sizeof(cdata_t), 32);
float *fw = (float *)w;
- VS temp0, temp1, temp2;
#ifdef HAVE_NEON
+ VS temp0, temp1, temp2;
for(j=0;j<n/8;j+=4) {
temp0 = VLD2(fw0 + j*2);
temp0.val[1] = VXOR(temp0.val[1], neg);
OpenPOWER on IntegriCloud