summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jukka Ojanen <jukka.ojanen@linkotec.net> 2015-03-13 11:13:10 +0200
committer Jukka Ojanen <jukka.ojanen@linkotec.net> 2015-03-13 11:13:10 +0200
commit 13a81490439e7ce823fea11fcbd09cfa7286671a (patch)
tree b29b8362e359e5fc9da0e03cfd689316a9518adb
parent 835c5ab5b3d9f3104959dc6722b4bad600eae8fe (diff)
download ffts-13a81490439e7ce823fea11fcbd09cfa7286671a.zip
ffts-13a81490439e7ce823fea11fcbd09cfa7286671a.tar.gz
Forgot to rename some V macros
-rw-r--r-- src/codegen_arm.h 2
-rw-r--r-- src/ffts.c 46
-rw-r--r-- src/ffts_real_nd.h 2
-rw-r--r-- src/macros-neon.h 4
4 files changed, 28 insertions, 26 deletions
diff --git a/src/codegen_arm.h b/src/codegen_arm.h
index 2948ec3..7508f57 100644
--- a/src/codegen_arm.h
+++ b/src/codegen_arm.h
@@ -34,6 +34,8 @@
#ifndef FFTS_CODEGEN_ARM_H
#define FFTS_CODEGEN_ARM_H
+#include "neon.h"
+
uint32_t BL(void *pos, void *target) {
return 0xeb000000 | (((target - pos) / 4) & 0xffffff);
}
diff --git a/src/ffts.c b/src/ffts.c
index fd0b716..5774a56 100644
--- a/src/ffts.c
+++ b/src/ffts.c
@@ -285,7 +285,7 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
}
#ifdef HAVE_NEON
- V neg = (sign < 0) ? VLIT4(0.0f, 0.0f, 0.0f, 0.0f) : VLIT4(-0.0f, -0.0f, -0.0f, -0.0f);
+ V4SF neg = (sign < 0) ? V4SF_LIT4(0.0f, 0.0f, 0.0f, 0.0f) : V4SF_LIT4(-0.0f, -0.0f, -0.0f, -0.0f);
#endif
for (i = 0; i < n_luts; i++) {
@@ -307,21 +307,21 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
if (N < 32) {
// w = FFTS_MALLOC(n/4 * 2 * sizeof(ffts_cpx_32f), 32);
float *fw = (float *)w;
- V temp0, temp1, temp2;
+ V4SF temp0, temp1, temp2;
for (j=0; j<n/4; j+=2) {
// #ifdef HAVE_NEON
temp0 = VLD(fw0 + j*2);
- V re, im;
- re = VDUPRE(temp0);
- im = VDUPIM(temp0);
+ V4SF re, im;
+ re = V4SF_DUPLICATE_RE(temp0);
+ im = V4SF_DUPLICATE_IM(temp0);
#ifdef HAVE_NEON
- im = VXOR(im, MULI_SIGN);
+ im = V4SF_XOR(im, MULI_SIGN);
//im = IMULI(sign>0, im);
#else
- im = MULI(sign>0, im);
+ im = V4SF_MULI(sign>0, im);
#endif
- VST(fw + j*4 , re);
- VST(fw + j*4+4, im);
+ V4SF_ST(fw + j*4 , re);
+ V4SF_ST(fw + j*4+4, im);
// #endif
}
w += n/4 * 2;
@@ -330,11 +330,11 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
float *fw = (float *)w;
#ifdef HAVE_NEON
{
- VS temp0, temp1, temp2;
+ V4SF2 temp0, temp1, temp2;
for (j=0; j<n/4; j+=4) {
- temp0 = VLD2(fw0 + j*2);
- temp0.val[1] = VXOR(temp0.val[1], neg);
- STORESPR(fw + j*2, temp0);
+ temp0 = V4SF2_LD(fw0 + j*2);
+ temp0.val[1] = V4SF_XOR(temp0.val[1], neg);
+ V4SF2_STORE_SPR(fw + j*2, temp0);
}
}
#else
@@ -386,17 +386,17 @@ static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
#if defined(__arm__) && !defined(DYNAMIC_DISABLED)
#ifdef HAVE_NEON
{
- VS temp0, temp1, temp2;
+ V4SF2 temp0, temp1, temp2;
for (j = 0; j < n/8; j += 4) {
- temp0 = VLD2(fw0 + j*2);
- temp0.val[1] = VXOR(temp0.val[1], neg);
- STORESPR(fw + j*2*3, temp0);
- temp1 = VLD2(fw1 + j*2);
- temp1.val[1] = VXOR(temp1.val[1], neg);
- STORESPR(fw + j*2*3 + 8, temp1);
- temp2 = VLD2(fw2 + j*2);
- temp2.val[1] = VXOR(temp2.val[1], neg);
- STORESPR(fw + j*2*3 + 16, temp2);
+ temp0 = V4SF2_LD(fw0 + j*2);
+ temp0.val[1] = V4SF_XOR(temp0.val[1], neg);
+ V4SF_STORE_SPR(fw + j*2*3, temp0);
+ temp1 = V4SF2_LD(fw1 + j*2);
+ temp1.val[1] = V4SF_XOR(temp1.val[1], neg);
+ V4SF2_STORE_SPR(fw + j*2*3 + 8, temp1);
+ temp2 = V4SF2_LD(fw2 + j*2);
+ temp2.val[1] = V4SF_XOR(temp2.val[1], neg);
+ V4SF2_STORE_SPR(fw + j*2*3 + 16, temp2);
}
}
#else
diff --git a/src/ffts_real_nd.h b/src/ffts_real_nd.h
index d23a002..22a708d 100644
--- a/src/ffts_real_nd.h
+++ b/src/ffts_real_nd.h
@@ -1,4 +1,4 @@
-/*
+ /*
This file is part of FFTS -- The Fastest Fourier Transform in the South
diff --git a/src/macros-neon.h b/src/macros-neon.h
index 4ec92b3..29aa49f 100644
--- a/src/macros-neon.h
+++ b/src/macros-neon.h
@@ -81,7 +81,7 @@ V4SF_LIT4(float f3, float f2, float f1, float f0)
vcombine_f32(vdup_lane_f32(vget_low_f32(r),1), vdup_lane_f32(vget_high_f32(r),1))
static FFTS_ALWAYS_INLINE V4SF
-V4SF_IMULI(int inv, V a)
+V4SF_IMULI(int inv, V4SF a)
{
if (inv) {
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f)));
@@ -99,7 +99,7 @@ V4SF_IMUL(V4SF d, V4SF re, V4SF im)
}
static FFTS_ALWAYS_INLINE V4SF
-V4SF_IMULJ(V d, V re, V im)
+V4SF_IMULJ(V4SF d, V4SF re, V4SF im)
{
re = V4SF_MUL(re, d);
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
OpenPOWER on IntegriCloud