summaryrefslogtreecommitdiffstats
path: root/src/codegen.c
diff options
context:
space:
mode:
authorAnthony Blake <anthonix@me.com>2013-04-03 15:21:12 +1300
committerAnthony Blake <anthonix@me.com>2013-04-03 15:21:12 +1300
commit7f43121489310f9c5131a9e20f5aff532b66ad6b (patch)
tree04965f9f60a3ed46f2e8557d035c78c9f4e23cda /src/codegen.c
parent17aaf9d6216cd7c608824a2b4fd0e735004e612e (diff)
downloadffts-7f43121489310f9c5131a9e20f5aff532b66ad6b.zip
ffts-7f43121489310f9c5131a9e20f5aff532b66ad6b.tar.gz
VFP support
Diffstat (limited to 'src/codegen.c')
-rw-r--r--src/codegen.c35
1 files changed, 18 insertions, 17 deletions
diff --git a/src/codegen.c b/src/codegen.c
index 21f8be0..01ca3e8 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -42,10 +42,11 @@
#include <sys/types.h>
#include <sys/mman.h>
-#ifdef __ARM_NEON__
- #include "codegen_neon.h"
-// #include "neon_float.h"
+#ifdef HAVE_NEON
+ #include "codegen_arm.h"
#include "neon.h"
+#elif HAVE_VFP
+ #include "codegen_arm.h"
#include "vfp.h"
#else
#include "codegen_sse.h"
@@ -99,7 +100,7 @@ uint32_t LUT_offset(size_t N, size_t leafN) {
for(i=0;i<n_luts-1;i++) {
p_lut_size = lut_size;
if(!i || hardcoded) {
- #ifdef __ARM_NEON__
+ #ifdef __arm__
if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
else lut_size += n/4 * sizeof(cdata_t);
#else
@@ -107,7 +108,7 @@ uint32_t LUT_offset(size_t N, size_t leafN) {
#endif
// n *= 2;
} else {
- #ifdef __ARM_NEON__
+ #ifdef __arm__
lut_size += n/8 * 3 * sizeof(cdata_t);
#else
lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
@@ -118,7 +119,7 @@ uint32_t LUT_offset(size_t N, size_t leafN) {
return lut_size;
}
-#ifdef __ARM_NEON__
+#ifdef __arm__
typedef uint32_t insns_t;
#else
typedef uint8_t insns_t;
@@ -147,7 +148,7 @@ void insert_nops(uint8_t **p, uint32_t count) {
void align_mem16(uint8_t **p, uint32_t offset) {
-#ifdef __ARM_NEON__
+#ifdef __x86_64__
int r = (16 - (offset & 0xf)) - ((uint32_t)(*p) & 0xf);
r = (16 + r) & 0xf;
insert_nops(p, r);
@@ -170,7 +171,7 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
pps = ps;
-#ifdef __ARM_NEON__
+#ifdef __arm__
if(N < 8192) p->transform_size = 8192;
else p->transform_size = N;
#else
@@ -203,7 +204,7 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
insns_t *x_8_addr = fp;
#ifdef __arm__
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON
memcpy(fp, neon_x8, neon_x8_t - neon_x8);
if(sign < 0) {
fp[31] ^= 0x00200000; fp[32] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000;
@@ -229,7 +230,7 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
insns_t *x_4_addr = fp;
#ifdef __arm__
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON
memcpy(fp, neon_x4, neon_x8 - neon_x4);
if(sign < 0) {
fp[26] ^= 0x00200000; fp[28] ^= 0x00200000; fp[31] ^= 0x00200000; fp[32] ^= 0x00200000;
@@ -248,7 +249,7 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
#endif
insns_t *start = fp;
-#ifdef __ARM_NEON__
+#ifdef __arm__
*fp = PUSH_LR(); fp++;
*fp = 0xed2d8b10; fp++;
@@ -271,7 +272,7 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
#ifdef __arm__
*fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++;
- #ifdef __ARM_NEON__
+ #ifdef HAVE_NEON
MOVI(&fp, 11, p->i0);
#else
MOVI(&fp, 11, p->i0);
@@ -291,7 +292,7 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
#endif
//fp++;
#ifdef __arm__
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON
memcpy(fp, neon_ee, neon_oo - neon_ee);
if(sign < 0) {
fp[33] ^= 0x00200000; fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[39] ^= 0x00200000;
@@ -425,14 +426,14 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
if(pps[0] == 2*leafN) {
- // CALL(&fp, x_4_addr);
+ CALL(&fp, x_4_addr);
// }else if(!pps[2]){
// //uint32_t *x_8_t_addr = fp;
// memcpy(fp, neon_x8_t, neon_ee - neon_x8_t);
// fp += (neon_ee - neon_x8_t) / 4;
// //*fp++ = BL(fp+2, x_8_t_addr);
}else{
- // CALL(&fp, x_8_addr);
+ CALL(&fp, x_8_addr);
}
pAddr = pps[1] * 4;
@@ -445,7 +446,7 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
}
#endif
#ifdef __arm__
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON
if(__builtin_ctzl(N) & 1){
ADDI(&fp, 2, 7, 0);
ADDI(&fp, 7, 9, 0);
@@ -612,7 +613,7 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
*fp = BL(fp+2, x_4_addr); fp++;
}else if(!pps[2]){
//uint32_t *x_8_t_addr = fp;
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON
memcpy(fp, neon_x8_t, neon_ee - neon_x8_t);
if(sign < 0) {
fp[31] ^= 0x00200000; fp[32] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000;
OpenPOWER on IntegriCloud