summary refs log tree commit diff stats
path: root/src/codegen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/codegen.c')
-rw-r--r-- src/codegen.c 63
1 files changed, 31 insertions, 32 deletions
diff --git a/src/codegen.c b/src/codegen.c
index e7cee54..a66ecda 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -247,23 +247,22 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
//fp += (neon_end - neon_x8_t) / 4;
insns_t *x_4_addr = fp;
#ifdef __arm__
-
-#ifdef HAVE_NEON
- memcpy(fp, neon_x4, neon_x8 - neon_x4);
- if(sign < 0) {
- fp[26] ^= 0x00200000; fp[28] ^= 0x00200000; fp[31] ^= 0x00200000; fp[32] ^= 0x00200000;
- }
- fp += (neon_x8 - neon_x4) / 4;
-#else
- memcpy(fp, vfp_x4, vfp_x8 - vfp_x4);
- if(sign > 0) {
- fp[36] ^= 0x00000040;
- fp[38] ^= 0x00000040;
- fp[43] ^= 0x00000040;
- fp[44] ^= 0x00000040;
- }
- fp += (vfp_x8 - vfp_x4) / 4;
-#endif
+ #ifdef HAVE_NEON
+ memcpy(fp, neon_x4, neon_x8 - neon_x4);
+ if(sign < 0) {
+ fp[26] ^= 0x00200000; fp[28] ^= 0x00200000; fp[31] ^= 0x00200000; fp[32] ^= 0x00200000;
+ }
+ fp += (neon_x8 - neon_x4) / 4;
+ #else
+ memcpy(fp, vfp_x4, vfp_x8 - vfp_x4);
+ if(sign > 0) {
+ fp[36] ^= 0x00000040;
+ fp[38] ^= 0x00000040;
+ fp[43] ^= 0x00000040;
+ fp[44] ^= 0x00000040;
+ }
+ fp += (vfp_x8 - vfp_x4) / 4;
+ #endif
#else
align_mem16(&fp, 0);
x_4_addr = fp;
@@ -375,20 +374,21 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
fp += (leaf_oo - leaf_ee);
if(__builtin_ctzl(N) & 1){
+
if(p->i1) {
lp_cnt += p->i1 * 4;
- MOVI(&fp, RCX, lp_cnt);
+ MOVI(&fp, RCX, lp_cnt);
align_mem16(&fp, 4);
- memcpy(fp, leaf_oo, leaf_eo - leaf_oo);
- for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oo_offsets[i], offsets_o[i]*4);
- fp += (leaf_eo - leaf_oo);
+ memcpy(fp, leaf_oo, leaf_eo - leaf_oo);
+ for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oo_offsets[i], offsets_o[i]*4);
+ fp += (leaf_eo - leaf_oo);
}
- memcpy(fp, leaf_oe, leaf_end - leaf_oe);
- lp_cnt += 4;
- for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oe_offsets[i], offsets_o[i]*4);
- fp += (leaf_end - leaf_oe);
+ memcpy(fp, leaf_oe, leaf_end - leaf_oe);
+ lp_cnt += 4;
+ for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oe_offsets[i], offsets_o[i]*4);
+ fp += (leaf_end - leaf_oe);
}else{
@@ -398,18 +398,17 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_eo_offsets[i], offsets[i]*4);
fp += (leaf_oe - leaf_eo);
- if(p->i1) {
+ if(p->i1) {
lp_cnt += p->i1 * 4;
- MOVI(&fp, RCX, lp_cnt);
+ MOVI(&fp, RCX, lp_cnt);
align_mem16(&fp, 4);
- memcpy(fp, leaf_oo, leaf_eo - leaf_oo);
- for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oo_offsets[i], offsets_o[i]*4);
- fp += (leaf_eo - leaf_oo);
- }
+ memcpy(fp, leaf_oo, leaf_eo - leaf_oo);
+ for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oo_offsets[i], offsets_o[i]*4);
+ fp += (leaf_eo - leaf_oo);
+ }
}
if(p->i1) {
-
lp_cnt += p->i1 * 4;
MOVI(&fp, RCX, lp_cnt);
align_mem16(&fp, 9);
OpenPOWER on IntegriCloud