summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Anthony Blake <anthonix@me.com> 2012-08-31 18:17:10 +1200
committer: Anthony Blake <anthonix@me.com> 2012-08-31 18:17:10 +1200
commit: dd9dac02d88ba4ccd8076be9c1c818a2b628bfc6 (patch)
tree: c87465617575f6336864218ab235167678bd2ca9 /src
parent: e601fcceea846a7272633e6846f43a05e5332e2a (diff)
download: ffts-dd9dac02d88ba4ccd8076be9c1c818a2b628bfc6.zip
ffts-dd9dac02d88ba4ccd8076be9c1c818a2b628bfc6.tar.gz
Moved EE stores so they are in a block; misc other stuff
Diffstat (limited to 'src')
-rw-r--r-- src/codegen.c 6
-rw-r--r-- src/sse.s 33
2 files changed, 17 insertions, 22 deletions
diff --git a/src/codegen.c b/src/codegen.c
index 5bc8fb6..5877ee0 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -196,9 +196,6 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN) {
PUSH(&fp, R13);
PUSH(&fp, R14);
PUSH(&fp, R15);
- PUSH(&fp, R9);
- PUSH(&fp, R8);
- PUSH(&fp, RCX);
int i;
memcpy(fp, leaf_ee_init, leaf_ee - leaf_ee_init);
@@ -446,9 +443,6 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN) {
*fp++ = POP_LR(); count++;
#else
- POP(&fp, RCX);
- POP(&fp, R8);
- POP(&fp, R9);
POP(&fp, R15);
POP(&fp, R14);
POP(&fp, R13);
diff --git a/src/sse.s b/src/sse.s
index 3742844..edaa771 100644
--- a/src/sse.s
+++ b/src/sse.s
@@ -106,7 +106,7 @@ LEAF_EE_const_7:
addq $4, %rax
shufps $238, %xmm4, %xmm2 #83.5
movaps %xmm1, %xmm4 #83.5
- movaps %xmm3, (%rdx,%r11,4) #83.5
+ #movntdq %xmm3, (%rdx,%r11,4) #83.5
subps %xmm12, %xmm7 #83.5
addps %xmm12, %xmm14 #83.5
movlhps %xmm7, %xmm4 #83.5
@@ -116,13 +116,14 @@ LEAF_EE_const_7:
movlhps %xmm14, %xmm9 #83.5
shufps $238, %xmm13, %xmm5 #83.5
shufps $238, %xmm14, %xmm6 #83.5
- movaps %xmm4, 16(%rdx,%r11,4) #83.5
- movaps %xmm7, 32(%rdx,%r11,4) #83.5
- movaps %xmm9, 48(%rdx,%r11,4) #83.5
- movaps %xmm2, (%rdx,%r12,4) #83.5
- movaps %xmm1, 16(%rdx,%r12,4) #83.5
- movaps %xmm5, 32(%rdx,%r12,4) #83.5
- movaps %xmm6, 48(%rdx,%r12,4) #83.5
+ movntdq %xmm3, (%rdx,%r11,4) #83.5
+ movntdq %xmm4, 16(%rdx,%r11,4) #83.5
+ movntdq %xmm7, 32(%rdx,%r11,4) #83.5
+ movntdq %xmm9, 48(%rdx,%r11,4) #83.5
+ movntdq %xmm2, (%rdx,%r12,4) #83.5
+ movntdq %xmm1, 16(%rdx,%r12,4) #83.5
+ movntdq %xmm5, 32(%rdx,%r12,4) #83.5
+ movntdq %xmm6, 48(%rdx,%r12,4) #83.5
cmpq %rcx, %rax
jne LEAF_EE_1
@@ -191,14 +192,14 @@ LEAF_OO_const_7:
shufps $238, %xmm15, %xmm3 #93.5
shufps $238, %xmm13, %xmm9 #93.5
shufps $238, %xmm1, %xmm2 #93.5
- movaps %xmm14, (%rdx,%r11,4) #93.5
- movaps %xmm7, 16(%rdx,%r11,4) #93.5
- movaps %xmm4, 32(%rdx,%r11,4) #93.5
- movaps %xmm8, 48(%rdx,%r11,4) #93.5
- movaps %xmm3, (%rdx,%r12,4) #93.5
- movaps %xmm6, 16(%rdx,%r12,4) #93.5
- movaps %xmm9, 32(%rdx,%r12,4) #93.5
- movaps %xmm2, 48(%rdx,%r12,4) #93.5
+ movntdq %xmm14, (%rdx,%r11,4) #93.5
+ movntdq %xmm7, 16(%rdx,%r11,4) #93.5
+ movntdq %xmm4, 32(%rdx,%r11,4) #93.5
+ movntdq %xmm8, 48(%rdx,%r11,4) #93.5
+ movntdq %xmm3, (%rdx,%r12,4) #93.5
+ movntdq %xmm6, 16(%rdx,%r12,4) #93.5
+ movntdq %xmm9, 32(%rdx,%r12,4) #93.5
+ movntdq %xmm2, 48(%rdx,%r12,4) #93.5
cmpq %rcx, %rax
jne LEAF_OO_1 # Prob 95% #92.14
OpenPOWER on IntegriCloud