diff options
author | Anthony Blake <anthonix@me.com> | 2012-08-31 18:17:10 +1200 |
---|---|---|
committer | Anthony Blake <anthonix@me.com> | 2012-08-31 18:17:10 +1200 |
commit | dd9dac02d88ba4ccd8076be9c1c818a2b628bfc6 (patch) | |
tree | c87465617575f6336864218ab235167678bd2ca9 /src | |
parent | e601fcceea846a7272633e6846f43a05e5332e2a (diff) | |
download | ffts-dd9dac02d88ba4ccd8076be9c1c818a2b628bfc6.zip ffts-dd9dac02d88ba4ccd8076be9c1c818a2b628bfc6.tar.gz |
Moved the EE stores so they are grouped in one block, plus miscellaneous other changes
Diffstat (limited to 'src')
-rw-r--r-- | src/codegen.c | 6 | ||||
-rw-r--r-- | src/sse.s | 33 |
2 files changed, 17 insertions, 22 deletions
diff --git a/src/codegen.c b/src/codegen.c index 5bc8fb6..5877ee0 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -196,9 +196,6 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN) { PUSH(&fp, R13); PUSH(&fp, R14); PUSH(&fp, R15); - PUSH(&fp, R9); - PUSH(&fp, R8); - PUSH(&fp, RCX); int i; memcpy(fp, leaf_ee_init, leaf_ee - leaf_ee_init); @@ -446,9 +443,6 @@ void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN) { *fp++ = POP_LR(); count++; #else - POP(&fp, RCX); - POP(&fp, R8); - POP(&fp, R9); POP(&fp, R15); POP(&fp, R14); POP(&fp, R13); @@ -106,7 +106,7 @@ LEAF_EE_const_7: addq $4, %rax shufps $238, %xmm4, %xmm2 #83.5 movaps %xmm1, %xmm4 #83.5 - movaps %xmm3, (%rdx,%r11,4) #83.5 + #movntdq %xmm3, (%rdx,%r11,4) #83.5 subps %xmm12, %xmm7 #83.5 addps %xmm12, %xmm14 #83.5 movlhps %xmm7, %xmm4 #83.5 @@ -116,13 +116,14 @@ LEAF_EE_const_7: movlhps %xmm14, %xmm9 #83.5 shufps $238, %xmm13, %xmm5 #83.5 shufps $238, %xmm14, %xmm6 #83.5 - movaps %xmm4, 16(%rdx,%r11,4) #83.5 - movaps %xmm7, 32(%rdx,%r11,4) #83.5 - movaps %xmm9, 48(%rdx,%r11,4) #83.5 - movaps %xmm2, (%rdx,%r12,4) #83.5 - movaps %xmm1, 16(%rdx,%r12,4) #83.5 - movaps %xmm5, 32(%rdx,%r12,4) #83.5 - movaps %xmm6, 48(%rdx,%r12,4) #83.5 + movntdq %xmm3, (%rdx,%r11,4) #83.5 + movntdq %xmm4, 16(%rdx,%r11,4) #83.5 + movntdq %xmm7, 32(%rdx,%r11,4) #83.5 + movntdq %xmm9, 48(%rdx,%r11,4) #83.5 + movntdq %xmm2, (%rdx,%r12,4) #83.5 + movntdq %xmm1, 16(%rdx,%r12,4) #83.5 + movntdq %xmm5, 32(%rdx,%r12,4) #83.5 + movntdq %xmm6, 48(%rdx,%r12,4) #83.5 cmpq %rcx, %rax jne LEAF_EE_1 @@ -191,14 +192,14 @@ LEAF_OO_const_7: shufps $238, %xmm15, %xmm3 #93.5 shufps $238, %xmm13, %xmm9 #93.5 shufps $238, %xmm1, %xmm2 #93.5 - movaps %xmm14, (%rdx,%r11,4) #93.5 - movaps %xmm7, 16(%rdx,%r11,4) #93.5 - movaps %xmm4, 32(%rdx,%r11,4) #93.5 - movaps %xmm8, 48(%rdx,%r11,4) #93.5 - movaps %xmm3, (%rdx,%r12,4) #93.5 - movaps %xmm6, 16(%rdx,%r12,4) #93.5 - movaps %xmm9, 32(%rdx,%r12,4) #93.5 - movaps %xmm2, 
48(%rdx,%r12,4) #93.5 + movntdq %xmm14, (%rdx,%r11,4) #93.5 + movntdq %xmm7, 16(%rdx,%r11,4) #93.5 + movntdq %xmm4, 32(%rdx,%r11,4) #93.5 + movntdq %xmm8, 48(%rdx,%r11,4) #93.5 + movntdq %xmm3, (%rdx,%r12,4) #93.5 + movntdq %xmm6, 16(%rdx,%r12,4) #93.5 + movntdq %xmm9, 32(%rdx,%r12,4) #93.5 + movntdq %xmm2, 48(%rdx,%r12,4) #93.5 cmpq %rcx, %rax jne LEAF_OO_1 # Prob 95% #92.14 |