diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-06 11:38:55 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-06 11:38:55 +0200 |
commit | 18d19a9f8b4e409b4db46338c9040a61555f9c58 (patch) | |
tree | d086566753530cd86bafc68f03887a1eceb844fd /src/codegen.c | |
parent | a0db4af6fe8f68a62cbf993871137d4cd341dfc5 (diff) | |
download | ffts-18d19a9f8b4e409b4db46338c9040a61555f9c58.zip ffts-18d19a9f8b4e409b4db46338c9040a61555f9c58.tar.gz |
Win64: actually generate "generate_size8_base_case" instead of copying
Diffstat (limited to 'src/codegen.c')
-rw-r--r-- | src/codegen.c | 74 |
1 files changed, 31 insertions, 43 deletions
diff --git a/src/codegen.c b/src/codegen.c index 9d95519..4e70cb1 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -144,21 +144,21 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N fp = (insns_t*) p->transform_base; - /* generate base cases */ - x_4_addr = generate_size4_base_case(&fp, sign); - x_8_addr = generate_size8_base_case(&fp, sign); + /* generate base cases */ + x_4_addr = generate_size4_base_case(&fp, sign); + x_8_addr = generate_size8_base_case(&fp, sign); #ifdef __arm__ - start = generate_prologue(&fp, p); + start = generate_prologue(&fp, p); #else - start = generate_prologue(&fp, p); + start = generate_prologue(&fp, p); /* assign loop counter register */ loop_count = 4 * p->i0; #ifdef _M_X64 - MOVI(&fp, EBX, loop_count); + MOV_I(&fp, EBX, loop_count); #else - MOVI(&fp, ECX, loop_count); + MOV_I(&fp, ECX, loop_count); #endif #endif @@ -204,18 +204,18 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N //fprintf(stderr, "Body start address = %016p\n", start); #ifdef _M_X64 - /* generate function */ + /* generate function */ - /* clear */ - XOR2(&fp, EAX, EAX); - - /* set "pointer" to offsets */ - MOV(&fp, RDI, RCX, 0, 0); + /* clear */ + XOR2(&fp, EAX, EAX); - /* set "pointer" to constants */ - MOV(&fp, RSI, RCX, 0xE0, 0); + /* set "pointer" to offsets */ + MOV_D(&fp, RDI, RCX, 0, 0); - /* align loop/jump destination */ + /* set "pointer" to constants */ + MOV_D(&fp, RSI, RCX, 0xE0, 0); + + /* align loop/jump destination */ ffts_align_mem16(&fp, 8); #else /* copy function */ @@ -245,10 +245,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOVI(&fp, EBX, loop_count); + MOV_I(&fp, EBX, loop_count); ffts_align_mem16(&fp, 3); #else - MOVI(&fp, ECX, loop_count); + MOV_I(&fp, ECX, loop_count); ffts_align_mem16(&fp, 4); #endif @@ -298,10 +298,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, 
size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOVI(&fp, EBX, loop_count); + MOV_I(&fp, EBX, loop_count); ffts_align_mem16(&fp, 3); #else - MOVI(&fp, ECX, loop_count); + MOV_I(&fp, ECX, loop_count); ffts_align_mem16(&fp, 4); #endif @@ -325,10 +325,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOVI(&fp, EBX, loop_count); + MOV_I(&fp, EBX, loop_count); ffts_align_mem16(&fp, 8); #else - MOVI(&fp, ECX, loop_count); + MOV_I(&fp, ECX, loop_count); ffts_align_mem16(&fp, 9); #endif @@ -343,38 +343,26 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N fp += len; } -#ifdef _M_X64 - /* generate function */ - MOVAPS2(&fp, XMM3, RSI); - - /* set "pointer" to twiddle factors */ - MOV(&fp, RDI, RCX, 0x20, 0); -#else - /* copy function */ - assert((char*) x4 > (char*) x_init); - len = (char*) x4 - (char*) x_init; - memcpy(fp, x_init, len); - fp += len; -#endif + generate_transform_init(&fp); - /* generate subtransform calls */ + /* generate subtransform calls */ count = 2; while (pps[0]) { size_t ws_is; if (!pN) { #ifdef _M_X64 - MOVI(&fp, EBX, pps[0]); + MOV_I(&fp, EBX, pps[0]); #else - MOVI(&fp, ECX, pps[0] / 4); + MOV_I(&fp, ECX, pps[0] / 4); #endif } else { int offset = (4 * pps[1]) - pAddr; if (offset) { #ifdef _M_X64 - ADDI(&fp, R8, offset); + ADD_I(&fp, R8, offset); #else - ADDI(&fp, RDX, offset); + ADD_I(&fp, RDX, offset); #endif } @@ -394,9 +382,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N int offset = (int) (ws_is - pLUT); #ifdef _M_X64 - ADDI(&fp, RDI, offset); + ADD_I(&fp, RDI, offset); #else - ADDI(&fp, R8, offset); + ADD_I(&fp, R8, offset); #endif } @@ -701,7 +689,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N *fp++ = POP_LR(); count++; #else - generate_epilogue(&fp); + generate_epilogue(&fp); #endif // *fp++ = B(14); count++; |