diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-08 23:50:02 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-08 23:50:02 +0200 |
commit | 401348f4fd39a2ceee2c058091381697301193d2 (patch) | |
tree | b7a4ec0c4b5e535f4417230ad59ed12830145b53 | |
parent | 784c3da6784335a0c2a4eeef908a51757c7d8916 (diff) | |
download | ffts-401348f4fd39a2ceee2c058091381697301193d2.zip ffts-401348f4fd39a2ceee2c058091381697301193d2.tar.gz |
Replace register names with new definitions
-rw-r--r-- | src/codegen.c | 38 | ||||
-rw-r--r-- | src/codegen_sse.h | 137 |
2 files changed, 73 insertions, 102 deletions
diff --git a/src/codegen.c b/src/codegen.c index 4e70cb1..72ab6ef 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -156,9 +156,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* assign loop counter register */ loop_count = 4 * p->i0; #ifdef _M_X64 - MOV_I(&fp, EBX, loop_count); + MOV_I(&fp, X86_EBX, loop_count); #else - MOV_I(&fp, ECX, loop_count); + MOV_I(&fp, X86_ECX, loop_count); #endif #endif @@ -207,13 +207,13 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* generate function */ /* clear */ - XOR2(&fp, EAX, EAX); + XOR2(&fp, X86_EAX, X86_EAX); /* set "pointer" to offsets */ - MOV_D(&fp, RDI, RCX, 0, 0); + MOV_D(&fp, X64_RDI, X64_RCX, 0, 0); /* set "pointer" to constants */ - MOV_D(&fp, RSI, RCX, 0xE0, 0); + MOV_D(&fp, X64_RSI, X64_RCX, 0xE0, 0); /* align loop/jump destination */ ffts_align_mem16(&fp, 8); @@ -245,10 +245,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOV_I(&fp, EBX, loop_count); + MOV_I(&fp, X86_EBX, loop_count); ffts_align_mem16(&fp, 3); #else - MOV_I(&fp, ECX, loop_count); + MOV_I(&fp, X86_ECX, loop_count); ffts_align_mem16(&fp, 4); #endif @@ -298,10 +298,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOV_I(&fp, EBX, loop_count); + MOV_I(&fp, X86_EBX, loop_count); ffts_align_mem16(&fp, 3); #else - MOV_I(&fp, ECX, loop_count); + MOV_I(&fp, X86_ECX, loop_count); ffts_align_mem16(&fp, 4); #endif @@ -325,10 +325,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* align loop/jump destination */ #ifdef _M_X64 - MOV_I(&fp, EBX, loop_count); + MOV_I(&fp, X86_EBX, loop_count); ffts_align_mem16(&fp, 8); #else - MOV_I(&fp, ECX, loop_count); + MOV_I(&fp, X86_ECX, loop_count); ffts_align_mem16(&fp, 9); #endif @@ -352,17 +352,17 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N if (!pN) { #ifdef _M_X64 - MOV_I(&fp, EBX, pps[0]); + MOV_I(&fp, X86_EBX, pps[0]); #else - MOV_I(&fp, ECX, pps[0] / 4); + MOV_I(&fp, X86_ECX, pps[0] / 4); #endif } else { int offset = (4 * pps[1]) - pAddr; if (offset) { #ifdef _M_X64 - ADD_I(&fp, R8, offset); + ADD_I(&fp, X64_R8, offset); #else - ADD_I(&fp, RDX, offset); + ADD_I(&fp, X64_RDX, offset); #endif } @@ -370,9 +370,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N int factor = ffts_ctzl(pps[0]) - ffts_ctzl(pN); #ifdef _M_X64 - SHIFT(&fp, EBX, factor); + SHIFT(&fp, X86_EBX, factor); #else - SHIFT(&fp, ECX, factor); + SHIFT(&fp, X86_ECX, factor); #endif } } @@ -382,9 +382,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N int offset = (int) (ws_is - pLUT); #ifdef _M_X64 - ADD_I(&fp, RDI, offset); + ADD_I(&fp, X64_RDI, offset); #else - ADD_I(&fp, R8, offset); + ADD_I(&fp, X64_R8, offset); #endif } diff --git a/src/codegen_sse.h b/src/codegen_sse.h index fa67a32..6b01773 100644 --- a/src/codegen_sse.h +++ b/src/codegen_sse.h @@ -34,6 +34,8 @@ #ifndef FFTS_CODEGEN_SSE_H #define FFTS_CODEGEN_SSE_H +#include "arch/x64/x64-codegen.h" + #include <assert.h> #include <string.h> @@ -61,32 +63,6 @@ extern const uint32_t sse_leaf_oo_offsets[8]; extern const uint32_t sse_leaf_eo_offsets[8]; extern const uint32_t sse_leaf_oe_offsets[8]; -#define EAX 0 -#define ECX 1 -#define EDX 2 -#define EBX 3 -#define ESP 4 -#define EBP 5 -#define ESI 6 -#define EDI 7 - -#define RAX 0 -#define RCX 1 -#define RDX 2 -#define RBX 3 -#define RSP 4 -#define RBP 5 -#define RSI 6 -#define RDI 7 -#define R8 8 -#define R9 9 -#define R10 10 -#define R11 11 -#define R12 12 -#define R13 13 -#define R14 14 -#define R15 15 - #define XMM_REG 0x40 #define XMM0 (XMM_REG | 0x0) @@ -122,7 +98,7 @@ static FFTS_INLINE void ADDPS(uint8_t **p, uint8_t reg2, uint8_t reg1) *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); } - /* esacape opcode */ + /* escape opcode */ *(*p)++ = 0x0F; /* opcode */ @@ -515,11 +491,6 @@ static int32_t READ_IMM32(uint8_t *p) return rval; } -static void RET(uint8_t **p) -{ - *(*p)++ = 0xc3; -} - static void SHIFT(uint8_t **p, uint8_t reg, int shift) { if (reg >= 8) { @@ -720,36 +691,36 @@ static FFTS_INLINE void generate_epilogue(insns_t **fp) { #ifdef _M_X64 /* restore nonvolatile registers */ - MOVDQA3(fp, XMM6, RSP, 0); - MOVDQA3(fp, XMM7, RSP, 16); - MOVDQA3(fp, XMM8, RSP, 32); - MOVDQA3(fp, XMM9, RSP, 48); - MOVDQA3(fp, XMM10, RSP, 64); - MOVDQA3(fp, XMM11, RSP, 80); - MOVDQA3(fp, XMM12, RSP, 96); - MOVDQA3(fp, XMM13, RSP, 112); - MOVDQA3(fp, XMM14, RSP, 128); - MOVDQA3(fp, XMM15, RSP, 144); + MOVDQA3(fp, XMM6, X64_RSP, 0); + MOVDQA3(fp, XMM7, X64_RSP, 16); + MOVDQA3(fp, XMM8, X64_RSP, 32); + MOVDQA3(fp, XMM9, X64_RSP, 48); + MOVDQA3(fp, XMM10, X64_RSP, 64); + MOVDQA3(fp, XMM11, X64_RSP, 80); + MOVDQA3(fp, XMM12, X64_RSP, 96); + MOVDQA3(fp, XMM13, X64_RSP, 112); + MOVDQA3(fp, XMM14, X64_RSP, 128); + MOVDQA3(fp, XMM15, X64_RSP, 144); /* restore stack */ - ADD_I(fp, RSP, 168); + ADD_I(fp, X64_RSP, 168); /* restore the last 3 registers from the shadow space */ - MOV_D(fp, RBX, RSP, 8, 0); - MOV_D(fp, RSI, RSP, 16, 0); - MOV_D(fp, RDI, RSP, 24, 0); + MOV_D(fp, X64_RBX, X64_RSP, 8, 0); + MOV_D(fp, X64_RSI, X64_RSP, 16, 0); + MOV_D(fp, X64_RDI, X64_RSP, 24, 0); #else - POP(fp, R15); - POP(fp, R14); - POP(fp, R13); - POP(fp, R12); - POP(fp, R11); - POP(fp, R10); - POP(fp, RBX); - POP(fp, RBP); + POP(fp, X64_R15); + POP(fp, X64_R14); + POP(fp, X64_R13); + POP(fp, X64_R12); + POP(fp, X64_R11); + POP(fp, X64_R10); + POP(fp, X64_RBX); + POP(fp, X64_RBP); #endif - RET(fp); + x64_ret(*fp); } static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p) @@ -763,33 +734,33 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p) /* save nonvolatile registers */ #ifdef _M_X64 /* use the shadow space to save first 3 registers */ - MOV_D(fp, RBX, RSP, 8, 1); - MOV_D(fp, RSI, RSP, 16, 1); - MOV_D(fp, RDI, RSP, 24, 1); + MOV_D(fp, X64_RBX, X64_RSP, 8, 1); + MOV_D(fp, X64_RSI, X64_RSP, 16, 1); + MOV_D(fp, X64_RDI, X64_RSP, 24, 1); /* reserve space.. */ - SUB_I(fp, RSP, 168); + SUB_I(fp, X64_RSP, 168); /* to save XMM6-XMM15 registers */ - MOVDQA3(fp, RSP, 0, XMM6); - MOVDQA3(fp, RSP, 16, XMM7); - MOVDQA3(fp, RSP, 32, XMM8); - MOVDQA3(fp, RSP, 48, XMM9); - MOVDQA3(fp, RSP, 64, XMM10); - MOVDQA3(fp, RSP, 80, XMM11); - MOVDQA3(fp, RSP, 96, XMM12); - MOVDQA3(fp, RSP, 112, XMM13); - MOVDQA3(fp, RSP, 128, XMM14); - MOVDQA3(fp, RSP, 144, XMM15); + MOVDQA3(fp, X64_RSP, 0, XMM6); + MOVDQA3(fp, X64_RSP, 16, XMM7); + MOVDQA3(fp, X64_RSP, 32, XMM8); + MOVDQA3(fp, X64_RSP, 48, XMM9); + MOVDQA3(fp, X64_RSP, 64, XMM10); + MOVDQA3(fp, X64_RSP, 80, XMM11); + MOVDQA3(fp, X64_RSP, 96, XMM12); + MOVDQA3(fp, X64_RSP, 112, XMM13); + MOVDQA3(fp, X64_RSP, 128, XMM14); + MOVDQA3(fp, X64_RSP, 144, XMM15); #else - PUSH(fp, RBP); - PUSH(fp, RBX); - PUSH(fp, R10); - PUSH(fp, R11); - PUSH(fp, R12); - PUSH(fp, R13); - PUSH(fp, R14); - PUSH(fp, R15); + PUSH(fp, X64_RBP); + PUSH(fp, X64_RBX); + PUSH(fp, X64_R10); + PUSH(fp, X64_R11); + PUSH(fp, X64_R12); + PUSH(fp, X64_R13); + PUSH(fp, X64_R14); + PUSH(fp, X64_R15); #endif return start; @@ -799,10 +770,10 @@ static FFTS_INLINE void generate_transform_init(insns_t **fp) { #ifdef _M_X64 /* generate function */ - MOVAPS2(fp, XMM3, RSI); + MOVAPS2(fp, XMM3, X64_RSI); /* set "pointer" to twiddle factors */ - MOV_D(fp, RDI, RCX, 0x20, 0); + MOV_D(fp, X64_RDI, X64_RCX, 0x20, 0); #else size_t len; @@ -854,10 +825,10 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign) #ifdef _M_X64 /* input */ - MOV_R(fp, RDI, RAX, 1); + MOV_R(fp, X64_RDI, X64_RAX, 1); /* output */ - MOV_R(fp, R8, RCX, 1); + MOV_R(fp, X64_R8, X64_RCX, 1); /* lea rdx, [r8 + rbx] */ /* loop stop (output + output_stride) */ @@ -1053,7 +1024,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign) *(*fp)++ = 0x50; /* input + 6 * input_stride */ - ADD_I(fp, RAX, 0x60); + ADD_I(fp, X64_RAX, 0x60); MULPS(fp, XMM13, XMM7); SUBPS(fp, XMM6, XMM15); @@ -1201,7 +1172,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign) *(*fp)++ = 0xFF; /* ret */ - RET(fp); + x64_ret(*fp); #else /* copy function */ assert((char*) x8_soft_end >= (char*) x8_soft); |