From c82441c33c3527d1b13f7779c52d58e477f36a93 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Sun, 9 Nov 2014 01:42:51 +0200 Subject: Replace XOR2 with x86_clear_reg, MOV_D with x64_mov_membase_reg/x86_mov_reg_membase, MOV_R with x64_mov_reg_reg and x64_alu_reg_imm_size_body with x64_alu_reg_imm_size --- src/codegen.c | 14 +++--- src/codegen_sse.h | 124 ++++++------------------------------------------------ 2 files changed, 19 insertions(+), 119 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index d08be0d..92f7553 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -207,13 +207,13 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* generate function */ /* clear */ - XOR2(&fp, X86_EAX, X86_EAX); + x86_clear_reg(fp, X86_EAX); /* set "pointer" to offsets */ - MOV_D(&fp, X64_RDI, X64_RCX, 0, 0); + x64_mov_reg_membase(fp, X64_RDI, X64_RCX, 0x0, 8); /* set "pointer" to constants */ - MOV_D(&fp, X64_RSI, X64_RCX, 0xE0, 0); + x64_mov_reg_membase(fp, X64_RSI, X64_RCX, 0xE0, 8); /* align loop/jump destination */ ffts_align_mem16(&fp, 8); @@ -360,9 +360,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N int offset = (4 * pps[1]) - pAddr; if (offset) { #ifdef _M_X64 - x64_alu_reg_imm_size_body(fp, X86_ADD, X64_R8, offset, 8); + x64_alu_reg_imm_size(fp, X86_ADD, X64_R8, offset, 8); #else - x64_alu_reg_imm_size_body(fp, X86_ADD, X64_RDX, offset, 8); + x64_alu_reg_imm_size(fp, X86_ADD, X64_RDX, offset, 8); #endif } @@ -390,9 +390,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N int offset = (int) (ws_is - pLUT); #ifdef _M_X64 - x64_alu_reg_imm_size_body(fp, X86_ADD, X64_RDI, offset, 8); + x64_alu_reg_imm_size(fp, X86_ADD, X64_RDI, offset, 8); #else - x64_alu_reg_imm_size_body(fp, X86_ADD, X64_R8, offset, 8); + x64_alu_reg_imm_size(fp, X86_ADD, X64_R8, offset, 8); #endif } diff --git a/src/codegen_sse.h b/src/codegen_sse.h index c7351fc..f30933e 100644 --- a/src/codegen_sse.h +++ b/src/codegen_sse.h @@ -106,19 +106,6 @@ static FFTS_INLINE void ADDPS(uint8_t **p, uint8_t reg2, uint8_t reg1) *(*p)++ = 0xC0 | r1 | (r2 << 3); } -static void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp) -{ - if (disp == 0) { - *(*p)++ = (rm & 7) | ((reg & 7) << 3); - } else if (disp <= 127 || disp >= -128) { - *(*p)++ = 0x40 | (rm & 7) | ((reg & 7) << 3); - IMM8(p, disp); - } else { - *(*p)++ = 0x80 | (rm & 7) | ((reg & 7) << 3); - IMM32(p, disp); - } -} - static void IMM8(uint8_t **p, int32_t imm) { *(*p)++ = (imm & 0xff); @@ -151,13 +138,6 @@ static void IMM32_NI(uint8_t *p, int32_t imm) } } -static void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) -{ - *(*p)++ = 0x48 | ((base & 0x8) >> 3) | ((dst & 0x8) >> 1); - *(*p)++ = 0x8d; - ADDRMODE(p, dst, base, disp); -} - static FFTS_INLINE void MOVAPS(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) { uint8_t r1 = (reg1 & 7); @@ -311,72 +291,6 @@ static FFTS_INLINE void MOVDQA3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t } } -static FFTS_INLINE void MOV_D(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) -{ - uint8_t r1 = (reg1 & 7); - uint8_t r2 = (reg2 & 7); - - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x49; - } else { - *(*p)++ = 0x48; - } - - if (is_store) { - *(*p)++ = 0x89; - } else { - *(*p)++ = 0x8B; - } - - if (disp == 0) { - *(*p)++ = r2 | (r1 << 3); - - if (r2 == 4) { - *(*p)++ = 0x24; - } - } else if (disp <= 127 && disp >= -128) { - *(*p)++ = 0x40 | r2 | (r1 << 3); - - if (r2 == 4) { - *(*p)++ = 0x24; - } - - IMM8(p, disp); - } else { - *(*p)++ = 0x80 | r2 | (r1 << 3) | (r1 << 11); - - if (r2 == 4) { - *(*p)++ = 0x24; - } - - IMM32(p, disp); - } -} - -static FFTS_INLINE void MOV_R(uint8_t **p, uint8_t reg1, uint8_t reg2, int is_store) -{ - uint8_t r1 = (reg1 & 7); - uint8_t r2 = (reg2 & 7); - - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x48 | ((reg2 & 8) >> 3) | ((reg1 & 8) >> 1); - } else { - *(*p)++ = 0x48; - } - - if (is_store) { - *(*p)++ = 0x89; - } else { - *(*p)++ = 0x8B; - } - - *(*p)++ = 0xC0 | r2 | (r1 << 3); - - if (r2 == 4) { - *(*p)++ = 0x24; - } -} - static FFTS_INLINE void MULPS(uint8_t **p, uint8_t reg2, uint8_t reg1) { uint8_t r1 = (reg1 & 7); @@ -436,20 +350,6 @@ static FFTS_INLINE void SUBPS(uint8_t **p, uint8_t reg2, uint8_t reg1) *(*p)++ = 0xC0 | r1 | (r2 << 3); } -static FFTS_INLINE void XOR2(uint8_t **p, uint8_t reg1, uint8_t reg2) -{ - uint8_t r1 = (reg1 & 7); - uint8_t r2 = (reg2 & 7); - - /* REX prefix */ - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); - } - - *(*p)++ = 0x31; - *(*p)++ = 0xC0 | r2 | (r1 << 3); -} - static FFTS_INLINE void XORPS(uint8_t **p, uint8_t reg2, uint8_t reg1) { uint8_t r1 = (reg1 & 7); @@ -567,12 +467,12 @@ static FFTS_INLINE void generate_epilogue(insns_t **fp) MOVDQA3(fp, XMM15, X64_RSP, 144); /* restore stack */ - x64_alu_reg_imm_size_body(*fp, X86_ADD, X64_RSP, 168, 8); + x64_alu_reg_imm_size(*fp, X86_ADD, X64_RSP, 168, 8); /* restore the last 3 registers from the shadow space */ - MOV_D(fp, X64_RBX, X64_RSP, 8, 0); - MOV_D(fp, X64_RSI, X64_RSP, 16, 0); - MOV_D(fp, X64_RDI, X64_RSP, 24, 0); + x64_mov_reg_membase(*fp, X64_RBX, X64_RSP, 8, 8); + x64_mov_reg_membase(*fp, X64_RSI, X64_RSP, 16, 8); + x64_mov_reg_membase(*fp, X64_RDI, X64_RSP, 24, 8); #else x64_pop_reg(*fp, X64_R15); x64_pop_reg(*fp, X64_R14); @@ -598,12 +498,12 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p) /* save nonvolatile registers */ #ifdef _M_X64 /* use the shadow space to save first 3 registers */ - MOV_D(fp, X64_RBX, X64_RSP, 8, 1); - MOV_D(fp, X64_RSI, X64_RSP, 16, 1); - MOV_D(fp, X64_RDI, X64_RSP, 24, 1); + x64_mov_membase_reg(*fp, X64_RSP, 8, X64_RBX, 8); + x64_mov_membase_reg(*fp, X64_RSP, 16, X64_RSI, 8); + x64_mov_membase_reg(*fp, X64_RSP, 24, X64_RDI, 8); /* reserve space.. */ - x64_alu_reg_imm_size_body(*fp, X86_SUB, X64_RSP, 168, 8); + x64_alu_reg_imm_size(*fp, X86_SUB, X64_RSP, 168, 8); /* to save XMM6-XMM15 registers */ MOVDQA3(fp, X64_RSP, 0, XMM6); @@ -637,7 +537,7 @@ static FFTS_INLINE void generate_transform_init(insns_t **fp) MOVAPS2(fp, XMM3, X64_RSI); /* set "pointer" to twiddle factors */ - MOV_D(fp, X64_RDI, X64_RCX, 0x20, 0); + x64_mov_reg_membase(*fp, X64_RDI, X64_RCX, 0x20, 8); #else size_t len; @@ -689,10 +589,10 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign) #ifdef _M_X64 /* input */ - MOV_R(fp, X64_RDI, X64_RAX, 1); + x64_mov_reg_reg(*fp, X64_RAX, X64_RDI, 8); /* output */ - MOV_R(fp, X64_R8, X64_RCX, 1); + x64_mov_reg_reg(*fp, X64_RCX, X64_R8, 8); /* lea rdx, [r8 + rbx] */ /* loop stop (output + output_stride) */ @@ -888,7 +788,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign) *(*fp)++ = 0x50; /* input + 6 * input_stride */ - x64_alu_reg_imm_size_body(*fp, X86_ADD, X64_RAX, 0x60, 8); + x64_alu_reg_imm_size(*fp, X86_ADD, X64_RAX, 0x60, 8); MULPS(fp, XMM13, XMM7); SUBPS(fp, XMM6, XMM15); -- cgit v1.1