summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-09 01:42:51 +0200
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-09 01:42:51 +0200
commit: c82441c33c3527d1b13f7779c52d58e477f36a93 (patch)
tree: 00aaccee58f81c15fa3cd41db6c6cee06fffb44e
parent: 0a98074a2bbde2a3f190e9f32cfeebba594cbbf0 (diff)
download: ffts-c82441c33c3527d1b13f7779c52d58e477f36a93.zip
download: ffts-c82441c33c3527d1b13f7779c52d58e477f36a93.tar.gz
Replace XOR2 with x86_clear_reg, MOV_D with x64_mov_membase_reg/x86_mov_reg_membase, MOV_R with x64_mov_reg_reg and x64_alu_reg_imm_size_body with x64_alu_reg_imm_size
-rw-r--r-- src/codegen.c 14
-rw-r--r-- src/codegen_sse.h 124
2 files changed, 19 insertions, 119 deletions
diff --git a/src/codegen.c b/src/codegen.c
index d08be0d..92f7553 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -207,13 +207,13 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* generate function */
/* clear */
- XOR2(&fp, X86_EAX, X86_EAX);
+ x86_clear_reg(fp, X86_EAX);
/* set "pointer" to offsets */
- MOV_D(&fp, X64_RDI, X64_RCX, 0, 0);
+ x64_mov_reg_membase(fp, X64_RDI, X64_RCX, 0x0, 8);
/* set "pointer" to constants */
- MOV_D(&fp, X64_RSI, X64_RCX, 0xE0, 0);
+ x64_mov_reg_membase(fp, X64_RSI, X64_RCX, 0xE0, 8);
/* align loop/jump destination */
ffts_align_mem16(&fp, 8);
@@ -360,9 +360,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
int offset = (4 * pps[1]) - pAddr;
if (offset) {
#ifdef _M_X64
- x64_alu_reg_imm_size_body(fp, X86_ADD, X64_R8, offset, 8);
+ x64_alu_reg_imm_size(fp, X86_ADD, X64_R8, offset, 8);
#else
- x64_alu_reg_imm_size_body(fp, X86_ADD, X64_RDX, offset, 8);
+ x64_alu_reg_imm_size(fp, X86_ADD, X64_RDX, offset, 8);
#endif
}
@@ -390,9 +390,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
int offset = (int) (ws_is - pLUT);
#ifdef _M_X64
- x64_alu_reg_imm_size_body(fp, X86_ADD, X64_RDI, offset, 8);
+ x64_alu_reg_imm_size(fp, X86_ADD, X64_RDI, offset, 8);
#else
- x64_alu_reg_imm_size_body(fp, X86_ADD, X64_R8, offset, 8);
+ x64_alu_reg_imm_size(fp, X86_ADD, X64_R8, offset, 8);
#endif
}
diff --git a/src/codegen_sse.h b/src/codegen_sse.h
index c7351fc..f30933e 100644
--- a/src/codegen_sse.h
+++ b/src/codegen_sse.h
@@ -106,19 +106,6 @@ static FFTS_INLINE void ADDPS(uint8_t **p, uint8_t reg2, uint8_t reg1)
*(*p)++ = 0xC0 | r1 | (r2 << 3);
}
-static void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp)
-{
- if (disp == 0) {
- *(*p)++ = (rm & 7) | ((reg & 7) << 3);
- } else if (disp <= 127 || disp >= -128) {
- *(*p)++ = 0x40 | (rm & 7) | ((reg & 7) << 3);
- IMM8(p, disp);
- } else {
- *(*p)++ = 0x80 | (rm & 7) | ((reg & 7) << 3);
- IMM32(p, disp);
- }
-}
-
static void IMM8(uint8_t **p, int32_t imm)
{
*(*p)++ = (imm & 0xff);
@@ -151,13 +138,6 @@ static void IMM32_NI(uint8_t *p, int32_t imm)
}
}
-static void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp)
-{
- *(*p)++ = 0x48 | ((base & 0x8) >> 3) | ((dst & 0x8) >> 1);
- *(*p)++ = 0x8d;
- ADDRMODE(p, dst, base, disp);
-}
-
static FFTS_INLINE void MOVAPS(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store)
{
uint8_t r1 = (reg1 & 7);
@@ -311,72 +291,6 @@ static FFTS_INLINE void MOVDQA3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t
}
}
-static FFTS_INLINE void MOV_D(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store)
-{
- uint8_t r1 = (reg1 & 7);
- uint8_t r2 = (reg2 & 7);
-
- if ((reg1 & 8) || (reg2 & 8)) {
- *(*p)++ = 0x49;
- } else {
- *(*p)++ = 0x48;
- }
-
- if (is_store) {
- *(*p)++ = 0x89;
- } else {
- *(*p)++ = 0x8B;
- }
-
- if (disp == 0) {
- *(*p)++ = r2 | (r1 << 3);
-
- if (r2 == 4) {
- *(*p)++ = 0x24;
- }
- } else if (disp <= 127 && disp >= -128) {
- *(*p)++ = 0x40 | r2 | (r1 << 3);
-
- if (r2 == 4) {
- *(*p)++ = 0x24;
- }
-
- IMM8(p, disp);
- } else {
- *(*p)++ = 0x80 | r2 | (r1 << 3) | (r1 << 11);
-
- if (r2 == 4) {
- *(*p)++ = 0x24;
- }
-
- IMM32(p, disp);
- }
-}
-
-static FFTS_INLINE void MOV_R(uint8_t **p, uint8_t reg1, uint8_t reg2, int is_store)
-{
- uint8_t r1 = (reg1 & 7);
- uint8_t r2 = (reg2 & 7);
-
- if ((reg1 & 8) || (reg2 & 8)) {
- *(*p)++ = 0x48 | ((reg2 & 8) >> 3) | ((reg1 & 8) >> 1);
- } else {
- *(*p)++ = 0x48;
- }
-
- if (is_store) {
- *(*p)++ = 0x89;
- } else {
- *(*p)++ = 0x8B;
- }
-
- *(*p)++ = 0xC0 | r2 | (r1 << 3);
-
- if (r2 == 4) {
- *(*p)++ = 0x24;
- }
-}
-
static FFTS_INLINE void MULPS(uint8_t **p, uint8_t reg2, uint8_t reg1)
{
uint8_t r1 = (reg1 & 7);
@@ -436,20 +350,6 @@ static FFTS_INLINE void SUBPS(uint8_t **p, uint8_t reg2, uint8_t reg1)
*(*p)++ = 0xC0 | r1 | (r2 << 3);
}
-static FFTS_INLINE void XOR2(uint8_t **p, uint8_t reg1, uint8_t reg2)
-{
- uint8_t r1 = (reg1 & 7);
- uint8_t r2 = (reg2 & 7);
-
- /* REX prefix */
- if ((reg1 & 8) || (reg2 & 8)) {
- *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1);
- }
-
- *(*p)++ = 0x31;
- *(*p)++ = 0xC0 | r2 | (r1 << 3);
-}
-
static FFTS_INLINE void XORPS(uint8_t **p, uint8_t reg2, uint8_t reg1)
{
uint8_t r1 = (reg1 & 7);
@@ -567,12 +467,12 @@ static FFTS_INLINE void generate_epilogue(insns_t **fp)
MOVDQA3(fp, XMM15, X64_RSP, 144);
/* restore stack */
- x64_alu_reg_imm_size_body(*fp, X86_ADD, X64_RSP, 168, 8);
+ x64_alu_reg_imm_size(*fp, X86_ADD, X64_RSP, 168, 8);
/* restore the last 3 registers from the shadow space */
- MOV_D(fp, X64_RBX, X64_RSP, 8, 0);
- MOV_D(fp, X64_RSI, X64_RSP, 16, 0);
- MOV_D(fp, X64_RDI, X64_RSP, 24, 0);
+ x64_mov_reg_membase(*fp, X64_RBX, X64_RSP, 8, 8);
+ x64_mov_reg_membase(*fp, X64_RSI, X64_RSP, 16, 8);
+ x64_mov_reg_membase(*fp, X64_RDI, X64_RSP, 24, 8);
#else
x64_pop_reg(*fp, X64_R15);
x64_pop_reg(*fp, X64_R14);
@@ -598,12 +498,12 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p)
/* save nonvolatile registers */
#ifdef _M_X64
/* use the shadow space to save first 3 registers */
- MOV_D(fp, X64_RBX, X64_RSP, 8, 1);
- MOV_D(fp, X64_RSI, X64_RSP, 16, 1);
- MOV_D(fp, X64_RDI, X64_RSP, 24, 1);
+ x64_mov_membase_reg(*fp, X64_RSP, 8, X64_RBX, 8);
+ x64_mov_membase_reg(*fp, X64_RSP, 16, X64_RSI, 8);
+ x64_mov_membase_reg(*fp, X64_RSP, 24, X64_RDI, 8);
/* reserve space.. */
- x64_alu_reg_imm_size_body(*fp, X86_SUB, X64_RSP, 168, 8);
+ x64_alu_reg_imm_size(*fp, X86_SUB, X64_RSP, 168, 8);
/* to save XMM6-XMM15 registers */
MOVDQA3(fp, X64_RSP, 0, XMM6);
@@ -637,7 +537,7 @@ static FFTS_INLINE void generate_transform_init(insns_t **fp)
MOVAPS2(fp, XMM3, X64_RSI);
/* set "pointer" to twiddle factors */
- MOV_D(fp, X64_RDI, X64_RCX, 0x20, 0);
+ x64_mov_reg_membase(*fp, X64_RDI, X64_RCX, 0x20, 8);
#else
size_t len;
@@ -689,10 +589,10 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
#ifdef _M_X64
/* input */
- MOV_R(fp, X64_RDI, X64_RAX, 1);
+ x64_mov_reg_reg(*fp, X64_RAX, X64_RDI, 8);
/* output */
- MOV_R(fp, X64_R8, X64_RCX, 1);
+ x64_mov_reg_reg(*fp, X64_RCX, X64_R8, 8);
/* lea rdx, [r8 + rbx] */
/* loop stop (output + output_stride) */
@@ -888,7 +788,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
*(*fp)++ = 0x50;
/* input + 6 * input_stride */
- x64_alu_reg_imm_size_body(*fp, X86_ADD, X64_RAX, 0x60, 8);
+ x64_alu_reg_imm_size(*fp, X86_ADD, X64_RAX, 0x60, 8);
MULPS(fp, XMM13, XMM7);
SUBPS(fp, XMM6, XMM15);
OpenPOWER on IntegriCloud