summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-09 01:03:08 +0200
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-09 01:03:08 +0200
commit: 0a98074a2bbde2a3f190e9f32cfeebba594cbbf0 (patch)
tree: dc894b0188e1f89bac5a72dbb9f5ec8ebbdc0cad
parent: ec158717d8a46def60917145b54b656d7a541eb2 (diff)
download: ffts-0a98074a2bbde2a3f190e9f32cfeebba594cbbf0.zip
download: ffts-0a98074a2bbde2a3f190e9f32cfeebba594cbbf0.tar.gz
Replace MOV_I with x86_mov_reg_imm, SHIFT with x86_shift_reg_imm, CALL with x64_call_imm, POP with x64_pop_reg, PUSH with x64_push_reg
-rw-r--r-- src/codegen.c 36
-rw-r--r-- src/codegen_sse.h 123
2 files changed, 39 insertions, 120 deletions
diff --git a/src/codegen.c b/src/codegen.c
index 7814b04..d08be0d 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -156,9 +156,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* assign loop counter register */
loop_count = 4 * p->i0;
#ifdef _M_X64
- MOV_I(&fp, X86_EBX, loop_count);
+ x86_mov_reg_imm(fp, X86_EBX, loop_count);
#else
- MOV_I(&fp, X86_ECX, loop_count);
+ x86_mov_reg_imm(fp, X86_ECX, loop_count);
#endif
#endif
@@ -245,10 +245,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* align loop/jump destination */
#ifdef _M_X64
- MOV_I(&fp, X86_EBX, loop_count);
+ x86_mov_reg_imm(fp, X86_EBX, loop_count);
ffts_align_mem16(&fp, 3);
#else
- MOV_I(&fp, X86_ECX, loop_count);
+ x86_mov_reg_imm(fp, X86_ECX, loop_count);
ffts_align_mem16(&fp, 4);
#endif
@@ -298,10 +298,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* align loop/jump destination */
#ifdef _M_X64
- MOV_I(&fp, X86_EBX, loop_count);
+ x86_mov_reg_imm(fp, X86_EBX, loop_count);
ffts_align_mem16(&fp, 3);
#else
- MOV_I(&fp, X86_ECX, loop_count);
+ x86_mov_reg_imm(fp, X86_ECX, loop_count);
ffts_align_mem16(&fp, 4);
#endif
@@ -325,10 +325,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* align loop/jump destination */
#ifdef _M_X64
- MOV_I(&fp, X86_EBX, loop_count);
+ x86_mov_reg_imm(fp, X86_EBX, loop_count);
ffts_align_mem16(&fp, 8);
#else
- MOV_I(&fp, X86_ECX, loop_count);
+ x86_mov_reg_imm(fp, X86_ECX, loop_count);
ffts_align_mem16(&fp, 9);
#endif
@@ -352,9 +352,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
if (!pN) {
#ifdef _M_X64
- MOV_I(&fp, X86_EBX, pps[0]);
+ x86_mov_reg_imm(fp, X86_EBX, pps[0]);
#else
- MOV_I(&fp, X86_ECX, pps[0] / 4);
+ x86_mov_reg_imm(fp, X86_ECX, pps[0] / 4);
#endif
} else {
int offset = (4 * pps[1]) - pAddr;
@@ -370,9 +370,17 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
int factor = ffts_ctzl(pps[0]) - ffts_ctzl(pN);
#ifdef _M_X64
- SHIFT(&fp, X86_EBX, factor);
+ if (factor > 0) {
+ x86_shift_reg_imm(fp, X86_SHL, X86_EBX, factor);
+ } else {
+ x86_shift_reg_imm(fp, X86_SHR, X86_EBX, -factor);
+ }
#else
- SHIFT(&fp, X86_ECX, factor);
+ if (factor > 0) {
+ x86_shift_reg_imm(fp, X86_SHL, X86_ECX, factor);
+ } else {
+ x86_shift_reg_imm(fp, X86_SHR, X86_ECX, -factor);
+ }
#endif
}
}
@@ -389,9 +397,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
}
if (pps[0] == 2 * leaf_N) {
- CALL(&fp, x_4_addr);
+ x64_call_imm(fp, (char*) x_4_addr - ((char*) fp + 4));
} else {
- CALL(&fp, x_8_addr);
+ x64_call_imm(fp, (char*) x_8_addr - ((char*) fp + 4));
}
pAddr = 4 * pps[1];
diff --git a/src/codegen_sse.h b/src/codegen_sse.h
index 3c3a6ef..c7351fc 100644
--- a/src/codegen_sse.h
+++ b/src/codegen_sse.h
@@ -119,26 +119,11 @@ static void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp)
}
}
-static void CALL(uint8_t **p, uint8_t *func)
-{
- *(*p)++ = 0xe8;
- IMM32(p, func - *p - 4);
-}
-
static void IMM8(uint8_t **p, int32_t imm)
{
*(*p)++ = (imm & 0xff);
}
-static void IMM16(uint8_t **p, int32_t imm)
-{
- int i;
-
- for (i = 0; i < 2; i++) {
- *(*p)++ = (imm & (0xff << (8 * i))) >> (8 * i);
- }
-}
-
static void IMM32(uint8_t **p, int32_t imm)
{
int i;
@@ -368,33 +353,6 @@ static FFTS_INLINE void MOV_D(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t d
}
}
-static void MOV_I(uint8_t **p, uint8_t dst, uint64_t imm)
-{
- /* REX prefix */
- if (dst >= 8 || imm > UINT32_MAX) {
- uint8_t val = 0x40;
-
- if (dst >= 8) {
- val |= 1;
- }
-
- if (imm > UINT32_MAX) {
- val |= 8;
- }
-
- *(*p)++ = val;
- }
-
- /* opcode */
- *(*p)++ = 0xb8 | (dst & 0x7);
-
- if (imm > UINT32_MAX) {
- IMM64(p, imm);
- } else {
- IMM32(p, imm);
- }
-}
-
static FFTS_INLINE void MOV_R(uint8_t **p, uint8_t reg1, uint8_t reg2, int is_store)
{
uint8_t r1 = (reg1 & 7);
@@ -437,53 +395,6 @@ static FFTS_INLINE void MULPS(uint8_t **p, uint8_t reg2, uint8_t reg1)
*(*p)++ = 0xC0 | r1 | (r2 << 3);
}
-static void POP(uint8_t **p, uint8_t reg)
-{
- if (reg >= 8) {
- *(*p)++ = 0x41;
- }
-
- *(*p)++ = 0x58 | (reg & 7);
-}
-
-static void PUSH(uint8_t **p, uint8_t reg)
-{
- if (reg >= 8) {
- *(*p)++ = 0x41;
- }
-
- *(*p)++ = 0x50 | (reg & 7);
-}
-
-static int32_t READ_IMM32(uint8_t *p)
-{
- int32_t rval = 0;
- int i;
-
- for (i = 0; i < 4; i++) {
- rval |= *(p+i) << (8 * i);
- }
-
- return rval;
-}
-
-static void SHIFT(uint8_t **p, uint8_t reg, int shift)
-{
- if (reg >= 8) {
- *(*p)++ = 0x49;
- }
-
-
- *(*p)++ = 0xc1;
- if (shift > 0) {
- *(*p)++ = 0xe0 | (reg & 7);
- *(*p)++ = (shift & 0xff);
- } else {
- *(*p)++ = 0xe8 | (reg & 7);
- *(*p)++ = ((-shift) & 0xff);
- }
-}
-
static FFTS_INLINE void SHUFPS(uint8_t **p, uint8_t reg2, uint8_t reg1, const int select)
{
uint8_t r1 = (reg1 & 7);
@@ -662,15 +573,15 @@ static FFTS_INLINE void generate_epilogue(insns_t **fp)
MOV_D(fp, X64_RBX, X64_RSP, 8, 0);
MOV_D(fp, X64_RSI, X64_RSP, 16, 0);
MOV_D(fp, X64_RDI, X64_RSP, 24, 0);
-#else
- POP(fp, X64_R15);
- POP(fp, X64_R14);
- POP(fp, X64_R13);
- POP(fp, X64_R12);
- POP(fp, X64_R11);
- POP(fp, X64_R10);
- POP(fp, X64_RBX);
- POP(fp, X64_RBP);
+#else
+ x64_pop_reg(*fp, X64_R15);
+ x64_pop_reg(*fp, X64_R14);
+ x64_pop_reg(*fp, X64_R13);
+ x64_pop_reg(*fp, X64_R12);
+ x64_pop_reg(*fp, X64_R11);
+ x64_pop_reg(*fp, X64_R10);
+ x64_pop_reg(*fp, X64_RBX);
+ x64_pop_reg(*fp, X64_RBP);
#endif
x64_ret(*fp);
@@ -706,14 +617,14 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p)
MOVDQA3(fp, X64_RSP, 128, XMM14);
MOVDQA3(fp, X64_RSP, 144, XMM15);
#else
- PUSH(fp, X64_RBP);
- PUSH(fp, X64_RBX);
- PUSH(fp, X64_R10);
- PUSH(fp, X64_R11);
- PUSH(fp, X64_R12);
- PUSH(fp, X64_R13);
- PUSH(fp, X64_R14);
- PUSH(fp, X64_R15);
+ x64_push_reg(*fp, X64_RBP);
+ x64_push_reg(*fp, X64_RBX);
+ x64_push_reg(*fp, X64_R10);
+ x64_push_reg(*fp, X64_R11);
+ x64_push_reg(*fp, X64_R12);
+ x64_push_reg(*fp, X64_R13);
+ x64_push_reg(*fp, X64_R14);
+ x64_push_reg(*fp, X64_R15);
#endif
return start;
OpenPOWER on IntegriCloud