summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-08 23:50:02 +0200
committer Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-08 23:50:02 +0200
commit 401348f4fd39a2ceee2c058091381697301193d2 (patch)
tree b7a4ec0c4b5e535f4417230ad59ed12830145b53
parent 784c3da6784335a0c2a4eeef908a51757c7d8916 (diff)
download ffts-401348f4fd39a2ceee2c058091381697301193d2.zip
download ffts-401348f4fd39a2ceee2c058091381697301193d2.tar.gz
Replace register names with new definitions
-rw-r--r-- src/codegen.c 38
-rw-r--r-- src/codegen_sse.h 137
2 files changed, 73 insertions, 102 deletions
diff --git a/src/codegen.c b/src/codegen.c
index 4e70cb1..72ab6ef 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -156,9 +156,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* assign loop counter register */
loop_count = 4 * p->i0;
#ifdef _M_X64
- MOV_I(&fp, EBX, loop_count);
+ MOV_I(&fp, X86_EBX, loop_count);
#else
- MOV_I(&fp, ECX, loop_count);
+ MOV_I(&fp, X86_ECX, loop_count);
#endif
#endif
@@ -207,13 +207,13 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* generate function */
/* clear */
- XOR2(&fp, EAX, EAX);
+ XOR2(&fp, X86_EAX, X86_EAX);
/* set "pointer" to offsets */
- MOV_D(&fp, RDI, RCX, 0, 0);
+ MOV_D(&fp, X64_RDI, X64_RCX, 0, 0);
/* set "pointer" to constants */
- MOV_D(&fp, RSI, RCX, 0xE0, 0);
+ MOV_D(&fp, X64_RSI, X64_RCX, 0xE0, 0);
/* align loop/jump destination */
ffts_align_mem16(&fp, 8);
@@ -245,10 +245,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* align loop/jump destination */
#ifdef _M_X64
- MOV_I(&fp, EBX, loop_count);
+ MOV_I(&fp, X86_EBX, loop_count);
ffts_align_mem16(&fp, 3);
#else
- MOV_I(&fp, ECX, loop_count);
+ MOV_I(&fp, X86_ECX, loop_count);
ffts_align_mem16(&fp, 4);
#endif
@@ -298,10 +298,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* align loop/jump destination */
#ifdef _M_X64
- MOV_I(&fp, EBX, loop_count);
+ MOV_I(&fp, X86_EBX, loop_count);
ffts_align_mem16(&fp, 3);
#else
- MOV_I(&fp, ECX, loop_count);
+ MOV_I(&fp, X86_ECX, loop_count);
ffts_align_mem16(&fp, 4);
#endif
@@ -325,10 +325,10 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* align loop/jump destination */
#ifdef _M_X64
- MOV_I(&fp, EBX, loop_count);
+ MOV_I(&fp, X86_EBX, loop_count);
ffts_align_mem16(&fp, 8);
#else
- MOV_I(&fp, ECX, loop_count);
+ MOV_I(&fp, X86_ECX, loop_count);
ffts_align_mem16(&fp, 9);
#endif
@@ -352,17 +352,17 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
if (!pN) {
#ifdef _M_X64
- MOV_I(&fp, EBX, pps[0]);
+ MOV_I(&fp, X86_EBX, pps[0]);
#else
- MOV_I(&fp, ECX, pps[0] / 4);
+ MOV_I(&fp, X86_ECX, pps[0] / 4);
#endif
} else {
int offset = (4 * pps[1]) - pAddr;
if (offset) {
#ifdef _M_X64
- ADD_I(&fp, R8, offset);
+ ADD_I(&fp, X64_R8, offset);
#else
- ADD_I(&fp, RDX, offset);
+ ADD_I(&fp, X64_RDX, offset);
#endif
}
@@ -370,9 +370,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
int factor = ffts_ctzl(pps[0]) - ffts_ctzl(pN);
#ifdef _M_X64
- SHIFT(&fp, EBX, factor);
+ SHIFT(&fp, X86_EBX, factor);
#else
- SHIFT(&fp, ECX, factor);
+ SHIFT(&fp, X86_ECX, factor);
#endif
}
}
@@ -382,9 +382,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
int offset = (int) (ws_is - pLUT);
#ifdef _M_X64
- ADD_I(&fp, RDI, offset);
+ ADD_I(&fp, X64_RDI, offset);
#else
- ADD_I(&fp, R8, offset);
+ ADD_I(&fp, X64_R8, offset);
#endif
}
diff --git a/src/codegen_sse.h b/src/codegen_sse.h
index fa67a32..6b01773 100644
--- a/src/codegen_sse.h
+++ b/src/codegen_sse.h
@@ -34,6 +34,8 @@
#ifndef FFTS_CODEGEN_SSE_H
#define FFTS_CODEGEN_SSE_H
+#include "arch/x64/x64-codegen.h"
+
#include <assert.h>
#include <string.h>
@@ -61,32 +63,6 @@ extern const uint32_t sse_leaf_oo_offsets[8];
extern const uint32_t sse_leaf_eo_offsets[8];
extern const uint32_t sse_leaf_oe_offsets[8];
-#define EAX 0
-#define ECX 1
-#define EDX 2
-#define EBX 3
-#define ESP 4
-#define EBP 5
-#define ESI 6
-#define EDI 7
-
-#define RAX 0
-#define RCX 1
-#define RDX 2
-#define RBX 3
-#define RSP 4
-#define RBP 5
-#define RSI 6
-#define RDI 7
-#define R8 8
-#define R9 9
-#define R10 10
-#define R11 11
-#define R12 12
-#define R13 13
-#define R14 14
-#define R15 15
-
#define XMM_REG 0x40
#define XMM0 (XMM_REG | 0x0)
@@ -122,7 +98,7 @@ static FFTS_INLINE void ADDPS(uint8_t **p, uint8_t reg2, uint8_t reg1)
*(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1);
}
- /* esacape opcode */
+ /* escape opcode */
*(*p)++ = 0x0F;
/* opcode */
@@ -515,11 +491,6 @@ static int32_t READ_IMM32(uint8_t *p)
return rval;
}
-static void RET(uint8_t **p)
-{
- *(*p)++ = 0xc3;
-}
-
static void SHIFT(uint8_t **p, uint8_t reg, int shift)
{
if (reg >= 8) {
@@ -720,36 +691,36 @@ static FFTS_INLINE void generate_epilogue(insns_t **fp)
{
#ifdef _M_X64
/* restore nonvolatile registers */
- MOVDQA3(fp, XMM6, RSP, 0);
- MOVDQA3(fp, XMM7, RSP, 16);
- MOVDQA3(fp, XMM8, RSP, 32);
- MOVDQA3(fp, XMM9, RSP, 48);
- MOVDQA3(fp, XMM10, RSP, 64);
- MOVDQA3(fp, XMM11, RSP, 80);
- MOVDQA3(fp, XMM12, RSP, 96);
- MOVDQA3(fp, XMM13, RSP, 112);
- MOVDQA3(fp, XMM14, RSP, 128);
- MOVDQA3(fp, XMM15, RSP, 144);
+ MOVDQA3(fp, XMM6, X64_RSP, 0);
+ MOVDQA3(fp, XMM7, X64_RSP, 16);
+ MOVDQA3(fp, XMM8, X64_RSP, 32);
+ MOVDQA3(fp, XMM9, X64_RSP, 48);
+ MOVDQA3(fp, XMM10, X64_RSP, 64);
+ MOVDQA3(fp, XMM11, X64_RSP, 80);
+ MOVDQA3(fp, XMM12, X64_RSP, 96);
+ MOVDQA3(fp, XMM13, X64_RSP, 112);
+ MOVDQA3(fp, XMM14, X64_RSP, 128);
+ MOVDQA3(fp, XMM15, X64_RSP, 144);
/* restore stack */
- ADD_I(fp, RSP, 168);
+ ADD_I(fp, X64_RSP, 168);
/* restore the last 3 registers from the shadow space */
- MOV_D(fp, RBX, RSP, 8, 0);
- MOV_D(fp, RSI, RSP, 16, 0);
- MOV_D(fp, RDI, RSP, 24, 0);
+ MOV_D(fp, X64_RBX, X64_RSP, 8, 0);
+ MOV_D(fp, X64_RSI, X64_RSP, 16, 0);
+ MOV_D(fp, X64_RDI, X64_RSP, 24, 0);
#else
- POP(fp, R15);
- POP(fp, R14);
- POP(fp, R13);
- POP(fp, R12);
- POP(fp, R11);
- POP(fp, R10);
- POP(fp, RBX);
- POP(fp, RBP);
+ POP(fp, X64_R15);
+ POP(fp, X64_R14);
+ POP(fp, X64_R13);
+ POP(fp, X64_R12);
+ POP(fp, X64_R11);
+ POP(fp, X64_R10);
+ POP(fp, X64_RBX);
+ POP(fp, X64_RBP);
#endif
- RET(fp);
+ x64_ret(*fp);
}
static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p)
@@ -763,33 +734,33 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p)
/* save nonvolatile registers */
#ifdef _M_X64
/* use the shadow space to save first 3 registers */
- MOV_D(fp, RBX, RSP, 8, 1);
- MOV_D(fp, RSI, RSP, 16, 1);
- MOV_D(fp, RDI, RSP, 24, 1);
+ MOV_D(fp, X64_RBX, X64_RSP, 8, 1);
+ MOV_D(fp, X64_RSI, X64_RSP, 16, 1);
+ MOV_D(fp, X64_RDI, X64_RSP, 24, 1);
/* reserve space.. */
- SUB_I(fp, RSP, 168);
+ SUB_I(fp, X64_RSP, 168);
/* to save XMM6-XMM15 registers */
- MOVDQA3(fp, RSP, 0, XMM6);
- MOVDQA3(fp, RSP, 16, XMM7);
- MOVDQA3(fp, RSP, 32, XMM8);
- MOVDQA3(fp, RSP, 48, XMM9);
- MOVDQA3(fp, RSP, 64, XMM10);
- MOVDQA3(fp, RSP, 80, XMM11);
- MOVDQA3(fp, RSP, 96, XMM12);
- MOVDQA3(fp, RSP, 112, XMM13);
- MOVDQA3(fp, RSP, 128, XMM14);
- MOVDQA3(fp, RSP, 144, XMM15);
+ MOVDQA3(fp, X64_RSP, 0, XMM6);
+ MOVDQA3(fp, X64_RSP, 16, XMM7);
+ MOVDQA3(fp, X64_RSP, 32, XMM8);
+ MOVDQA3(fp, X64_RSP, 48, XMM9);
+ MOVDQA3(fp, X64_RSP, 64, XMM10);
+ MOVDQA3(fp, X64_RSP, 80, XMM11);
+ MOVDQA3(fp, X64_RSP, 96, XMM12);
+ MOVDQA3(fp, X64_RSP, 112, XMM13);
+ MOVDQA3(fp, X64_RSP, 128, XMM14);
+ MOVDQA3(fp, X64_RSP, 144, XMM15);
#else
- PUSH(fp, RBP);
- PUSH(fp, RBX);
- PUSH(fp, R10);
- PUSH(fp, R11);
- PUSH(fp, R12);
- PUSH(fp, R13);
- PUSH(fp, R14);
- PUSH(fp, R15);
+ PUSH(fp, X64_RBP);
+ PUSH(fp, X64_RBX);
+ PUSH(fp, X64_R10);
+ PUSH(fp, X64_R11);
+ PUSH(fp, X64_R12);
+ PUSH(fp, X64_R13);
+ PUSH(fp, X64_R14);
+ PUSH(fp, X64_R15);
#endif
return start;
@@ -799,10 +770,10 @@ static FFTS_INLINE void generate_transform_init(insns_t **fp)
{
#ifdef _M_X64
/* generate function */
- MOVAPS2(fp, XMM3, RSI);
+ MOVAPS2(fp, XMM3, X64_RSI);
/* set "pointer" to twiddle factors */
- MOV_D(fp, RDI, RCX, 0x20, 0);
+ MOV_D(fp, X64_RDI, X64_RCX, 0x20, 0);
#else
size_t len;
@@ -854,10 +825,10 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
#ifdef _M_X64
/* input */
- MOV_R(fp, RDI, RAX, 1);
+ MOV_R(fp, X64_RDI, X64_RAX, 1);
/* output */
- MOV_R(fp, R8, RCX, 1);
+ MOV_R(fp, X64_R8, X64_RCX, 1);
/* lea rdx, [r8 + rbx] */
/* loop stop (output + output_stride) */
@@ -1053,7 +1024,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
*(*fp)++ = 0x50;
/* input + 6 * input_stride */
- ADD_I(fp, RAX, 0x60);
+ ADD_I(fp, X64_RAX, 0x60);
MULPS(fp, XMM13, XMM7);
SUBPS(fp, XMM6, XMM15);
@@ -1201,7 +1172,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
*(*fp)++ = 0xFF;
/* ret */
- RET(fp);
+ x64_ret(*fp);
#else
/* copy function */
assert((char*) x8_soft_end >= (char*) x8_soft);
OpenPOWER on IntegriCloud