diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-03 10:46:09 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-03 10:46:09 +0200 |
commit | 14c88113349263fafc88a671a71facca7e177dc9 (patch) | |
tree | aa467450d9944c61a9b87bc788d9935a92aca1a6 /src/codegen_sse.h | |
parent | b4ec2061aab28f7cc626f36a3d8324eebeaab88a (diff) | |
download | ffts-14c88113349263fafc88a671a71facca7e177dc9.zip ffts-14c88113349263fafc88a671a71facca7e177dc9.tar.gz |
MOVDQA "intrinsic", two operand MOVDQA2, three operand MOVDQA3 helpers
Diffstat (limited to 'src/codegen_sse.h')
-rw-r--r-- | src/codegen_sse.h | 126 |
1 files changed, 110 insertions, 16 deletions
diff --git a/src/codegen_sse.h b/src/codegen_sse.h
index 269d142..6690b92 100644
--- a/src/codegen_sse.h
+++ b/src/codegen_sse.h
@@ -34,6 +34,8 @@
 #ifndef FFTS_CODEGEN_SSE_H
 #define FFTS_CODEGEN_SSE_H
 
+#include <assert.h>
+
 void neon_x4(float *, size_t, float *);
 void neon_x8(float *, size_t, float *);
 void neon_x8_t(float *, size_t, float *);
@@ -84,12 +86,31 @@ extern const uint32_t sse_leaf_oe_offsets[8];
 #define R14 14
 #define R15 15
 
-void IMM8(uint8_t **p, int32_t imm)
+#define XMM_REG 0x40
+
+#define XMM0 (XMM_REG | 0x0)
+#define XMM1 (XMM_REG | 0x1)
+#define XMM2 (XMM_REG | 0x2)
+#define XMM3 (XMM_REG | 0x3)
+#define XMM4 (XMM_REG | 0x4)
+#define XMM5 (XMM_REG | 0x5)
+#define XMM6 (XMM_REG | 0x6)
+#define XMM7 (XMM_REG | 0x7)
+#define XMM8 (XMM_REG | 0x8)
+#define XMM9 (XMM_REG | 0x9)
+#define XMM10 (XMM_REG | 0xa)
+#define XMM11 (XMM_REG | 0xb)
+#define XMM12 (XMM_REG | 0xc)
+#define XMM13 (XMM_REG | 0xd)
+#define XMM14 (XMM_REG | 0xe)
+#define XMM15 (XMM_REG | 0xf)
+
+static void IMM8(uint8_t **p, int32_t imm)
 {
     *(*p)++ = (imm & 0xff);
 }
 
-void IMM16(uint8_t **p, int32_t imm)
+static void IMM16(uint8_t **p, int32_t imm)
 {
     int i;
 
@@ -98,7 +119,7 @@ void IMM16(uint8_t **p, int32_t imm)
     }
 }
 
-void IMM32(uint8_t **p, int32_t imm)
+static void IMM32(uint8_t **p, int32_t imm)
 {
     int i;
 
@@ -107,7 +128,7 @@ void IMM32(uint8_t **p, int32_t imm)
     }
 }
 
-void IMM32_NI(uint8_t *p, int32_t imm)
+static void IMM32_NI(uint8_t *p, int32_t imm)
 {
     int i;
 
@@ -116,7 +137,7 @@ void IMM32_NI(uint8_t *p, int32_t imm)
     }
 }
 
-int32_t READ_IMM32(uint8_t *p)
+static int32_t READ_IMM32(uint8_t *p)
 {
     int32_t rval = 0;
     int i;
@@ -128,7 +149,7 @@ int32_t READ_IMM32(uint8_t *p)
     return rval;
 }
 
-void MOVI(uint8_t **p, uint8_t dst, uint32_t imm)
+static void MOVI(uint8_t **p, uint8_t dst, uint32_t imm)
 {
     if (dst >= 8) {
         *(*p)++ = 0x41;
@@ -138,7 +159,7 @@ void MOVI(uint8_t **p, uint8_t dst, uint32_t imm)
     IMM32(p, imm);
 }
 
-void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp)
+static void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp)
 {
     if (disp == 0) {
         *(*p)++ = (rm & 7) | ((reg & 7) << 3);
@@ -151,19 +172,93 @@ void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp)
     }
 }
 
-void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp)
+static FFTS_INLINE void MOVDQA(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store)
+{
+    uint8_t r1 = (reg1 & 7);
+    uint8_t r2 = (reg2 & 7);
+    uint8_t r;
+
+    *(*p)++ = 0x66;
+
+    if ((reg1 & 8) || (reg2 & 8)) {
+        *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1);
+    }
+
+    *(*p)++ = 0x0F;
+
+    if (is_store) {
+        *(*p)++ = 0x7F;
+    } else {
+        *(*p)++ = 0x6F;
+    }
+
+    r = r1 | (r2 << 3);
+
+    if ((reg1 & XMM_REG) && (reg2 & XMM_REG)) {
+        assert(disp == 0);
+        *(*p)++ = 0xC0 | r;
+    } else {
+        assert((reg1 & XMM_REG) || (reg2 & XMM_REG));
+
+        if (disp == 0 && r1 != 5) {
+            *(*p)++ = r;
+
+            if (r1 == 4) {
+                *(*p)++ = 0x24;
+            }
+        } else {
+            if (disp <= 127 && disp >= -128) {
+                *(*p)++ = 0x40 | r;
+
+                if (r1 == 4) {
+                    *(*p)++ = 0x24;
+                }
+
+                IMM8(p, disp);
+            } else {
+                *(*p)++ = 0x80 | r;
+
+                if (r1 == 4) {
+                    *(*p)++ = 0x24;
+                }
+
+                IMM32(p, disp);
+            }
+        }
+    }
+}
+
+static FFTS_INLINE void MOVDQA2(uint8_t **p, uint8_t reg1, uint8_t reg2)
+{
+    if (reg1 & XMM_REG) {
+        MOVDQA(p, reg2, reg1, 0, 0);
+    } else {
+        MOVDQA(p, reg1, reg2, 0, 1);
+    }
+}
+
+static FFTS_INLINE void MOVDQA3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t op3)
+{
+    if (reg1 & XMM_REG) {
+        MOVDQA(p, (uint8_t) op2, reg1, op3, 0);
+    } else {
+        MOVDQA(p, reg1, (uint8_t) op3, op2, 1);
+    }
+}
+
+static void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp)
 {
     *(*p)++ = 0x48 | ((base & 0x8) >> 3) | ((dst & 0x8) >> 1);
     *(*p)++ = 0x8d;
     ADDRMODE(p, dst, base, disp);
 }
 
-void RET(uint8_t **p)
+static void RET(uint8_t **p)
 {
     *(*p)++ = 0xc3;
 }
 
-void ADDI(uint8_t **p, uint8_t dst, int32_t imm)
+static void ADDI(uint8_t **p, uint8_t dst, int32_t imm)
 {
     if (dst >= 8) {
         *(*p)++ = 0x49;
@@ -186,7 +281,7 @@ void ADDI(uint8_t **p, uint8_t dst, int32_t imm)
     }
 }
 
-void SUBI(uint8_t **p, uint8_t dst, int32_t imm)
+static void SUBI(uint8_t **p, uint8_t dst, int32_t imm)
 {
     if (dst >= 8) {
         *(*p)++ = 0x49;
@@ -209,13 +304,13 @@ void SUBI(uint8_t **p, uint8_t dst, int32_t imm)
     }
 }
 
-void CALL(uint8_t **p, uint8_t *func)
+static void CALL(uint8_t **p, uint8_t *func)
 {
     *(*p)++ = 0xe8;
     IMM32(p, func - *p - 4);
 }
 
-void PUSH(uint8_t **p, uint8_t reg)
+static void PUSH(uint8_t **p, uint8_t reg)
 {
     if (reg >= 8) {
         *(*p)++ = 0x41;
@@ -224,7 +319,7 @@ void PUSH(uint8_t **p, uint8_t reg)
     *(*p)++ = 0x50 | (reg & 7);
 }
 
-void POP(uint8_t **p, uint8_t reg)
+static void POP(uint8_t **p, uint8_t reg)
 {
     if (reg >= 8) {
         *(*p)++ = 0x41;
@@ -233,7 +328,7 @@ void POP(uint8_t **p, uint8_t reg)
     *(*p)++ = 0x58 | (reg & 7);
 }
 
-void SHIFT(uint8_t **p, uint8_t reg, int shift)
+static void SHIFT(uint8_t **p, uint8_t reg, int shift)
 {
     if (reg >= 8) {
         *(*p)++ = 0x49;
@@ -250,5 +345,4 @@ void SHIFT(uint8_t **p, uint8_t reg, int shift)
     }
 }
 
-
 #endif /* FFTS_CODEGEN_SSE_H */
\ No newline at end of file