diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-05 11:43:08 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-05 11:43:08 +0200 |
commit | b4efe4fc9fa2485679c4f6e4a9963c99d791aa0b (patch) | |
tree | 41e5c312f195beab291dc4c89007e3e82f62b569 /src/codegen_sse.h | |
parent | 160d9c835c114fea9f03cff9b19979f1e4c1824b (diff) | |
download | ffts-b4efe4fc9fa2485679c4f6e4a9963c99d791aa0b.zip ffts-b4efe4fc9fa2485679c4f6e4a9963c99d791aa0b.tar.gz |
Reorder functions to alphabetical order
Diffstat (limited to 'src/codegen_sse.h')
-rw-r--r-- | src/codegen_sse.h | 328 |
1 files changed, 206 insertions, 122 deletions
diff --git a/src/codegen_sse.h b/src/codegen_sse.h index f1b1500..abb5008 100644 --- a/src/codegen_sse.h +++ b/src/codegen_sse.h @@ -107,6 +107,52 @@ extern const uint32_t sse_leaf_oe_offsets[8]; #define P(x) (*(*p)++ = x) +/* forward declarations */ +static void IMM8(uint8_t **p, int32_t imm); +static void IMM32(uint8_t **p, int32_t imm); + +static void ADDI(uint8_t **p, uint8_t dst, int32_t imm) +{ + if (dst >= 8) { + *(*p)++ = 0x49; + } else { + *(*p)++ = 0x48; + } + + if (imm > 127 || imm <= -128) { + *(*p)++ = 0x81; + } else { + *(*p)++ = 0x83; + } + + *(*p)++ = 0xc0 | (dst & 0x7); + + if (imm > 127 || imm <= -128) { + IMM32(p, imm); + } else { + IMM8(p, imm); + } +} + +static void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp) +{ + if (disp == 0) { + *(*p)++ = (rm & 7) | ((reg & 7) << 3); + } else if (disp <= 127 || disp >= -128) { + *(*p)++ = 0x40 | (rm & 7) | ((reg & 7) << 3); + IMM8(p, disp); + } else { + *(*p)++ = 0x80 | (rm & 7) | ((reg & 7) << 3); + IMM32(p, disp); + } +} + +static void CALL(uint8_t **p, uint8_t *func) +{ + *(*p)++ = 0xe8; + IMM32(p, func - *p - 4); +} + static void IMM8(uint8_t **p, int32_t imm) { *(*p)++ = (imm & 0xff); @@ -148,53 +194,52 @@ static void IMM32_NI(uint8_t *p, int32_t imm) } } -static int32_t READ_IMM32(uint8_t *p) +static void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) { - int32_t rval = 0; - int i; - - for (i = 0; i < 4; i++) { - rval |= *(p+i) << (8 * i); - } - - return rval; + *(*p)++ = 0x48 | ((base & 0x8) >> 3) | ((dst & 0x8) >> 1); + *(*p)++ = 0x8d; + ADDRMODE(p, dst, base, disp); } -static void MOVI(uint8_t **p, uint8_t dst, uint64_t imm) +static FFTS_INLINE void MOV(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) { - if (dst >= 8 || imm > UINT32_MAX) { - uint8_t val = 0x40; - - if (dst >= 8) { - val |= 1; - } - - if (imm > UINT32_MAX) { - val |= 8; - } + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); - *(*p)++ = val; - } + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x49; + } else { + *(*p)++ = 0x48; + } - *(*p)++ = 0xb8 | (dst & 0x7); - - if (imm > UINT32_MAX) { - IMM64(p, imm); + if (is_store) { + *(*p)++ = 0x89; } else { - IMM32(p, imm); + *(*p)++ = 0x8B; } -} -static void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp) -{ - if (disp == 0) { - *(*p)++ = (rm & 7) | ((reg & 7) << 3); - } else if (disp <= 127 || disp >= -128) { - *(*p)++ = 0x40 | (rm & 7) | ((reg & 7) << 3); - IMM8(p, disp); + if (disp == 0) { + *(*p)++ = r2 | (r1 << 3); + + if (r2 == 4) { + *(*p)++ = 0x24; + } + } else if (disp <= 127 && disp >= -128) { + *(*p)++ = 0x40 | r2 | (r1 << 3); + + if (r2 == 4) { + *(*p)++ = 0x24; + } + + IMM8(p, disp); } else { - *(*p)++ = 0x80 | (rm & 7) | ((reg & 7) << 3); - IMM32(p, disp); + *(*p)++ = 0x80 | r2 | (r1 << 3) | (r1 << 11); + + if (r2 == 4) { + *(*p)++ = 0x24; + } + + IMM32(p, disp); } } @@ -204,12 +249,15 @@ static FFTS_INLINE void MOVAPS(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t uint8_t r2 = (reg2 & 7); uint8_t r; + /* REX prefix */ if ((reg1 & 8) || (reg2 & 8)) { *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); } + /* esacape opcode */ *(*p)++ = 0x0F; + /* opcode */ if (is_store) { *(*p)++ = 0x29; } else { @@ -276,14 +324,18 @@ static FFTS_INLINE void MOVDQA(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t uint8_t r2 = (reg2 & 7); uint8_t r; + /* mandatory prefix */ *(*p)++ = 0x66; + /* REX prefix */ if ((reg1 & 8) || (reg2 & 8)) { *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); } + /* esacape opcode */ *(*p)++ = 0x0F; + /* opcode */ if (is_store) { *(*p)++ = 0x7F; } else { @@ -344,86 +396,137 @@ static FFTS_INLINE void MOVDQA3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t } } -static void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) +static void MOVI(uint8_t **p, uint8_t dst, uint64_t imm) { - *(*p)++ = 0x48 | ((base & 0x8) >> 3) | ((dst & 0x8) >> 1); - *(*p)++ = 0x8d; - ADDRMODE(p, dst, base, disp); -} + /* REX prefix */ + if (dst >= 8 || imm > UINT32_MAX) { + uint8_t val = 0x40; + + if (dst >= 8) { + val |= 1; + } -static void RET(uint8_t **p) -{ - *(*p)++ = 0xc3; + if (imm > UINT32_MAX) { + val |= 8; + } + + *(*p)++ = val; + } + + /* opcode */ + *(*p)++ = 0xb8 | (dst & 0x7); + + if (imm > UINT32_MAX) { + IMM64(p, imm); + } else { + IMM32(p, imm); + } } -static void ADDI(uint8_t **p, uint8_t dst, int32_t imm) +static FFTS_INLINE void MULPS(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) { - if (dst >= 8) { - *(*p)++ = 0x49; - } else { - *(*p)++ = 0x48; - } + uint8_t r1 = (reg1 & 7); + uint8_t r2 = (reg2 & 7); + uint8_t r; - if (imm > 127 || imm <= -128) { - *(*p)++ = 0x81; - } else { - *(*p)++ = 0x83; - } + /* REX prefix */ + if ((reg1 & 8) || (reg2 & 8)) { + *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); + } - *(*p)++ = 0xc0 | (dst & 0x7); + /* esacape opcode */ + *(*p)++ = 0x0F; + + /* opcode */ + *(*p)++ = 0x59; + + r = r1 | (r2 << 3); - if (imm > 127 || imm <= -128) { - IMM32(p, imm); - } else { - IMM8(p, imm); - } -} + if ((reg1 & XMM_REG) && (reg2 & XMM_REG)) { + assert(disp == 0); + *(*p)++ = 0xC0 | r; + } else { + assert((reg1 & XMM_REG) || (reg2 & XMM_REG)); -static void SUBI(uint8_t **p, uint8_t dst, int32_t imm) -{ - if (dst >= 8) { - *(*p)++ = 0x49; - } else { - *(*p)++ = 0x48; - } + if (disp == 0 && r1 != 5) { + *(*p)++ = r; - if (imm > 127 || imm <= -128) { - *(*p)++ = 0x81; - } else { - *(*p)++ = 0x83; - } + if (r1 == 4) { + *(*p)++ = 0x24; + } + } else { + if (disp <= 127 && disp >= -128) { + *(*p)++ = 0x40 | r; - *(*p)++ = 0xe8 | (dst & 0x7); + if (r1 == 4) { + *(*p)++ = 0x24; + } - if (imm > 127 || imm <= -128) { - IMM32(p, imm); - } else { - IMM8(p, imm); - } + IMM8(p, disp); + } else { + *(*p)++ = 0x80 | r; + + if (r1 == 4) { + *(*p)++ = 0x24; + } + + IMM32(p, disp); + } + } + } } -static void CALL(uint8_t **p, uint8_t *func) +static FFTS_INLINE void MULPS2(uint8_t **p, uint8_t reg1, uint8_t reg2) { - *(*p)++ = 0xe8; - IMM32(p, func - *p - 4); + if (reg1 & XMM_REG) { + MULPS(p, reg2, reg1, 0, 0); + } else { + MULPS(p, reg1, reg2, 0, 1); + } } -static void PUSH(uint8_t **p, uint8_t reg) +static FFTS_INLINE void MULPS3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t op3) +{ + if (reg1 & XMM_REG) { + MULPS(p, (uint8_t) op2, reg1, op3, 0); + } else { + MULPS(p, reg1, (uint8_t) op3, op2, 1); + } +} + +static void POP(uint8_t **p, uint8_t reg) { if (reg >= 8) { *(*p)++ = 0x41; } - *(*p)++ = 0x50 | (reg & 7); + *(*p)++ = 0x58 | (reg & 7); } -static void POP(uint8_t **p, uint8_t reg) +static void PUSH(uint8_t **p, uint8_t reg) { if (reg >= 8) { *(*p)++ = 0x41; } - *(*p)++ = 0x58 | (reg & 7); + *(*p)++ = 0x50 | (reg & 7); +} + +static int32_t READ_IMM32(uint8_t *p) +{ + int32_t rval = 0; + int i; + + for (i = 0; i < 4; i++) { + rval |= *(p+i) << (8 * i); + } + + return rval; +} + +static void RET(uint8_t **p) +{ + *(*p)++ = 0xc3; } static void SHIFT(uint8_t **p, uint8_t reg, int shift) @@ -443,45 +546,26 @@ static void SHIFT(uint8_t **p, uint8_t reg, int shift) } } -static FFTS_INLINE void MOV(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) +static void SUBI(uint8_t **p, uint8_t dst, int32_t imm) { - uint8_t r1 = (reg1 & 7); - uint8_t r2 = (reg2 & 7); - - if ((reg1 & 8) || (reg2 & 8)) { - *(*p)++ = 0x49; - } else { - *(*p)++ = 0x48; - } - - if (is_store) { - *(*p)++ = 0x89; - } else { - *(*p)++ = 0x8B; - } - - if (disp == 0) { - *(*p)++ = r2 | (r1 << 3); - - if (r2 == 4) { - *(*p)++ = 0x24; - } - } else if (disp <= 127 && disp >= -128) { - *(*p)++ = 0x40 | r2 | (r1 << 3); - - if (r2 == 4) { - *(*p)++ = 0x24; - } + if (dst >= 8) { + *(*p)++ = 0x49; + } else { + *(*p)++ = 0x48; + } - IMM8(p, disp); + if (imm > 127 || imm <= -128) { + *(*p)++ = 0x81; } else { - *(*p)++ = 0x80 | r2 | (r1 << 3) | (r1 << 11); + *(*p)++ = 0x83; + } - if (r2 == 4) { - *(*p)++ = 0x24; - } + *(*p)++ = 0xe8 | (dst & 0x7); - IMM32(p, disp); + if (imm > 127 || imm <= -128) { + IMM32(p, imm); + } else { + IMM8(p, imm); } } |