diff options
author | Anthony Blake <anthonix@me.com> | 2012-08-29 18:08:30 +1200 |
---|---|---|
committer | Anthony Blake <anthonix@me.com> | 2012-08-29 18:08:30 +1200 |
commit | 5e4a32240e9ed9cb03ef51d2344ec80c615489cb (patch) | |
tree | ae0fea8ad5906bad0a2393868eeb10b0019815f9 /src/codegen_sse.h | |
parent | 625f46968820cb98391d67782a9deac4504e289a (diff) | |
download | ffts-5e4a32240e9ed9cb03ef51d2344ec80c615489cb.zip ffts-5e4a32240e9ed9cb03ef51d2344ec80c615489cb.tar.gz |
SSE LEAF EE works
Diffstat (limited to 'src/codegen_sse.h')
-rw-r--r-- | src/codegen_sse.h | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/src/codegen_sse.h b/src/codegen_sse.h new file mode 100644 index 0000000..d3b136a --- /dev/null +++ b/src/codegen_sse.h @@ -0,0 +1,104 @@ +#ifndef __CODEGEN_SSE_H__ +#define __CODEGEN_SSE_H__ + +static const __attribute__ ((aligned(16))) float ee_w_data[4] = {0.70710678118654757273731092936941,0.70710678118654746171500846685376, + -0.70710678118654757273731092936941,-0.70710678118654746171500846685376}; +static const __attribute__ ((aligned(16))) data_t oe_w_data[4] = {1.0f,0.70710678118654757273731092936941f, 0.0f,-0.70710678118654746171500846685376}; +static const __attribute__ ((aligned(16))) data_t eo_w_data[4] = {1.0f,0.70710678118654757273731092936941f, 0.0f,-0.70710678118654746171500846685376}; +void neon_x4(float *, size_t, float *); +void neon_x8(float *, size_t, float *); +void neon_x8_t(float *, size_t, float *); +void leaf_ee(); +void neon_oo(); +void neon_eo(); +void neon_oe(); +void neon_end(); + + +extern const uint32_t sse_leaf_ee_offsets[8]; + +#define EAX 0 +#define ECX 1 +#define EDX 2 +#define EBX 3 +#define ESI 6 +#define EDI 7 +#define EBP 5 + +#define RAX 0 +#define RCX 1 +#define RDX 2 +#define RBX 3 +#define RSI 6 +#define RDI 7 +#define RBP 5 +#define R8 8 +#define R9 9 +#define R10 10 +#define R11 11 +#define R12 12 +#define R13 13 +#define R14 14 +#define R15 15 + +void IMM8(uint8_t **p, uint32_t imm) { + *(*p)++ = (imm & 0xff); +} + +void IMM32(uint8_t **p, uint32_t imm) { + int i; + for(i=0;i<4;i++) { + *(*p)++ = (imm & (0xff << (i*8))) >> (i*8); + } +} +void IMM32_NI(uint8_t *p, uint32_t imm) { + int i; + for(i=0;i<4;i++) { + *(p+i) = (imm & (0xff << (i*8))) >> (i*8); + } +} + +uint32_t READ_IMM32(uint8_t *p) { + uint32_t rval = 0; + int i; + for(i=0;i<4;i++) { + rval |= *(p+i) << (i*8); + } + return rval; +} + +void MOVI(uint8_t **p, uint8_t dst, uint32_t imm) { + if(dst < 8) { + *(*p)++ = 0xb8 + dst; + }else{ + *(*p)++ = 0x49; + *(*p)++ = 0xc7; + *(*p)++ = 0xc0 | (dst - 8); + } + IMM32(p, imm); +} + +void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp) { + if(disp == 0) { + *(*p)++ = (rm & 7) | ((reg & 7) << 3); + }else if(disp <= 127 || disp >= -128) { + *(*p)++ = 0x40 | (rm & 7) | ((reg & 7) << 3); + IMM8(p, disp); + }else{ + *(*p)++ = 0x80 | (rm & 7) | ((reg & 7) << 3); + IMM32(p, disp); + } +} + +void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) { + + *(*p)++ = 0x48 | ((base & 0x8) >> 3) | ((dst & 0x8) >> 1); + *(*p)++ = 0x8d; + ADDRMODE(p, dst, base, disp); +} + +void RET(uint8_t **p) { + *(*p)++ = 0xc3; +} + +#endif |