summary | refs | log | tree | commit | diff | stats
path: root/src/codegen_sse.h
diff options
context:
space:
mode:
author: Anthony Blake <anthonix@me.com> 2012-08-29 18:08:30 +1200
committer: Anthony Blake <anthonix@me.com> 2012-08-29 18:08:30 +1200
commit: 5e4a32240e9ed9cb03ef51d2344ec80c615489cb (patch)
tree: ae0fea8ad5906bad0a2393868eeb10b0019815f9 /src/codegen_sse.h
parent: 625f46968820cb98391d67782a9deac4504e289a (diff)
download: ffts-5e4a32240e9ed9cb03ef51d2344ec80c615489cb.zip
ffts-5e4a32240e9ed9cb03ef51d2344ec80c615489cb.tar.gz
SSE LEAF EE works
Diffstat (limited to 'src/codegen_sse.h')
-rw-r--r-- src/codegen_sse.h 104
1 files changed, 104 insertions, 0 deletions
diff --git a/src/codegen_sse.h b/src/codegen_sse.h
new file mode 100644
index 0000000..d3b136a
--- /dev/null
+++ b/src/codegen_sse.h
@@ -0,0 +1,104 @@
+#ifndef __CODEGEN_SSE_H__
+#define __CODEGEN_SSE_H__
+
+/*
+ * Three file-local (static), 16-byte-aligned tables of four constants each.
+ *
+ * ee_w_data holds { +c, +c, -c, -c } with c = 0.7071067811865475..., i.e.
+ * approximately 1/sqrt(2).  The two spellings of c used below differ only
+ * from about the 16th significant digit onward.
+ *
+ * oe_w_data and eo_w_data are initialised with exactly the same four values:
+ * { 1, +c, 0, -c }.
+ *
+ * ee_w_data is declared as float while the other two use data_t.
+ * NOTE(review): data_t is declared outside this header; confirm that the
+ * float/data_t mix is intended.
+ *
+ * Initialisers without an 'f' suffix are double constants that are converted
+ * to the element type on initialisation.
+ */
+static const __attribute__ ((aligned(16))) float ee_w_data[4] = {0.70710678118654757273731092936941,0.70710678118654746171500846685376,
+ -0.70710678118654757273731092936941,-0.70710678118654746171500846685376};
+static const __attribute__ ((aligned(16))) data_t oe_w_data[4] = {1.0f,0.70710678118654757273731092936941f, 0.0f,-0.70710678118654746171500846685376};
+static const __attribute__ ((aligned(16))) data_t eo_w_data[4] = {1.0f,0.70710678118654757273731092936941f, 0.0f,-0.70710678118654746171500846685376};
+/*
+ * Declarations of routines and data that are defined outside this header.
+ *
+ * neon_x4, neon_x8 and neon_x8_t each take (float *, size_t, float *).
+ * The remaining functions are declared with empty parentheses, which in C
+ * before C23 leaves their parameter lists unspecified.
+ *
+ * NOTE(review): most names carry a neon_ prefix although this is the SSE
+ * code-generation header; presumably the symbol names are shared with
+ * another backend -- confirm against the definitions.
+ *
+ * This header contains no #include lines, so size_t, uint8_t, uint32_t and
+ * data_t must already be in scope wherever it is included.
+ */
+void neon_x4(float *, size_t, float *);
+void neon_x8(float *, size_t, float *);
+void neon_x8_t(float *, size_t, float *);
+void leaf_ee();
+void neon_oo();
+void neon_eo();
+void neon_oe();
+void neon_end();
+
+
+/* Table of eight 32-bit values defined in another translation unit. */
+extern const uint32_t sse_leaf_ee_offsets[8];
+
+/*
+ * Register numbers passed as the dst/base/reg/rm arguments of the emitters
+ * in this header (MOVI, ADDRMODE, LEA).  The values follow the x86 hardware
+ * register numbering: the low three bits are placed in opcode or ModRM
+ * fields and bit 3 (set for 8..15) is carried by a REX prefix bit.
+ *
+ * Number 4 (ESP/RSP) is not defined here.
+ */
+/* 32-bit register names. */
+#define EAX 0
+#define ECX 1
+#define EDX 2
+#define EBX 3
+#define ESI 6
+#define EDI 7
+#define EBP 5
+
+/* 64-bit register names; RAX..RDI share their numbers with EAX..EDI above. */
+#define RAX 0
+#define RCX 1
+#define RDX 2
+#define RBX 3
+#define RSI 6
+#define RDI 7
+#define RBP 5
+/* Extended registers: numbers 8..15 have bit 3 set. */
+#define R8 8
+#define R9 9
+#define R10 10
+#define R11 11
+#define R12 12
+#define R13 13
+#define R14 14
+#define R15 15
+
+/*
+ * Append the low eight bits of imm as a single byte at *p and advance *p
+ * by one.
+ *
+ * p:   address of the output cursor; on return it points just past the
+ *      byte that was written.
+ * imm: value whose least significant byte is emitted; upper bits are
+ *      discarded.
+ */
+void IMM8(uint8_t **p, uint32_t imm) {
+    uint8_t *out = *p;
+    out[0] = (uint8_t)(imm & 0xff);
+    *p = out + 1;
+}
+
+/*
+ * Append a 32-bit immediate at *p in little-endian order (least significant
+ * byte first) and advance *p by four bytes.
+ *
+ * p:   address of the output cursor; on return it points just past the
+ *      four bytes that were written.  The caller must provide at least
+ *      four writable bytes at *p.
+ * imm: value to emit.
+ *
+ * Each byte is extracted by shifting the unsigned value right and then
+ * masking.  Building the mask as 0xff << 24 instead would shift a signed
+ * int into its sign bit, which is undefined behaviour in C.
+ */
+void IMM32(uint8_t **p, uint32_t imm) {
+    int i;
+    for (i = 0; i < 4; i++) {
+        *(*p)++ = (uint8_t)((imm >> (i * 8)) & 0xffu);
+    }
+}
+/*
+ * Store a 32-bit immediate into p[0..3] in little-endian order (least
+ * significant byte first).  Unlike IMM32, the pointer is passed by value,
+ * so no caller-side cursor is advanced.
+ *
+ * p:   destination; must have at least four writable bytes.
+ * imm: value to store.
+ *
+ * Each byte is extracted by shifting the unsigned value right and then
+ * masking.  Building the mask as 0xff << 24 instead would shift a signed
+ * int into its sign bit, which is undefined behaviour in C.
+ */
+void IMM32_NI(uint8_t *p, uint32_t imm) {
+    int i;
+    for (i = 0; i < 4; i++) {
+        p[i] = (uint8_t)((imm >> (i * 8)) & 0xffu);
+    }
+}
+
+/*
+ * Read four bytes starting at p and assemble them as a little-endian
+ * 32-bit value (p[0] is the least significant byte).
+ *
+ * p: source; must have at least four readable bytes.
+ *
+ * Returns the assembled value.
+ *
+ * Each byte is widened to uint32_t before it is shifted.  Without the cast
+ * the uint8_t operand is promoted to signed int, and a byte >= 0x80 shifted
+ * left by 24 overflows int, which is undefined behaviour in C.
+ */
+uint32_t READ_IMM32(uint8_t *p) {
+    uint32_t rval = 0;
+    int i;
+    for (i = 0; i < 4; i++) {
+        rval |= (uint32_t)p[i] << (i * 8);
+    }
+    return rval;
+}
+
+/*
+ * Emit a "move 32-bit immediate into register" instruction at *p and
+ * advance *p past it.
+ *
+ * p:   address of the output cursor.
+ * dst: destination register number (see the register defines in this file).
+ * imm: immediate operand, emitted little-endian through IMM32.
+ *
+ * Encoding:
+ *   dst >= 8 : bytes 0x49, 0xC7, 0xC0 | (dst - 8), then the immediate
+ *              (prefix 0x49 followed by opcode 0xC7 with a register ModRM).
+ *   dst <  8 : single opcode byte 0xB8 + dst, then the immediate.
+ */
+void MOVI(uint8_t **p, uint8_t dst, uint32_t imm) {
+    uint8_t *out = *p;
+    if (dst >= 8) {
+        out[0] = 0x49;
+        out[1] = 0xc7;
+        out[2] = 0xc0 | (dst - 8);
+        out += 3;
+    } else {
+        out[0] = 0xb8 + dst;
+        out += 1;
+    }
+    *p = out;
+    IMM32(p, imm);
+}
+
+/*
+ * Emit the ModRM byte for a "register, [base + disp]" operand pair at *p,
+ * followed by the displacement when one is needed, and advance *p.
+ *
+ * p:    address of the output cursor.
+ * reg:  register number for the ModRM reg field (low three bits are used).
+ * rm:   base register number for the ModRM r/m field (low three bits are
+ *       used).
+ * disp: signed displacement added to the base register.
+ *
+ * Displacement size selection:
+ *   disp == 0             -> mod = 00, no displacement bytes
+ *   -128 <= disp <= 127   -> mod = 01 (0x40), one displacement byte
+ *   anything else         -> mod = 10 (0x80), four displacement bytes
+ *
+ * The range test must use &&.  Written with ||, it is true for every value,
+ * so the 32-bit form would never be chosen and large displacements would be
+ * truncated to their low byte.
+ *
+ * Bit 3 of reg and rm is not encoded here; the caller has to place it in a
+ * REX prefix (as LEA in this file does).
+ *
+ * NOTE(review): no SIB byte is ever emitted and mod = 00 with (rm & 7) == 5
+ * is not special-cased.  In the x86-64 encoding r/m = 100b requires a SIB
+ * byte and mod = 00 with r/m = 101b selects a disp32-only form, so bases
+ * numbered 4/12 (any disp) and 5/13 (disp == 0) are not encoded correctly
+ * by this helper -- confirm callers never pass them.
+ */
+void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp) {
+    if (disp == 0) {
+        *(*p)++ = (rm & 7) | ((reg & 7) << 3);
+    } else if (disp <= 127 && disp >= -128) {
+        *(*p)++ = 0x40 | (rm & 7) | ((reg & 7) << 3);
+        IMM8(p, disp);
+    } else {
+        *(*p)++ = 0x80 | (rm & 7) | ((reg & 7) << 3);
+        IMM32(p, disp);
+    }
+}
+
+/*
+ * Emit "lea dst, [base + disp]" at *p and advance *p past it.
+ *
+ * p:    address of the output cursor.
+ * dst:  destination register number.
+ * base: base register number of the memory operand.
+ * disp: signed displacement; its encoding size is chosen by ADDRMODE.
+ *
+ * Byte layout: a prefix byte starting from 0x48 with bit 0 set when base
+ * is 8..15 and bit 2 set when dst is 8..15, then opcode 0x8D, then the
+ * ModRM/displacement bytes produced by ADDRMODE.
+ */
+void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) {
+    uint8_t prefix = 0x48;
+    if (base & 0x8) {
+        prefix |= 0x01;   /* bit 3 of base -> prefix bit 0 */
+    }
+    if (dst & 0x8) {
+        prefix |= 0x04;   /* bit 3 of dst -> prefix bit 2 */
+    }
+    (*p)[0] = prefix;
+    (*p)[1] = 0x8d;
+    *p += 2;
+    ADDRMODE(p, dst, base, disp);
+}
+
+/*
+ * Append the single byte 0xC3 (the x86 near-return opcode) at *p and
+ * advance *p by one.
+ *
+ * p: address of the output cursor; on return it points just past the byte
+ *    that was written.
+ */
+void RET(uint8_t **p) {
+    uint8_t *out = *p;
+    out[0] = 0xc3;
+    *p = out + 1;
+}
+
+#endif
OpenPOWER on IntegriCloud