summary refs log tree commit diff stats
path: root/src/codegen_sse.h
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-03 10:46:09 +0200
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-03 10:46:09 +0200
commit: 14c88113349263fafc88a671a71facca7e177dc9 (patch)
tree: aa467450d9944c61a9b87bc788d9935a92aca1a6 /src/codegen_sse.h
parent: b4ec2061aab28f7cc626f36a3d8324eebeaab88a (diff)
download: ffts-14c88113349263fafc88a671a71facca7e177dc9.zip
ffts-14c88113349263fafc88a671a71facca7e177dc9.tar.gz
MOVDQA "intrinsic", two operand MOVDQA2, three operand MOVDQA3 helpers
Diffstat (limited to 'src/codegen_sse.h')
-rw-r--r-- src/codegen_sse.h 126
1 files changed, 110 insertions, 16 deletions
diff --git a/src/codegen_sse.h b/src/codegen_sse.h
index 269d142..6690b92 100644
--- a/src/codegen_sse.h
+++ b/src/codegen_sse.h
@@ -34,6 +34,8 @@
#ifndef FFTS_CODEGEN_SSE_H
#define FFTS_CODEGEN_SSE_H
+#include <assert.h>
+
void neon_x4(float *, size_t, float *);
void neon_x8(float *, size_t, float *);
void neon_x8_t(float *, size_t, float *);
@@ -84,12 +86,31 @@ extern const uint32_t sse_leaf_oe_offsets[8];
#define R14 14
#define R15 15
-void IMM8(uint8_t **p, int32_t imm)
+#define XMM_REG 0x40 /* flag bit OR-ed into a register id to mark it as an XMM register; the MOVDQA helpers test it with (reg & XMM_REG) */
+
+#define XMM0 (XMM_REG | 0x0) /* XMMn is XMM_REG | n for n = 0..15; the low four bits hold the register number */
+#define XMM1 (XMM_REG | 0x1)
+#define XMM2 (XMM_REG | 0x2)
+#define XMM3 (XMM_REG | 0x3)
+#define XMM4 (XMM_REG | 0x4)
+#define XMM5 (XMM_REG | 0x5)
+#define XMM6 (XMM_REG | 0x6)
+#define XMM7 (XMM_REG | 0x7)
+#define XMM8 (XMM_REG | 0x8) /* from here on bit 3 of the number is set, which makes MOVDQA emit its optional 0x40-based prefix byte */
+#define XMM9 (XMM_REG | 0x9)
+#define XMM10 (XMM_REG | 0xa)
+#define XMM11 (XMM_REG | 0xb)
+#define XMM12 (XMM_REG | 0xc)
+#define XMM13 (XMM_REG | 0xd)
+#define XMM14 (XMM_REG | 0xe)
+#define XMM15 (XMM_REG | 0xf)
+
+static void IMM8(uint8_t **p, int32_t imm) /* Appends a one-byte immediate to the code buffer at *p and advances *p by one. */
{
*(*p)++ = (imm & 0xff); /* only the low 8 bits of imm are stored; higher bits are discarded */
}
-void IMM16(uint8_t **p, int32_t imm)
+static void IMM16(uint8_t **p, int32_t imm)
{
int i;
@@ -98,7 +119,7 @@ void IMM16(uint8_t **p, int32_t imm)
}
}
-void IMM32(uint8_t **p, int32_t imm)
+static void IMM32(uint8_t **p, int32_t imm)
{
int i;
@@ -107,7 +128,7 @@ void IMM32(uint8_t **p, int32_t imm)
}
}
-void IMM32_NI(uint8_t *p, int32_t imm)
+static void IMM32_NI(uint8_t *p, int32_t imm)
{
int i;
@@ -116,7 +137,7 @@ void IMM32_NI(uint8_t *p, int32_t imm)
}
}
-int32_t READ_IMM32(uint8_t *p)
+static int32_t READ_IMM32(uint8_t *p)
{
int32_t rval = 0;
int i;
@@ -128,7 +149,7 @@ int32_t READ_IMM32(uint8_t *p)
return rval;
}
-void MOVI(uint8_t **p, uint8_t dst, uint32_t imm)
+static void MOVI(uint8_t **p, uint8_t dst, uint32_t imm)
{
if (dst >= 8) {
*(*p)++ = 0x41;
@@ -138,7 +159,7 @@ void MOVI(uint8_t **p, uint8_t dst, uint32_t imm)
IMM32(p, imm);
}
-void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp)
+static void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp)
{
if (disp == 0) {
*(*p)++ = (rm & 7) | ((reg & 7) << 3);
@@ -151,19 +172,93 @@ void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp)
}
}
-void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp)
+static FFTS_INLINE void MOVDQA(uint8_t **p, uint8_t reg1, uint8_t reg2, int32_t disp, int is_store) /* Writes one MOVDQA encoding at *p and advances *p past it. reg1 fills the ModRM r/m field (register, or memory base when it lacks XMM_REG), reg2 fills the ModRM reg field, disp is the memory displacement, and a non-zero is_store selects opcode byte 0x7F instead of 0x6F. */
+{
+ uint8_t r1 = (reg1 & 7); /* low three bits of reg1; drops bit 3 and the XMM_REG flag */
+ uint8_t r2 = (reg2 & 7); /* low three bits of reg2 */
+ uint8_t r; /* r/m and reg fields combined, without the mod bits */
+
+ *(*p)++ = 0x66; /* prefix byte, emitted unconditionally */
+
+ if ((reg1 & 8) || (reg2 & 8)) { /* at least one register number is in 8..15 */
+ *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1); /* REX prefix: reg1 bit 3 lands in bit 0, reg2 bit 3 lands in bit 2 */
+ }
+
+ *(*p)++ = 0x0F; /* first opcode byte */
+
+ if (is_store) {
+ *(*p)++ = 0x7F; /* second opcode byte for the store direction */
+ } else {
+ *(*p)++ = 0x6F; /* second opcode byte for the load direction */
+ }
+
+ r = r1 | (r2 << 3); /* r1 in bits 0-2, r2 in bits 3-5 */
+
+ if ((reg1 & XMM_REG) && (reg2 & XMM_REG)) { /* both operands are XMM registers */
+ assert(disp == 0); /* a displacement is meaningless without a memory operand */
+ *(*p)++ = 0xC0 | r; /* mod bits 11: register-direct form */
+ } else {
+ assert((reg1 & XMM_REG) || (reg2 & XMM_REG)); /* at least one operand must carry the XMM_REG flag */
+
+ if (disp == 0 && r1 != 5) { /* no displacement bytes; r1 == 5 is excluded and takes the 8-bit path below with a zero displacement */
+ *(*p)++ = r; /* mod bits 00 */
+
+ if (r1 == 4) {
+ *(*p)++ = 0x24; /* extra SIB byte emitted whenever the r/m field is 4 */
+ }
+ } else {
+ if (disp <= 127 && disp >= -128) { /* displacement fits in a signed byte */
+ *(*p)++ = 0x40 | r; /* mod bits 01 */
+
+ if (r1 == 4) {
+ *(*p)++ = 0x24; /* SIB byte, as above */
+ }
+
+ IMM8(p, disp); /* one displacement byte */
+ } else {
+ *(*p)++ = 0x80 | r; /* mod bits 10 */
+
+ if (r1 == 4) {
+ *(*p)++ = 0x24; /* SIB byte, as above */
+ }
+
+ IMM32(p, disp); /* displacement emitted through IMM32 */
+ }
+ }
+ }
+}
+
+static FFTS_INLINE void MOVDQA2(uint8_t **p, uint8_t reg1, uint8_t reg2) /* Two-operand MOVDQA with zero displacement; the direction is chosen by whether reg1 carries the XMM_REG flag. */
+{
+ if (reg1 & XMM_REG) { /* reg1 is an XMM register: load form, reg1 in the reg field and reg2 in the r/m field */
+ MOVDQA(p, reg2, reg1, 0, 0);
+ } else { /* reg1 is not XMM: store form, reg1 in the r/m field and reg2 in the reg field */
+ MOVDQA(p, reg1, reg2, 0, 1);
+ }
+}
+
+static FFTS_INLINE void MOVDQA3(uint8_t **p, uint8_t reg1, int32_t op2, int32_t op3) /* Three-operand MOVDQA; the roles of op2 and op3 swap depending on whether reg1 carries the XMM_REG flag. */
+{
+ if (reg1 & XMM_REG) { /* load form: op2 is narrowed to a register id for the r/m field, op3 is the displacement */
+ MOVDQA(p, (uint8_t) op2, reg1, op3, 0);
+ } else { /* store form: reg1 goes in the r/m field, op2 is the displacement, op3 is narrowed to a register id for the reg field */
+ MOVDQA(p, reg1, (uint8_t) op3, op2, 1);
+ }
+}
+
+static void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) /* Writes an LEA encoding at *p: a prefix byte, opcode 0x8d, then the addressing bytes produced by ADDRMODE. */
{
*(*p)++ = 0x48 | ((base & 0x8) >> 3) | ((dst & 0x8) >> 1); /* prefix byte, always emitted: 0x48 with base bit 3 in bit 0 and dst bit 3 in bit 2 */
*(*p)++ = 0x8d; /* opcode byte */
ADDRMODE(p, dst, base, disp); /* dst is passed as reg, base as rm */
}
-void RET(uint8_t **p)
+static void RET(uint8_t **p) /* Appends the single byte 0xc3 at *p and advances *p by one. */
{
*(*p)++ = 0xc3; /* one-byte return opcode */
}
-void ADDI(uint8_t **p, uint8_t dst, int32_t imm)
+static void ADDI(uint8_t **p, uint8_t dst, int32_t imm)
{
if (dst >= 8) {
*(*p)++ = 0x49;
@@ -186,7 +281,7 @@ void ADDI(uint8_t **p, uint8_t dst, int32_t imm)
}
}
-void SUBI(uint8_t **p, uint8_t dst, int32_t imm)
+static void SUBI(uint8_t **p, uint8_t dst, int32_t imm)
{
if (dst >= 8) {
*(*p)++ = 0x49;
@@ -209,13 +304,13 @@ void SUBI(uint8_t **p, uint8_t dst, int32_t imm)
}
}
-void CALL(uint8_t **p, uint8_t *func)
+static void CALL(uint8_t **p, uint8_t *func) /* Writes opcode 0xe8 followed by an offset to func, emitted through IMM32. */
{
*(*p)++ = 0xe8; /* opcode byte; *p now points at the immediate field */
IMM32(p, func - *p - 4); /* offset is func minus (start of immediate + 4), i.e. from just past the immediate assuming IMM32 writes 4 bytes; the pointer difference is narrowed to int32_t */
}
-void PUSH(uint8_t **p, uint8_t reg)
+static void PUSH(uint8_t **p, uint8_t reg)
{
if (reg >= 8) {
*(*p)++ = 0x41;
@@ -224,7 +319,7 @@ void PUSH(uint8_t **p, uint8_t reg)
*(*p)++ = 0x50 | (reg & 7);
}
-void POP(uint8_t **p, uint8_t reg)
+static void POP(uint8_t **p, uint8_t reg)
{
if (reg >= 8) {
*(*p)++ = 0x41;
@@ -233,7 +328,7 @@ void POP(uint8_t **p, uint8_t reg)
*(*p)++ = 0x58 | (reg & 7);
}
-void SHIFT(uint8_t **p, uint8_t reg, int shift)
+static void SHIFT(uint8_t **p, uint8_t reg, int shift)
{
if (reg >= 8) {
*(*p)++ = 0x49;
@@ -250,5 +345,4 @@ void SHIFT(uint8_t **p, uint8_t reg, int shift)
}
}
-
#endif /* FFTS_CODEGEN_SSE_H */
\ No newline at end of file
OpenPOWER on IntegriCloud