summary | refs | log | tree | commit | diff | stats
path: root/src/codegen_sse.h
diff options
context:
space:
mode:
author Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-09 11:22:16 +0200
committer Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-09 11:22:16 +0200
commit ad38065bfe4371c489ce9a8afe1c7c3ff42083c1 (patch)
tree 785161939e0a8a4c723def59bd8d2250966e3288 /src/codegen_sse.h
parent 43bb4d927ad562c6dd07f28e3f8c345a71837105 (diff)
download ffts-ad38065bfe4371c489ce9a8afe1c7c3ff42083c1.zip
ffts-ad38065bfe4371c489ce9a8afe1c7c3ff42083c1.tar.gz
Replace XORPS with x64_sse_xorps_reg_reg
Diffstat (limited to 'src/codegen_sse.h')
-rw-r--r-- src/codegen_sse.h 25
1 files changed, 4 insertions, 21 deletions
diff --git a/src/codegen_sse.h b/src/codegen_sse.h
index f30933e..da1d1ed 100644
--- a/src/codegen_sse.h
+++ b/src/codegen_sse.h
@@ -350,24 +350,6 @@ static FFTS_INLINE void SUBPS(uint8_t **p, uint8_t reg2, uint8_t reg1)
*(*p)++ = 0xC0 | r1 | (r2 << 3);
}
-static FFTS_INLINE void XORPS(uint8_t **p, uint8_t reg2, uint8_t reg1)
-{
- uint8_t r1 = (reg1 & 7);
- uint8_t r2 = (reg2 & 7);
-
- /* REX prefix */
- if ((reg1 & 8) || (reg2 & 8)) {
- *(*p)++ = 0x40 | ((reg1 & 8) >> 3) | ((reg2 & 8) >> 1);
- }
-
- /* esacape opcode */
- *(*p)++ = 0x0F;
-
- /* opcode */
- *(*p)++ = 0x57;
- *(*p)++ = 0xC0 | r1 | (r2 << 3);
-}
-
static FFTS_INLINE void ffts_insert_nops(uint8_t **p, uint32_t count)
{
if (count >= 9) {
@@ -720,7 +702,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
*(*fp)++ = 0x2C;
*(*fp)++ = 0x71;
- XORPS(fp, XMM11, XMM3);
+ x64_sse_xorps_reg_reg(*fp, X64_XMM11, X64_XMM3);
/* movaps xmm14, [rax + 0x30] */
/* input + 3 * input_stride */
@@ -797,7 +779,8 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
SUBPS(fp, XMM0, XMM12);
ADDPS(fp, XMM5, XMM12);
SHUFPS(fp, XMM7, XMM7, 0xB1);
- XORPS(fp, XMM6, XMM3);
+ x64_sse_xorps_reg_reg(*fp, X64_XMM6, X64_XMM3);
+
SHUFPS(fp, XMM8, XMM8, 0xB1);
/* movaps xmm12, xmm2 */
@@ -850,7 +833,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
SUBPS(fp, XMM13, XMM10);
ADDPS(fp, XMM11, XMM10);
- XORPS(fp, XMM13, XMM3);
+ x64_sse_xorps_reg_reg(*fp, X64_XMM13, X64_XMM3);
ADDPS(fp, XMM4, XMM11);
SUBPS(fp, XMM14, XMM11);
SHUFPS(fp, XMM13, XMM13, 0xB1);
OpenPOWER on IntegriCloud