summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-04 15:04:06 +0200
committer Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-11-04 15:04:06 +0200
commit 311c05f05c4e9bee5f4731c6a3cd6b8122fc14b4 (patch)
tree dfb7c54912c26ec3b30414ae9a7eae0615909991
parent 0f7c426a663af998e92f586820fd2ca561aafb68 (diff)
download ffts-311c05f05c4e9bee5f4731c6a3cd6b8122fc14b4.zip
ffts-311c05f05c4e9bee5f4731c6a3cd6b8122fc14b4.tar.gz
Replace _M_AMD64 with _M_X64, as the two macros are equivalent and _M_X64 is the vendor-neutral name
-rw-r--r-- src/codegen.c 19
-rw-r--r-- src/codegen_sse.h 6
-rw-r--r-- src/ffts.h 2
3 files changed, 14 insertions, 13 deletions
diff --git a/src/codegen.c b/src/codegen.c
index 36fdf8d..880f598 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -155,7 +155,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
/* assign loop counter register */
loop_count = 4 * p->i0;
-#ifdef _M_AMD64
+#ifdef _M_X64
MOVI(&fp, EBX, loop_count);
#else
MOVI(&fp, ECX, loop_count);
@@ -210,7 +210,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
fp += len;
/* align loop/jump destination */
-#ifdef _M_AMD64
+#ifdef _M_X64
ffts_align_mem16(&fp, 8);
#else
ffts_align_mem16(&fp, 9);
@@ -233,7 +233,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
loop_count += 4 * p->i1;
/* align loop/jump destination */
-#ifdef _M_AMD64
+#ifdef _M_X64
MOVI(&fp, EBX, loop_count);
ffts_align_mem16(&fp, 3);
#else
@@ -286,7 +286,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
loop_count += 4 * p->i1;
/* align loop/jump destination */
-#ifdef _M_AMD64
+#ifdef _M_X64
MOVI(&fp, EBX, loop_count);
ffts_align_mem16(&fp, 3);
#else
@@ -313,7 +313,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
loop_count += 4 * p->i1;
/* align loop/jump destination */
-#ifdef _M_AMD64
+#ifdef _M_X64
MOVI(&fp, EBX, loop_count);
ffts_align_mem16(&fp, 8);
#else
@@ -337,12 +337,13 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
memcpy(fp, x_init, len);
fp += len;
+ /* generate subtransform calls */
count = 2;
while (pps[0]) {
size_t ws_is;
if (!pN) {
-#ifdef _M_AMD64
+#ifdef _M_X64
MOVI(&fp, EBX, pps[0]);
#else
MOVI(&fp, ECX, pps[0] / 4);
@@ -350,7 +351,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
} else {
int offset = (4 * pps[1]) - pAddr;
if (offset) {
-#ifdef _M_AMD64
+#ifdef _M_X64
ADDI(&fp, R8, offset);
#else
ADDI(&fp, RDX, offset);
@@ -360,7 +361,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
if (pps[0] > leaf_N && pps[0] - pN) {
int factor = ffts_ctzl(pps[0]) - ffts_ctzl(pN);
-#ifdef _M_AMD64
+#ifdef _M_X64
SHIFT(&fp, EBX, factor);
#else
SHIFT(&fp, ECX, factor);
@@ -372,7 +373,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N
if (ws_is != pLUT) {
int offset = (int) (ws_is - pLUT);
-#ifdef _M_AMD64
+#ifdef _M_X64
ADDI(&fp, RDI, offset);
#else
ADDI(&fp, R8, offset);
diff --git a/src/codegen_sse.h b/src/codegen_sse.h
index 33d2b2c..d65af9a 100644
--- a/src/codegen_sse.h
+++ b/src/codegen_sse.h
@@ -458,7 +458,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
x_8_addr = *fp;
/* align loop/jump destination */
-#ifdef _M_AMD64
+#ifdef _M_X64
ffts_align_mem16(fp, 6);
#else
ffts_align_mem16(fp, 5);
@@ -482,7 +482,7 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p)
start = *fp;
/* save nonvolatile registers */
-#ifdef _M_AMD64
+#ifdef _M_X64
/* use the shadow space to save first 3 registers */
/* mov [rsp + 8], rbx */
@@ -535,7 +535,7 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p)
static FFTS_INLINE void generate_epilogue(insns_t **fp)
{
-#ifdef _M_AMD64
+#ifdef _M_X64
/* restore nonvolatile registers */
MOVDQA3(fp, XMM6, RSP, 0);
MOVDQA3(fp, XMM7, RSP, 16);
diff --git a/src/ffts.h b/src/ffts.h
index ca2951a..156a3b3 100644
--- a/src/ffts.h
+++ b/src/ffts.h
@@ -187,7 +187,7 @@ static FFTS_INLINE void ffts_aligned_free(void *p)
#define ffts_ctzl __builtin_ctzl
#elif defined(_MSC_VER)
#include <intrin.h>
-#ifdef _M_AMD64
+#ifdef _M_X64
#pragma intrinsic(_BitScanForward64)
static __inline unsigned long ffts_ctzl(size_t N)
{
OpenPOWER on IntegriCloud