diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-04 15:04:06 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-11-04 15:04:06 +0200 |
commit | 311c05f05c4e9bee5f4731c6a3cd6b8122fc14b4 (patch) | |
tree | dfb7c54912c26ec3b30414ae9a7eae0615909991 /src | |
parent | 0f7c426a663af998e92f586820fd2ca561aafb68 (diff) | |
download | ffts-311c05f05c4e9bee5f4731c6a3cd6b8122fc14b4.zip ffts-311c05f05c4e9bee5f4731c6a3cd6b8122fc14b4.tar.gz |
Replace _M_AMD64 with _M_X64, as the two macros are equivalent and _M_X64 is the vendor-neutral name
Diffstat (limited to 'src')
-rw-r--r-- | src/codegen.c | 19 | ||||
-rw-r--r-- | src/codegen_sse.h | 6 | ||||
-rw-r--r-- | src/ffts.h | 2 |
3 files changed, 14 insertions, 13 deletions
diff --git a/src/codegen.c b/src/codegen.c index 36fdf8d..880f598 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -155,7 +155,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N /* assign loop counter register */ loop_count = 4 * p->i0; -#ifdef _M_AMD64 +#ifdef _M_X64 MOVI(&fp, EBX, loop_count); #else MOVI(&fp, ECX, loop_count); @@ -210,7 +210,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N fp += len; /* align loop/jump destination */ -#ifdef _M_AMD64 +#ifdef _M_X64 ffts_align_mem16(&fp, 8); #else ffts_align_mem16(&fp, 9); @@ -233,7 +233,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N loop_count += 4 * p->i1; /* align loop/jump destination */ -#ifdef _M_AMD64 +#ifdef _M_X64 MOVI(&fp, EBX, loop_count); ffts_align_mem16(&fp, 3); #else @@ -286,7 +286,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N loop_count += 4 * p->i1; /* align loop/jump destination */ -#ifdef _M_AMD64 +#ifdef _M_X64 MOVI(&fp, EBX, loop_count); ffts_align_mem16(&fp, 3); #else @@ -313,7 +313,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N loop_count += 4 * p->i1; /* align loop/jump destination */ -#ifdef _M_AMD64 +#ifdef _M_X64 MOVI(&fp, EBX, loop_count); ffts_align_mem16(&fp, 8); #else @@ -337,12 +337,13 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N memcpy(fp, x_init, len); fp += len; + /* generate subtransform calls */ count = 2; while (pps[0]) { size_t ws_is; if (!pN) { -#ifdef _M_AMD64 +#ifdef _M_X64 MOVI(&fp, EBX, pps[0]); #else MOVI(&fp, ECX, pps[0] / 4); @@ -350,7 +351,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N } else { int offset = (4 * pps[1]) - pAddr; if (offset) { -#ifdef _M_AMD64 +#ifdef _M_X64 ADDI(&fp, R8, offset); #else ADDI(&fp, RDX, offset); @@ -360,7 +361,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, 
size_t N, size_t leaf_N if (pps[0] > leaf_N && pps[0] - pN) { int factor = ffts_ctzl(pps[0]) - ffts_ctzl(pN); -#ifdef _M_AMD64 +#ifdef _M_X64 SHIFT(&fp, EBX, factor); #else SHIFT(&fp, ECX, factor); @@ -372,7 +373,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N if (ws_is != pLUT) { int offset = (int) (ws_is - pLUT); -#ifdef _M_AMD64 +#ifdef _M_X64 ADDI(&fp, RDI, offset); #else ADDI(&fp, R8, offset); diff --git a/src/codegen_sse.h b/src/codegen_sse.h index 33d2b2c..d65af9a 100644 --- a/src/codegen_sse.h +++ b/src/codegen_sse.h @@ -458,7 +458,7 @@ static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign) x_8_addr = *fp; /* align loop/jump destination */ -#ifdef _M_AMD64 +#ifdef _M_X64 ffts_align_mem16(fp, 6); #else ffts_align_mem16(fp, 5); @@ -482,7 +482,7 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p) start = *fp; /* save nonvolatile registers */ -#ifdef _M_AMD64 +#ifdef _M_X64 /* use the shadow space to save first 3 registers */ /* mov [rsp + 8], rbx */ @@ -535,7 +535,7 @@ static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p) static FFTS_INLINE void generate_epilogue(insns_t **fp) { -#ifdef _M_AMD64 +#ifdef _M_X64 /* restore nonvolatile registers */ MOVDQA3(fp, XMM6, RSP, 0); MOVDQA3(fp, XMM7, RSP, 16); @@ -187,7 +187,7 @@ static FFTS_INLINE void ffts_aligned_free(void *p) #define ffts_ctzl __builtin_ctzl #elif defined(_MSC_VER) #include <intrin.h> -#ifdef _M_AMD64 +#ifdef _M_X64 #pragma intrinsic(_BitScanForward64) static __inline unsigned long ffts_ctzl(size_t N) { |