summary refs log tree commit diff stats
path: root/src/codegen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/codegen.c')
-rw-r--r-- src/codegen.c 7
1 files changed, 4 insertions, 3 deletions
diff --git a/src/codegen.c b/src/codegen.c
index 05219dd..ab1f87a 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -103,7 +103,7 @@ uint32_t LUT_offset(size_t N, size_t leafN) {
if(!i || hardcoded) {
#ifdef __ARM_NEON__
if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
- else lut_size += n/4 * sizeof(cdata_t);
+ else lut_size += n/4 * sizeof(cdata_t);
#else
lut_size += n/4 * 2 * sizeof(cdata_t);
#endif
@@ -167,8 +167,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN)
}else{
*fp++ = ADDI(0, 0, (pps[1] * 4)- pAddr);
*fp++ = ADDI(1, 1, pps[0] - pN);
- *fp++ = ADDI(2, 2, LUT_offset(pps[0], leafN) - pLUT);
}
+ //*fp++ = ADDI(2, 2, LUT_offset(pps[0], leafN) - pLUT);
+ *fp++ = ADDI(2, 2, p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT);
if(pps[0] == 2*leafN) {
@@ -181,7 +182,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN)
pAddr = pps[1] * 4;
pN = pps[0];
- pLUT = LUT_offset(pps[0], leafN);
+ pLUT = p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8;//LUT_offset(pps[0], leafN);
fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT);
count += 4;
pps += 2;
OpenPOWER on IntegriCloud