summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Anthony Blake <anthonix@me.com> 2012-08-12 16:38:27 +1200
committer Anthony Blake <anthonix@me.com> 2012-08-12 16:38:27 +1200
commit 8cc439268f3cad8e8bc8569ee6a0770ec0b2b56e (patch)
tree b8fed37807c6e2cc15e2f3a210d0dba4c1061643 /src
parent facf16267d192eee4514666dc132fa9ee92905c9 (diff)
download ffts-8cc439268f3cad8e8bc8569ee6a0770ec0b2b56e.zip
ffts-8cc439268f3cad8e8bc8569ee6a0770ec0b2b56e.tar.gz
Other sizes work
Diffstat (limited to 'src')
-rw-r--r-- src/codegen.c 7
-rw-r--r-- src/cp_sse.c 6
-rw-r--r-- src/neon.s 1
3 files changed, 7 insertions, 7 deletions
diff --git a/src/codegen.c b/src/codegen.c
index 05219dd..ab1f87a 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -103,7 +103,7 @@ uint32_t LUT_offset(size_t N, size_t leafN) {
if(!i || hardcoded) {
#ifdef __ARM_NEON__
if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
- else lut_size += n/4 * sizeof(cdata_t);
+ else lut_size += n/4 * sizeof(cdata_t);
#else
lut_size += n/4 * 2 * sizeof(cdata_t);
#endif
@@ -167,8 +167,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN)
}else{
*fp++ = ADDI(0, 0, (pps[1] * 4)- pAddr);
*fp++ = ADDI(1, 1, pps[0] - pN);
- *fp++ = ADDI(2, 2, LUT_offset(pps[0], leafN) - pLUT);
}
+ //*fp++ = ADDI(2, 2, LUT_offset(pps[0], leafN) - pLUT);
+ *fp++ = ADDI(2, 2, p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT);
if(pps[0] == 2*leafN) {
@@ -181,7 +182,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN)
pAddr = pps[1] * 4;
pN = pps[0];
- pLUT = LUT_offset(pps[0], leafN);
+ pLUT = p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8;//LUT_offset(pps[0], leafN);
fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT);
count += 4;
pps += 2;
diff --git a/src/cp_sse.c b/src/cp_sse.c
index 43163df..b35605c 100644
--- a/src/cp_sse.c
+++ b/src/cp_sse.c
@@ -122,11 +122,11 @@ void ffts_free(ffts_plan_t *p) {
// for(i=0;i<p->n_luts;i++) {
// FFTS_FREE(p->ws[i]);
// }
- free(p->ws);
+ FFTS_FREE(p->ws);
}
if(p->is) free(p->is);
if(p->offsets) free(p->offsets);
- free(p->transforms);
+ //free(p->transforms);
free(p);
}
@@ -247,7 +247,7 @@ ffts_plan_t *ffts_init(size_t N, int sign) {
n = leafN*2;
for(i=0;i<n_luts;i++) {
p->ws_is[i] = w - (cdata_t *)p->ws;
- fprintf(stderr, "LUT[%zu] = %d @ %08x\n", i, n, w);
+ fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]);
if(!i || hardcoded) {
cdata_t *w0 = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
diff --git a/src/neon.s b/src/neon.s
index 3cc56fd..bbcd0c7 100644
--- a/src/neon.s
+++ b/src/neon.s
@@ -199,7 +199,6 @@ neon_x8_t_loop:
vmov q1, q3
vmov q8, q2
vld1.32 {q2,q3}, [r12, :128]!
- @vld1.64 {d2, d3}, [sp] @ 16-byte Reload
vmul.f32 q0, q12, q2
vmul.f32 q11, q14, q2
vmul.f32 q4, q15, q2
OpenPOWER on IntegriCloud