diff options
author | Anthony Blake <anthonix@me.com> | 2012-08-12 16:38:27 +1200 |
---|---|---|
committer | Anthony Blake <anthonix@me.com> | 2012-08-12 16:38:27 +1200 |
commit | 8cc439268f3cad8e8bc8569ee6a0770ec0b2b56e (patch) | |
tree | b8fed37807c6e2cc15e2f3a210d0dba4c1061643 /src | |
parent | facf16267d192eee4514666dc132fa9ee92905c9 (diff) | |
download | ffts-8cc439268f3cad8e8bc8569ee6a0770ec0b2b56e.zip ffts-8cc439268f3cad8e8bc8569ee6a0770ec0b2b56e.tar.gz |
Other sizes work
Diffstat (limited to 'src')
-rw-r--r-- | src/codegen.c | 7 | ||||
-rw-r--r-- | src/cp_sse.c | 6 | ||||
-rw-r--r-- | src/neon.s | 1 |
3 files changed, 7 insertions, 7 deletions
diff --git a/src/codegen.c b/src/codegen.c index 05219dd..ab1f87a 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -103,7 +103,7 @@ uint32_t LUT_offset(size_t N, size_t leafN) { if(!i || hardcoded) { #ifdef __ARM_NEON__ if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t); - else lut_size += n/4 * sizeof(cdata_t); + else lut_size += n/4 * sizeof(cdata_t); #else lut_size += n/4 * 2 * sizeof(cdata_t); #endif @@ -167,8 +167,9 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN) }else{ *fp++ = ADDI(0, 0, (pps[1] * 4)- pAddr); *fp++ = ADDI(1, 1, pps[0] - pN); - *fp++ = ADDI(2, 2, LUT_offset(pps[0], leafN) - pLUT); } + //*fp++ = ADDI(2, 2, LUT_offset(pps[0], leafN) - pLUT); + *fp++ = ADDI(2, 2, p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT); if(pps[0] == 2*leafN) { @@ -181,7 +182,7 @@ transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN) pAddr = pps[1] * 4; pN = pps[0]; - pLUT = LUT_offset(pps[0], leafN); + pLUT = p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8;//LUT_offset(pps[0], leafN); fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT); count += 4; pps += 2; diff --git a/src/cp_sse.c b/src/cp_sse.c index 43163df..b35605c 100644 --- a/src/cp_sse.c +++ b/src/cp_sse.c @@ -122,11 +122,11 @@ void ffts_free(ffts_plan_t *p) { // for(i=0;i<p->n_luts;i++) { // FFTS_FREE(p->ws[i]); // } - free(p->ws); + FFTS_FREE(p->ws); } if(p->is) free(p->is); if(p->offsets) free(p->offsets); - free(p->transforms); + //free(p->transforms); free(p); } @@ -247,7 +247,7 @@ ffts_plan_t *ffts_init(size_t N, int sign) { n = leafN*2; for(i=0;i<n_luts;i++) { p->ws_is[i] = w - (cdata_t *)p->ws; - fprintf(stderr, "LUT[%zu] = %d @ %08x\n", i, n, w); + fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]); if(!i || hardcoded) { cdata_t *w0 = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32); @@ -199,7 +199,6 @@ neon_x8_t_loop: vmov q1, q3 vmov q8, q2 vld1.32 {q2,q3}, [r12, :128]! - @vld1.64 {d2, d3}, [sp] @ 16-byte Reload vmul.f32 q0, q12, q2 vmul.f32 q11, q14, q2 vmul.f32 q4, q15, q2 |