diff options
author | Anthony Blake <anthonix@me.com> | 2012-11-15 15:31:39 +1300 |
---|---|---|
committer | Anthony Blake <anthonix@me.com> | 2012-11-15 15:31:39 +1300 |
commit | 09beffbe4a15a1d9ff4eab718f7e077ad52f9181 (patch) | |
tree | 9cbb15bd31bb00ce4f0f1df85b8aaa2f3760c29d | |
parent | 3c5f840a17c20a20eeb09f6628c1e2f16b8b9ca9 (diff) | |
download | ffts-09beffbe4a15a1d9ff4eab718f7e077ad52f9181.zip ffts-09beffbe4a15a1d9ff4eab718f7e077ad52f9181.tar.gz |
Fixed some compiler warnings and generalized the transform interface (in preparation for supporting double, etc.)
-rw-r--r-- | src/codegen.c | 3 | ||||
-rw-r--r-- | src/ffts.h | 7 | ||||
-rw-r--r-- | src/ffts_nd.c | 2 | ||||
-rw-r--r-- | src/ffts_nd.h | 6 | ||||
-rw-r--r-- | src/ffts_real.h | 1 | ||||
-rw-r--r-- | src/macros.h | 96 |
6 files changed, 69 insertions, 46 deletions
diff --git a/src/codegen.c b/src/codegen.c index 5e761e3..67e9683 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -144,10 +144,13 @@ void insert_nops(uint8_t **p, uint32_t count) { } } + void align_mem16(uint8_t **p, uint32_t offset) { +#ifdef __ARM_NEON__ int r = (16 - (offset & 0xf)) - ((uint32_t)(*p) & 0xf); r = (16 + r) & 0xf; insert_nops(p, r); +#endif } void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) { @@ -58,6 +58,8 @@ typedef size_t transform_index_t; //typedef void (*transform_func_t)(float *data, size_t N, float *LUT); typedef void (*transform_func_t)(float *data, size_t N, float *LUT); +typedef struct _ffts_plan_t ffts_plan_t; + struct _ffts_plan_t { ptrdiff_t *offsets; void __attribute__ ((aligned(32))) *ws; @@ -69,7 +71,7 @@ struct _ffts_plan_t { void *lastlut; transform_index_t *transforms; //transform_func_t transform; - void (*transform)(struct _ffts_plan_t * , const data_t * , data_t * ); + void (*transform)(ffts_plan_t * , const void * , void * ); void *transform_base; size_t transform_size; void *constants; @@ -82,12 +84,11 @@ struct _ffts_plan_t { void *transpose_buf; - void (*destroy)(struct _ffts_plan_t *); + void (*destroy)(ffts_plan_t *); float *A, *B; }; -typedef struct _ffts_plan_t ffts_plan_t; void ffts_free(ffts_plan_t *); ffts_plan_t *ffts_init_1d(size_t N, int sign); diff --git a/src/ffts_nd.c b/src/ffts_nd.c index 84dba7f..b87822e 100644 --- a/src/ffts_nd.c +++ b/src/ffts_nd.c @@ -146,7 +146,7 @@ void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) { } -void ffts_execute_nd(ffts_plan_t *p, const data_t * in, data_t * out) { +void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out) { uint64_t *din = (uint64_t *)in; uint64_t *buf = p->buf; diff --git a/src/ffts_nd.h b/src/ffts_nd.h index 4d2474d..7761f52 100644 --- a/src/ffts_nd.h +++ b/src/ffts_nd.h @@ -46,6 +46,12 @@ #include <xmmintrin.h> #endif +void ffts_free_nd(ffts_plan_t *p); +void ffts_transpose(uint64_t *in, 
uint64_t *out, int w, int h, uint64_t *buf); + +void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out); +ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign); +ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign); #endif diff --git a/src/ffts_real.h b/src/ffts_real.h index 06e15bb..e904b95 100644 --- a/src/ffts_real.h +++ b/src/ffts_real.h @@ -46,6 +46,7 @@ #include <xmmintrin.h> #endif +ffts_plan_t *ffts_init_1d_real(size_t N, int sign); #endif diff --git a/src/macros.h b/src/macros.h index e645b11..effc87d 100644 --- a/src/macros.h +++ b/src/macros.h @@ -221,99 +221,111 @@ L_4_2(int inv, const data_t * restrict i0, const data_t * restrict i1, const dat } __INLINE void -firstpass_16_f(ffts_plan_t * p, const data_t * in, data_t * out) { +firstpass_16_f(ffts_plan_t * p, const void * in, void * out) { + const data_t *din = (const data_t *)in; + data_t *dout = (data_t *)out; V r0_1,r2_3,r4_5,r6_7,r8_9,r10_11,r12_13,r14_15; float *LUT8 = p->ws ; - L_4_4(0, in+0,in+16,in+8,in+24,&r0_1,&r2_3,&r8_9,&r10_11); - L_2_4(0, in+4,in+20,in+28,in+12,&r4_5,&r6_7,&r14_15,&r12_13); + L_4_4(0, din+0,din+16,din+8,din+24,&r0_1,&r2_3,&r8_9,&r10_11); + L_2_4(0, din+4,din+20,din+28,din+12,&r4_5,&r6_7,&r14_15,&r12_13); K_N(0, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7); K_N(0, VLD(LUT8+8),VLD(LUT8+12),&r0_1,&r4_5,&r8_9,&r12_13); - S_4(r0_1,r4_5,r8_9,r12_13,out+0,out+8,out+16,out+24); + S_4(r0_1,r4_5,r8_9,r12_13,dout+0,dout+8,dout+16,dout+24); K_N(0, VLD(LUT8+16),VLD(LUT8+20),&r2_3,&r6_7,&r10_11,&r14_15); - S_4(r2_3,r6_7,r10_11,r14_15,out+4,out+12,out+20,out+28); + S_4(r2_3,r6_7,r10_11,r14_15,dout+4,dout+12,dout+20,dout+28); } __INLINE void -firstpass_16_b(ffts_plan_t * p, const data_t * in, data_t * out) { +firstpass_16_b(ffts_plan_t * p, const void * in, void * out) { + const data_t *din = (const data_t *)in; + data_t *dout = (data_t *)out; V r0_1,r2_3,r4_5,r6_7,r8_9,r10_11,r12_13,r14_15; float *LUT8 = p->ws ; - L_4_4(1, 
in+0,in+16,in+8,in+24,&r0_1,&r2_3,&r8_9,&r10_11); - L_2_4(1, in+4,in+20,in+28,in+12,&r4_5,&r6_7,&r14_15,&r12_13); + L_4_4(1, din+0,din+16,din+8,din+24,&r0_1,&r2_3,&r8_9,&r10_11); + L_2_4(1, din+4,din+20,din+28,din+12,&r4_5,&r6_7,&r14_15,&r12_13); K_N(1, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7); K_N(1, VLD(LUT8+8),VLD(LUT8+12),&r0_1,&r4_5,&r8_9,&r12_13); - S_4(r0_1,r4_5,r8_9,r12_13,out+0,out+8,out+16,out+24); + S_4(r0_1,r4_5,r8_9,r12_13,dout+0,dout+8,dout+16,dout+24); K_N(1, VLD(LUT8+16),VLD(LUT8+20),&r2_3,&r6_7,&r10_11,&r14_15); - S_4(r2_3,r6_7,r10_11,r14_15,out+4,out+12,out+20,out+28); + S_4(r2_3,r6_7,r10_11,r14_15,dout+4,dout+12,dout+20,dout+28); } __INLINE void -firstpass_8_f(ffts_plan_t * p, const data_t * in, data_t * out) { +firstpass_8_f(ffts_plan_t * p, const void * in, void * out) { + const data_t *din = (const data_t *)in; + data_t *dout = (data_t *)out; V r0_1,r2_3,r4_5,r6_7; float *LUT8 = p->ws + p->ws_is[0]; - L_4_2(0, in+0,in+8,in+4,in+12,&r0_1,&r2_3,&r4_5,&r6_7); + L_4_2(0, din+0,din+8,din+4,din+12,&r0_1,&r2_3,&r4_5,&r6_7); K_N(0, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7); - S_4(r0_1,r2_3,r4_5,r6_7,out+0,out+4,out+8,out+12); + S_4(r0_1,r2_3,r4_5,r6_7,dout+0,dout+4,dout+8,dout+12); } __INLINE void -firstpass_8_b(ffts_plan_t * p, const data_t * in, data_t * out) { +firstpass_8_b(ffts_plan_t * p, const void * in, void * out) { + const data_t *din = (const data_t *)in; + data_t *dout = (data_t *)out; V r0_1,r2_3,r4_5,r6_7; float *LUT8 = p->ws + p->ws_is[0]; - L_4_2(1, in+0,in+8,in+4,in+12,&r0_1,&r2_3,&r4_5,&r6_7); + L_4_2(1, din+0,din+8,din+4,din+12,&r0_1,&r2_3,&r4_5,&r6_7); K_N(1, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7); - S_4(r0_1,r2_3,r4_5,r6_7,out+0,out+4,out+8,out+12); + S_4(r0_1,r2_3,r4_5,r6_7,dout+0,dout+4,dout+8,dout+12); } __INLINE void -firstpass_4_f(ffts_plan_t * p, const data_t * in, data_t * out) { - cdata_t *i = (cdata_t *)in, *o = (cdata_t *)out; +firstpass_4_f(ffts_plan_t * p, const void * in, void * out) { + const 
data_t *din = (const data_t *)in; + data_t *dout = (data_t *)out; cdata_t t0, t1, t2, t3, t4, t5, t6, t7; - t0[0] = in[0]; t0[1] = in[1]; - t1[0] = in[4]; t1[1] = in[5]; - t2[0] = in[2]; t2[1] = in[3]; - t3[0] = in[6]; t3[1] = in[7]; + t0[0] = din[0]; t0[1] = din[1]; + t1[0] = din[4]; t1[1] = din[5]; + t2[0] = din[2]; t2[1] = din[3]; + t3[0] = din[6]; t3[1] = din[7]; t4[0] = t0[0] + t1[0]; t4[1] = t0[1] + t1[1]; t5[0] = t0[0] - t1[0]; t5[1] = t0[1] - t1[1]; t6[0] = t2[0] + t3[0]; t6[1] = t2[1] + t3[1]; t7[0] = t2[0] - t3[0]; t7[1] = t2[1] - t3[1]; - out[0] = t4[0] + t6[0]; out[1] = t4[1] + t6[1]; - out[4] = t4[0] - t6[0]; out[5] = t4[1] - t6[1]; - out[2] = t5[0] + t7[1]; out[3] = t5[1] - t7[0]; - out[6] = t5[0] - t7[1]; out[7] = t5[1] + t7[0]; + dout[0] = t4[0] + t6[0]; dout[1] = t4[1] + t6[1]; + dout[4] = t4[0] - t6[0]; dout[5] = t4[1] - t6[1]; + dout[2] = t5[0] + t7[1]; dout[3] = t5[1] - t7[0]; + dout[6] = t5[0] - t7[1]; dout[7] = t5[1] + t7[0]; } __INLINE void -firstpass_4_b(ffts_plan_t * p, const data_t * in, data_t * out) { - cdata_t *i = (cdata_t *)in, *o = (cdata_t *)out; +firstpass_4_b(ffts_plan_t * p, const void * in, void * out) { + const data_t *din = (const data_t *)in; + data_t *dout = (data_t *)out; cdata_t t0, t1, t2, t3, t4, t5, t6, t7; - t0[0] = in[0]; t0[1] = in[1]; - t1[0] = in[4]; t1[1] = in[5]; - t2[0] = in[2]; t2[1] = in[3]; - t3[0] = in[6]; t3[1] = in[7]; + t0[0] = din[0]; t0[1] = din[1]; + t1[0] = din[4]; t1[1] = din[5]; + t2[0] = din[2]; t2[1] = din[3]; + t3[0] = din[6]; t3[1] = din[7]; t4[0] = t0[0] + t1[0]; t4[1] = t0[1] + t1[1]; t5[0] = t0[0] - t1[0]; t5[1] = t0[1] - t1[1]; t6[0] = t2[0] + t3[0]; t6[1] = t2[1] + t3[1]; t7[0] = t2[0] - t3[0]; t7[1] = t2[1] - t3[1]; - out[0] = t4[0] + t6[0]; out[1] = t4[1] + t6[1]; - out[4] = t4[0] - t6[0]; out[5] = t4[1] - t6[1]; - out[2] = t5[0] - t7[1]; out[3] = t5[1] + t7[0]; - out[6] = t5[0] + t7[1]; out[7] = t5[1] - t7[0]; + dout[0] = t4[0] + t6[0]; dout[1] = t4[1] + t6[1]; + dout[4] = t4[0] - t6[0]; 
dout[5] = t4[1] - t6[1]; + dout[2] = t5[0] - t7[1]; dout[3] = t5[1] + t7[0]; + dout[6] = t5[0] + t7[1]; dout[7] = t5[1] - t7[0]; } __INLINE void -firstpass_2(ffts_plan_t * p, const data_t * in, data_t * out) { - cdata_t t0, t1, r0,r1; - t0[0] = in[0]; t0[1] = in[1]; - t1[0] = in[2]; t1[1] = in[3]; +firstpass_2(ffts_plan_t * p, const void * in, void * out) { + const data_t *din = (const data_t *)in; + data_t *dout = (data_t *)out; + cdata_t t0, t1, r0,r1; + t0[0] = din[0]; t0[1] = din[1]; + t1[0] = din[2]; t1[1] = din[3]; r0[0] = t0[0] + t1[0]; r0[1] = t0[1] + t1[1]; r1[0] = t0[0] - t1[0]; r1[1] = t0[1] - t1[1]; - out[0] = r0[0]; out[1] = r0[1]; - out[2] = r1[0]; out[3] = r1[1]; + dout[0] = r0[0]; dout[1] = r0[1]; + dout[2] = r1[0]; dout[3] = r1[1]; } #endif |