summaryrefslogtreecommitdiffstats
path: root/src/cp_sse.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cp_sse.c')
-rw-r--r--src/cp_sse.c54
1 files changed, 10 insertions, 44 deletions
diff --git a/src/cp_sse.c b/src/cp_sse.c
index b35605c..1356c7b 100644
--- a/src/cp_sse.c
+++ b/src/cp_sse.c
@@ -73,45 +73,8 @@ firstpass_64(const float * restrict in, float * restrict out, ffts_plan_t * rest
void ffts_execute(ffts_plan_t *p, const void * restrict in, void * restrict out) {
transform_index_t *ps = p->transforms;
- int leafN = 8;
p->firstpass((const float *)in, (float *)out, p);
- p->transform(out, p->N, p->ws);
-
- /*
- size_t ps0_next = ps[0];
- while(ps0_next) {
- size_t ps0 = ps0_next;
- size_t ps1 = ps[1];
- ps0_next = ps[2];
- ps += 2;
-
- if(ps0 == 2*leafN) {
- float *LUT = (float *)p->ws[0];
- float *data = (float *)(out) + ps1;
- #ifdef __ARM_NEON__
- X_4_SPLIT(data, 16, LUT);
- #else
- X_4(data, 16, LUT);
- #endif
-
- }else{
- int index = __builtin_ctzl(ps0)-4;
- float *LUT = (float *)p->ws[__builtin_ctzl(ps0)-4];
- #ifdef __ARM_NEON__
- X_8_SPLIT(((float *)out) + ps1, ps0, LUT);
- #else
- X_8(((float *)out) + ps1, ps0, LUT);
- #endif
- }
-
-
- }
-
- #ifdef __ARM_NEON__
- if(p->N>32)
- X_8_SPLIT_T((float *)out, p->N, p->lastlut);
- #endif
-*/
+ if(p->transform) p->transform(out, p->N, p->ws);
}
void ffts_free(ffts_plan_t *p) {
@@ -119,15 +82,14 @@ void ffts_free(ffts_plan_t *p) {
size_t i;
if(p->ws) {
-// for(i=0;i<p->n_luts;i++) {
-// FFTS_FREE(p->ws[i]);
-// }
FFTS_FREE(p->ws);
}
if(p->is) free(p->is);
if(p->offsets) free(p->offsets);
//free(p->transforms);
+// if(p->transform_base) free(p->transform_base);
+
free(p);
}
@@ -143,8 +105,9 @@ ffts_plan_t *ffts_init(size_t N, int sign) {
else SCALAR_MULI_SIGN = -0.0f;
p->transform = NULL;
+ p->transform_base = NULL;
- if(N > 32) {
+ if(N >= 32) {
ffts_init_offsets(p, N, leafN);
ffts_init_is(p, N, leafN, 2);
// ffts_init_tree(p, N, leafN);
@@ -203,7 +166,7 @@ ffts_plan_t *ffts_init(size_t N, int sign) {
/* LUTS */
size_t n_luts = __builtin_ctzl(N/leafN);
- if(N <= 32) { n_luts = __builtin_ctzl(N/4); hardcoded = 1; }
+ if(N < 32) { n_luts = __builtin_ctzl(N/4); hardcoded = 1; }
if(n_luts >= 32) n_luts = 0;
@@ -245,6 +208,9 @@ ffts_plan_t *ffts_init(size_t N, int sign) {
w = p->ws;
n = leafN*2;
+ if(hardcoded) n = 8;
+
+
for(i=0;i<n_luts;i++) {
p->ws_is[i] = w - (cdata_t *)p->ws;
fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]);
@@ -391,7 +357,7 @@ ffts_plan_t *ffts_init(size_t N, int sign) {
p->N = N;
p->lastlut = w;
p->n_luts = n_luts;
- if(N>32)
+ if(N>=32)
p->transform = ffts_generate_func_code(p, N, leafN);
// fprintf(stderr, "sizeof(size_t) == %lu\n", sizeof(size_t));
OpenPOWER on IntegriCloud