diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2016-03-29 17:01:01 +0300 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2016-03-29 17:01:01 +0300 |
commit | e464bcb622d5ab1426b14a2314d852fc6e1539e1 (patch) | |
tree | bbd0158431c0ac2f2d51345d1f777bd30b025341 /src/ffts_nd.c | |
parent | ab556f87890e483d1a0814096cad491de270b6ad (diff) | |
download | ffts-e464bcb622d5ab1426b14a2314d852fc6e1539e1.zip ffts-e464bcb622d5ab1426b14a2314d852fc6e1539e1.tar.gz |
Fix neon_transpose8 for non-square matrices and move the loops to the assembly side; about 5% faster
Diffstat (limited to 'src/ffts_nd.c')
-rw-r--r-- | src/ffts_nd.c | 8 |
1 file changed, 1 insertion, 7 deletions
diff --git a/src/ffts_nd.c b/src/ffts_nd.c index ebce101..5745cd5 100644 --- a/src/ffts_nd.c +++ b/src/ffts_nd.c @@ -94,13 +94,7 @@ ffts_transpose(uint64_t *in, uint64_t *out, int w, int h) #if 0 neon_transpose4(in, out, w, h); #else - size_t i, j; - - for (j = 0; j < h; j += 8) { - for (i = 0; i < w; i += 8) { - neon_transpose8(in + j*w + i, out + i*h + j, w, h); - } - } + neon_transpose8(in, out, w, h); #endif #else #ifdef HAVE_SSE |