summary | refs | log | tree | commit | diff | stats
path: root/src/ffts_nd.c
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2016-03-29 17:01:01 +0300
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2016-03-29 17:01:01 +0300
commit: e464bcb622d5ab1426b14a2314d852fc6e1539e1 (patch)
tree: bbd0158431c0ac2f2d51345d1f777bd30b025341 /src/ffts_nd.c
parent: ab556f87890e483d1a0814096cad491de270b6ad (diff)
download: ffts-e464bcb622d5ab1426b14a2314d852fc6e1539e1.zip
download: ffts-e464bcb622d5ab1426b14a2314d852fc6e1539e1.tar.gz
Fix neon_transpose8 for non-square matrices, move loops to assembly side, about 5% faster
Diffstat (limited to 'src/ffts_nd.c')
-rw-r--r-- src/ffts_nd.c 8
1 file changed, 1 insertion, 7 deletions
diff --git a/src/ffts_nd.c b/src/ffts_nd.c
index ebce101..5745cd5 100644
--- a/src/ffts_nd.c
+++ b/src/ffts_nd.c
@@ -94,13 +94,7 @@ ffts_transpose(uint64_t *in, uint64_t *out, int w, int h)
#if 0
neon_transpose4(in, out, w, h);
#else
- size_t i, j;
-
- for (j = 0; j < h; j += 8) {
- for (i = 0; i < w; i += 8) {
- neon_transpose8(in + j*w + i, out + i*h + j, w, h);
- }
- }
+ neon_transpose8(in, out, w, h);
#endif
#else
#ifdef HAVE_SSE
OpenPOWER on IntegriCloud