From e464bcb622d5ab1426b14a2314d852fc6e1539e1 Mon Sep 17 00:00:00 2001
From: Jukka Ojanen
Date: Tue, 29 Mar 2016 17:01:01 +0300
Subject: Fix neon_transpose8 for non-square matrices, move loops to assembly
 side, about 5% faster

---
 src/ffts_nd.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'src/ffts_nd.c')

diff --git a/src/ffts_nd.c b/src/ffts_nd.c
index ebce101..5745cd5 100644
--- a/src/ffts_nd.c
+++ b/src/ffts_nd.c
@@ -94,13 +94,7 @@ ffts_transpose(uint64_t *in, uint64_t *out, int w, int h)
 #if 0
     neon_transpose4(in, out, w, h);
 #else
-    size_t i, j;
-
-    for (j = 0; j < h; j += 8) {
-        for (i = 0; i < w; i += 8) {
-            neon_transpose8(in + j*w + i, out + i*h + j, w, h);
-        }
-    }
+    neon_transpose8(in, out, w, h);
 #endif
 #else
 #ifdef HAVE_SSE
-- 
cgit v1.1