From e464bcb622d5ab1426b14a2314d852fc6e1539e1 Mon Sep 17 00:00:00 2001
From: Jukka Ojanen <jukka.ojanen@linkotec.net>
Date: Tue, 29 Mar 2016 17:01:01 +0300
Subject: Fix neon_transpose8 for non-square matrices, move loops to assembly
 side, about 5% faster

---
 src/ffts_nd.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'src/ffts_nd.c')

diff --git a/src/ffts_nd.c b/src/ffts_nd.c
index ebce101..5745cd5 100644
--- a/src/ffts_nd.c
+++ b/src/ffts_nd.c
@@ -94,13 +94,7 @@ ffts_transpose(uint64_t *in, uint64_t *out, int w, int h)
 #if 0
     neon_transpose4(in, out, w, h);
 #else
-    size_t i, j;
-
-    for (j = 0; j < h; j += 8) {
-        for (i = 0; i < w; i += 8) {
-            neon_transpose8(in + j*w + i, out + i*h + j, w, h);
-        }
-    }
+    neon_transpose8(in, out, w, h);
 #endif
 #else
 #ifdef HAVE_SSE
-- 
cgit v1.1