diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2016-03-28 23:45:33 +0300 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2016-03-28 23:45:33 +0300 |
commit | ab556f87890e483d1a0814096cad491de270b6ad (patch) | |
tree | abc04f8e60cbbf3fa64938d9648082d137410fa1 | |
parent | 7f74c87546f97b8d5864e1f20da54d226de9030b (diff) | |
download | ffts-ab556f87890e483d1a0814096cad491de270b6ad.zip ffts-ab556f87890e483d1a0814096cad491de270b6ad.tar.gz |
Rename neon_transpose to neon_transpose4 (4x4 tiled matrix transpose).
Rename neon_transpose_to_buf to neon_transpose8 (8x8 tiled matrix transpose); its prototype and call site now also pass the height h.
-rw-r--r-- | src/ffts_nd.c | 4 | ||||
-rw-r--r-- | src/neon.h | 4 | ||||
-rw-r--r-- | src/neon.s | 16 |
3 files changed, 12 insertions, 12 deletions
diff --git a/src/ffts_nd.c b/src/ffts_nd.c
index c964d7f..ebce101 100644
--- a/src/ffts_nd.c
+++ b/src/ffts_nd.c
@@ -92,13 +92,13 @@ ffts_transpose(uint64_t *in, uint64_t *out, int w, int h)
 {
 #ifdef HAVE_NEON
 #if 0
-    neon_transpose(in, out, w, h);
+    neon_transpose4(in, out, w, h);
 #else
     size_t i, j;
     for (j = 0; j < h; j += 8) {
         for (i = 0; i < w; i += 8) {
-            neon_transpose_to_buf(in + j*w + i, out + i*h + j, w);
+            neon_transpose8(in + j*w + i, out + i*h + j, w, h);
         }
     }
 #endif
diff --git a/src/neon.h b/src/neon.h
--- a/src/neon.h
+++ b/src/neon.h
@@ -45,8 +45,8 @@ void neon_eo();
 void neon_oe();
 void neon_end();
-void neon_transpose(uint64_t *in, uint64_t *out, int w, int h);
-void neon_transpose_to_buf(uint64_t *in, uint64_t *out, int w);
+void neon_transpose4(uint64_t *in, uint64_t *out, int w, int h);
+void neon_transpose8(uint64_t *in, uint64_t *out, int w, int h);
 void neon_static_e_f(ffts_plan_t*, const void*, void*);
 void neon_static_o_f(ffts_plan_t*, const void*, void*);
diff --git a/src/neon.s b/src/neon.s
--- a/src/neon.s
+++ b/src/neon.s
@@ -638,11 +638,11 @@ neon_end:
     .align 4
 #ifdef __APPLE__
-    .globl _neon_transpose
-_neon_transpose:
+    .globl _neon_transpose4
+_neon_transpose4:
 #else
-    .globl neon_transpose
-neon_transpose:
+    .globl neon_transpose4
+neon_transpose4:
 #endif
     push {r4-r6, lr}
     mov r5, r3
@@ -676,11 +676,11 @@ neon_transpose:
     .align 4
 #ifdef __APPLE__
-    .globl _neon_transpose_to_buf
-_neon_transpose_to_buf:
+    .globl _neon_transpose8
+_neon_transpose8:
 #else
-    .globl neon_transpose_to_buf
-neon_transpose_to_buf:
+    .globl neon_transpose8
+neon_transpose8:
 #endif
     push {r4-r8, lr}
     vpush {q4-q7}