From ab556f87890e483d1a0814096cad491de270b6ad Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Mon, 28 Mar 2016 23:45:33 +0300 Subject: Rename neon_transpose to neon_transpose4, 4x4 tiled matrix transpose. Rename neon_transpose_to_buf to neon_transpose8, 8x8 tiled matrix transpose. --- src/ffts_nd.c | 4 ++-- src/neon.h | 4 ++-- src/neon.s | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/ffts_nd.c b/src/ffts_nd.c index c964d7f..ebce101 100644 --- a/src/ffts_nd.c +++ b/src/ffts_nd.c @@ -92,13 +92,13 @@ ffts_transpose(uint64_t *in, uint64_t *out, int w, int h) { #ifdef HAVE_NEON #if 0 - neon_transpose(in, out, w, h); + neon_transpose4(in, out, w, h); #else size_t i, j; for (j = 0; j < h; j += 8) { for (i = 0; i < w; i += 8) { - neon_transpose_to_buf(in + j*w + i, out + i*h + j, w); + neon_transpose8(in + j*w + i, out + i*h + j, w, h); } } #endif diff --git a/src/neon.h b/src/neon.h index 66dcd4b..f719159 100644 --- a/src/neon.h +++ b/src/neon.h @@ -45,8 +45,8 @@ void neon_eo(); void neon_oe(); void neon_end(); -void neon_transpose(uint64_t *in, uint64_t *out, int w, int h); -void neon_transpose_to_buf(uint64_t *in, uint64_t *out, int w); +void neon_transpose4(uint64_t *in, uint64_t *out, int w, int h); +void neon_transpose8(uint64_t *in, uint64_t *out, int w, int h); void neon_static_e_f(ffts_plan_t*, const void*, void*); void neon_static_o_f(ffts_plan_t*, const void*, void*); diff --git a/src/neon.s b/src/neon.s index 1e7fb92..9b6ccab 100644 --- a/src/neon.s +++ b/src/neon.s @@ -638,11 +638,11 @@ neon_end: .align 4 #ifdef __APPLE__ - .globl _neon_transpose -_neon_transpose: + .globl _neon_transpose4 +_neon_transpose4: #else - .globl neon_transpose -neon_transpose: + .globl neon_transpose4 +neon_transpose4: #endif push {r4-r6, lr} mov r5, r3 @@ -676,11 +676,11 @@ neon_transpose: .align 4 #ifdef __APPLE__ - .globl _neon_transpose_to_buf -_neon_transpose_to_buf: + .globl _neon_transpose8 +_neon_transpose8: #else - .globl neon_transpose_to_buf -neon_transpose_to_buf: + .globl neon_transpose8 +neon_transpose8: #endif push {r4-r8, lr} vpush {q4-q7} -- cgit v1.1