summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2016-03-28 23:45:33 +0300
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2016-03-28 23:45:33 +0300
commit: ab556f87890e483d1a0814096cad491de270b6ad (patch)
tree: abc04f8e60cbbf3fa64938d9648082d137410fa1
parent: 7f74c87546f97b8d5864e1f20da54d226de9030b (diff)
download: ffts-ab556f87890e483d1a0814096cad491de270b6ad.zip
ffts-ab556f87890e483d1a0814096cad491de270b6ad.tar.gz
Rename neon_transpose to neon_transpose4 (4x4 tiled matrix transpose).
Rename neon_transpose_to_buf to neon_transpose8 (8x8 tiled matrix transpose); its prototype and its call site in ffts_transpose now also take the height argument h.
-rw-r--r-- src/ffts_nd.c 4
-rw-r--r-- src/neon.h 4
-rw-r--r-- src/neon.s 16
3 files changed, 12 insertions, 12 deletions
diff --git a/src/ffts_nd.c b/src/ffts_nd.c
index c964d7f..ebce101 100644
--- a/src/ffts_nd.c
+++ b/src/ffts_nd.c
@@ -92,13 +92,13 @@ ffts_transpose(uint64_t *in, uint64_t *out, int w, int h)
{
#ifdef HAVE_NEON
#if 0
- neon_transpose(in, out, w, h);
+ neon_transpose4(in, out, w, h);
#else
size_t i, j;
for (j = 0; j < h; j += 8) {
for (i = 0; i < w; i += 8) {
- neon_transpose_to_buf(in + j*w + i, out + i*h + j, w);
+ neon_transpose8(in + j*w + i, out + i*h + j, w, h);
}
}
#endif
diff --git a/src/neon.h b/src/neon.h
index 66dcd4b..f719159 100644
--- a/src/neon.h
+++ b/src/neon.h
@@ -45,8 +45,8 @@ void neon_eo();
void neon_oe();
void neon_end();
-void neon_transpose(uint64_t *in, uint64_t *out, int w, int h);
-void neon_transpose_to_buf(uint64_t *in, uint64_t *out, int w);
+void neon_transpose4(uint64_t *in, uint64_t *out, int w, int h);
+void neon_transpose8(uint64_t *in, uint64_t *out, int w, int h);
void neon_static_e_f(ffts_plan_t*, const void*, void*);
void neon_static_o_f(ffts_plan_t*, const void*, void*);
diff --git a/src/neon.s b/src/neon.s
index 1e7fb92..9b6ccab 100644
--- a/src/neon.s
+++ b/src/neon.s
@@ -638,11 +638,11 @@ neon_end:
.align 4
#ifdef __APPLE__
- .globl _neon_transpose
-_neon_transpose:
+ .globl _neon_transpose4
+_neon_transpose4:
#else
- .globl neon_transpose
-neon_transpose:
+ .globl neon_transpose4
+neon_transpose4:
#endif
push {r4-r6, lr}
mov r5, r3
@@ -676,11 +676,11 @@ neon_transpose:
.align 4
#ifdef __APPLE__
- .globl _neon_transpose_to_buf
-_neon_transpose_to_buf:
+ .globl _neon_transpose8
+_neon_transpose8:
#else
- .globl neon_transpose_to_buf
-neon_transpose_to_buf:
+ .globl neon_transpose8
+neon_transpose8:
#endif
push {r4-r8, lr}
vpush {q4-q7}
OpenPOWER on IntegriCloud