summary | refs | log | tree | commit | diff | stats
path: root/src/ffts_nd.c
diff options
context:
space:
mode:
author: Anthony Blake <anthonix@me.com> 2012-12-13 23:39:18 +1300
committer: Anthony Blake <anthonix@me.com> 2012-12-13 23:39:18 +1300
commit: d063c809bbffcbffc6276967e52d3107210d3e79 (patch)
tree: 120c8375af46e89e712ad775212d76609b77a108 /src/ffts_nd.c
parent: 9aa63fdfd340a2fb1cb72fcf95ff3241487df273 (diff)
download: ffts-d063c809bbffcbffc6276967e52d3107210d3e79.zip
ffts-d063c809bbffcbffc6276967e52d3107210d3e79.tar.gz
misc
Diffstat (limited to 'src/ffts_nd.c')
-rw-r--r-- src/ffts_nd.c 38
1 files changed, 37 insertions, 1 deletions
diff --git a/src/ffts_nd.c b/src/ffts_nd.c
index 78f16c1..af1423d 100644
--- a/src/ffts_nd.c
+++ b/src/ffts_nd.c
@@ -58,10 +58,45 @@ void ffts_free_nd(ffts_plan_t *p) {
free(p->transpose_buf);
free(p);
}
-
+#define TSIZE 32
+#include <string.h>
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
#ifdef __ARM_NEON__
+ uint64_t tmp[TSIZE*TSIZE] __attribute__((aligned(64)));
+ int tx, ty;
+ int x, y;
+ int tw = w / TSIZE;
+ int th = h / TSIZE;
+ for (ty=0;ty<th;ty++) {
+ for (tx=0;tx<tw;tx++) {
+ uint64_t *ip0 = in + w*ty*TSIZE + tx * TSIZE;
+ uint64_t *op0 = tmp;
+
+ // Copy/transpose to tmp
+ for (y=0;y<TSIZE;y+=1) {
+ uint64_t *ip = ip0;
+ uint64_t *op = op0;
+
+ ip0 += w;
+ op0 += 1;
+
+ for (x=0;x<TSIZE;x+=1) {
+ op[x*TSIZE] = ip[x];
+ }
+ }
+
+ // Copy from tmp to output
+ op0 = out + w*tx*TSIZE + ty * TSIZE;
+ ip0 = tmp;
+ for (y=0;y<TSIZE;y+=1) {
+ memcpy(op0, ip0, TSIZE * sizeof(*ip0));
+ op0 += w;
+ ip0 += TSIZE;
+ }
+ }
+ }
+/*
size_t i,j,k;
int linebytes = w*8;
@@ -129,6 +164,7 @@ void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
// out[i*h + j] = in[j*w + i];
}
}
+ */
#else
size_t i,j;
for(i=0;i<w;i+=2) {
OpenPOWER on IntegriCloud