summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Anthony Blake <anthonix@me.com> 2012-10-31 17:14:54 +1300
committer Anthony Blake <anthonix@me.com> 2012-10-31 17:14:54 +1300
commit 383e8e34fc65553ce0103bb2d28e92587d530f87 (patch)
tree dd17c41a3cec56bae2cc087d4789b6e2d3a983ae
parent a6f60ef7f723745bd20e8a70f18fe1964c077714 (diff)
download ffts-383e8e34fc65553ce0103bb2d28e92587d530f87.zip
ffts-383e8e34fc65553ce0103bb2d28e92587d530f87.tar.gz
Square 2D works
-rw-r--r-- src/Makefile.am 2
-rw-r--r-- src/Makefile.in 10
-rw-r--r-- src/ffts.h 11
-rw-r--r-- src/ffts_nd.c 80
-rw-r--r-- src/ffts_nd.h 4
5 files changed, 95 insertions, 12 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 8e8a94d..80c4dcb 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,7 +2,7 @@
lib_LTLIBRARIES = libffts.la
-libffts_la_SOURCES = ffts.c patterns.c codegen.c
+libffts_la_SOURCES = ffts.c ffts_nd.c patterns.c codegen.c
libffts_includedir=$(includedir)/ffts
libffts_include_HEADERS = ../include/ffts.h
diff --git a/src/Makefile.in b/src/Makefile.in
index f9b8bb5..ee42f18 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -95,10 +95,11 @@ am__installdirs = "$(DESTDIR)$(libdir)" \
"$(DESTDIR)$(libffts_includedir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
libffts_la_LIBADD =
-am__libffts_la_SOURCES_DIST = ffts.c patterns.c codegen.c neon.s sse.s
+am__libffts_la_SOURCES_DIST = ffts.c ffts_nd.c patterns.c codegen.c \
+ neon.s sse.s
@HAVE_NEON_TRUE@am__objects_1 = neon.lo
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@am__objects_2 = sse.lo
-am_libffts_la_OBJECTS = ffts.lo patterns.lo codegen.lo \
+am_libffts_la_OBJECTS = ffts.lo ffts_nd.lo patterns.lo codegen.lo \
$(am__objects_1) $(am__objects_2)
libffts_la_OBJECTS = $(am_libffts_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
@@ -250,8 +251,8 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
lib_LTLIBRARIES = libffts.la
-libffts_la_SOURCES = ffts.c patterns.c codegen.c $(am__append_1) \
- $(am__append_2)
+libffts_la_SOURCES = ffts.c ffts_nd.c patterns.c codegen.c \
+ $(am__append_1) $(am__append_2)
libffts_includedir = $(includedir)/ffts
libffts_include_HEADERS = ../include/ffts.h
all: all-am
@@ -333,6 +334,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codegen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_nd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/patterns.Plo@am__quote@
.c.o:
diff --git a/src/ffts.h b/src/ffts.h
index ed61700..d0abe29 100644
--- a/src/ffts.h
+++ b/src/ffts.h
@@ -69,15 +69,17 @@ struct _ffts_plan_t {
void *lastlut;
transform_index_t *transforms;
//transform_func_t transform;
- void (*transform)(struct _ffts_plan_t * , const float * , float * );
+ void (*transform)(struct _ffts_plan_t * , const void * , void * );
void *transform_base;
size_t transform_size;
void *constants;
// multi-dimensional stuff:
- struct _ffts_plan_t *plans;
+ struct _ffts_plan_t **plans;
int rank;
- size_t *Ns;
+ size_t *Ns, *Ms;
+ void *buf;
+
void (*destroy)(struct _ffts_plan_t *);
@@ -85,4 +87,7 @@ struct _ffts_plan_t {
typedef struct _ffts_plan_t ffts_plan_t;
+void ffts_free(ffts_plan_t *);
+ffts_plan_t *ffts_init_1d(size_t N, int sign);
+void ffts_execute(ffts_plan_t *, const void *, void *);
#endif
diff --git a/src/ffts_nd.c b/src/ffts_nd.c
index 51915aa..a6c66bb 100644
--- a/src/ffts_nd.c
+++ b/src/ffts_nd.c
@@ -36,23 +36,95 @@
/* Release an N-dimensional plan and everything it points to: the Ns and Ms
 * arrays, each of the `rank` per-dimension sub-plans (via ffts_free), the
 * array holding those sub-plan pointers, the scratch buffer, and finally the
 * plan struct itself.
 * NOTE(review): assumes every plans[i] for i < rank was successfully created;
 * nothing here checks for NULL entries -- confirm ffts_free tolerates that. */
void ffts_free_nd(ffts_plan_t *p) {
free(p->Ns);
+ free(p->Ms);
+
+ int i;
+ for(i=0;i<p->rank;i++) { // one 1-D sub-plan per dimension
+ ffts_free(p->plans[i]);
+ }
+
+ free(p->plans);
+ free(p->buf);
+ free(p); // p itself is released; it must not be used after this call
+}
+
/* Swap 64-bit elements of d in place: for every i in [0, n2) and every j in
 * (i, n1), v[j*n2 + i] is exchanged with v[i*n1 + j]. When n1 == n2 this is
 * an in-place transpose of a square n1 x n1 matrix.
 * NOTE(review): for n1 != n2 the two index expressions use different row
 * strides, so this does not look like a valid rectangular transpose -- the
 * commit message only claims "Square 2D works". Confirm before relying on
 * non-square sizes. */
+void ffts_transpose(void *d, int n1, int n2) {
+ uint64_t *v = (uint64_t *)d; // data is handled in 64-bit units
+
+ int i, j;
+ for (i = 0; i<n2; i++) {
+ for (j = i+1; j<n1; j++) { // j starts past i, so each (i, j) pair is visited once
+ uint64_t temp = v[j*n2 + i];
+ v[j*n2 + i] = v[i*n1 + j];
+ v[i*n1 + j] = temp;
+// uint64_t temp = v[i*n2 + j];
+// v[i*n2 + j] = v[j*n1 + i];
+// v[j*n1 + i] = temp;
+ }
+ }
}
/* Run the N-dimensional transform described by plan p, reading from `in`
 * and leaving the result of the final pass in `out`.
 *
 * Each dimension i is processed as Ms[i] calls to the 1-D sub-plan plans[i]
 * on consecutive runs of Ns[i] 64-bit elements, followed by ffts_transpose on
 * the buffer that was just written. Passes alternate between `out` and the
 * plan's scratch buffer p->buf; the starting buffer is chosen from the parity
 * of rank so that the last pass writes into `out`.
 * NOTE(review): the printf calls emit trace output on every execution. */
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out) {
+ printf("Exe ND\n");
+ uint64_t *din = in; // NOTE(review): drops the const qualifier of `in`
+ uint64_t *buf0, *buf1;
+
+ if(p->rank & 1) { // odd rank: first pass writes straight into out
+ buf1 = out;
+ buf0 = p->buf;
+ }else{ // even rank: first pass writes into the scratch buffer
+ buf1 = p->buf;
+ buf0 = out;
+ }
+
+ size_t i,j;
+ for(i=0;i<p->Ms[0];i++) { // first dimension: in -> buf1, one run of Ns[0] elements per call
+ ffts_execute(p->plans[0], din + (i * p->Ns[0]), buf1 + (i * p->Ns[0]));
+ }
+ ffts_transpose(buf1, p->Ms[0], p->Ns[0]);
+
+ for(i=1;i<p->rank;i++) { // remaining dimensions: buf1 -> buf0, then swap roles
+ printf("t %zu\n", i);
+ for(j=0;j<p->Ms[i];j++) {
+ ffts_execute(p->plans[i], buf1 + (j * p->Ns[i]), buf0 + (j * p->Ns[i]));
+ }
+ ffts_transpose(buf0, p->Ms[i], p->Ns[i]);
+
+ void *b = buf0; // swap so the buffer just written becomes the next pass's input
+ buf0 = buf1;
+ buf1 = b;
+ }
}
/* Build a plan for a rank-dimensional transform whose dimension sizes are
 * Ns[0..rank-1].
 *
 * Allocates the plan, points its transform/destroy callbacks at
 * ffts_execute_nd/ffts_free_nd, copies Ns, computes vol as the product of all
 * Ns[i], allocates a scratch buffer of sizeof(float) * 2 * vol bytes, and for
 * each dimension stores Ms[i] = vol / Ns[i] and creates a 1-D sub-plan with
 * ffts_init_1d(Ns[i], sign). Returns the new plan.
 * NOTE(review): none of the malloc or ffts_init_1d results are checked, and
 * plan fields other than those assigned below are left as malloc returned
 * them. The printf calls emit trace output on every call. */
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign) {
+ size_t vol = 1;
+
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
- p->transform = ffts_execute_nd;
- p->destroy = ffts_free_nd;
+ p->transform = &ffts_execute_nd;
+ p->destroy = &ffts_free_nd;
p->rank = rank;
-
p->Ns = malloc(sizeof(size_t) * rank);
-
+ p->Ms = malloc(sizeof(size_t) * rank);
+ p->plans = malloc(sizeof(ffts_plan_t **) * rank); // NOTE(review): elements are ffts_plan_t *; sizeof(*p->plans) would state that directly
+ printf("rank = %d\n", rank);
+ int i;
+ for(i=0;i<rank;i++) { // copy the caller's sizes and accumulate the total element count
+ p->Ns[i] = Ns[i];
+ printf("N %zu\n", p->Ns[i]);
+ vol *= Ns[i];
+ }
+ printf("VOL %zu\n", vol);
+ p->buf = malloc(sizeof(float) * 2 * vol); // two floats per element
+
+ for(i=0;i<rank;i++) {
+ p->Ms[i] = vol / p->Ns[i]; // number of length-Ns[i] runs in the whole volume
+ printf("M N %zu %zu\n", p->Ms[i], p->Ns[i]);
+ p->plans[i] = ffts_init_1d(p->Ns[i], sign);
+ }
return p;
}
diff --git a/src/ffts_nd.h b/src/ffts_nd.h
index 3d3abd9..2eef287 100644
--- a/src/ffts_nd.h
+++ b/src/ffts_nd.h
@@ -34,6 +34,10 @@
#ifndef __FFTS_ND_H__
#define __FFTS_ND_H__
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+
#include "ffts.h"
OpenPOWER on IntegriCloud