summary | refs | log | tree | commit | diff | stats
path: root/src/macros-alpha.h
diff options
context:
space:
mode:
author: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-12-07 12:05:26 +0200
committer: Jukka Ojanen <jukka.ojanen@linkotec.net> 2014-12-07 12:05:26 +0200
commit: d5496f9a78bb45192b176bd32d4b81c72fb576dd (patch)
tree: 4118ab4bc56a7b4a362fcf25df7d56315d6cf09d /src/macros-alpha.h
parent: 91ac7ca9420aa77e79bd533b15536c8f98b162c5 (diff)
download: ffts-d5496f9a78bb45192b176bd32d4b81c72fb576dd.zip
download: ffts-d5496f9a78bb45192b176bd32d4b81c72fb576dd.tar.gz
Fix issue #29 "Make FFTS work on all architectures"
Modify macros-alpha.h to provide scalar operations on all platforms, using a union and memcpy to avoid strict-aliasing issues.
Diffstat (limited to 'src/macros-alpha.h')
-rw-r--r-- src/macros-alpha.h 214
1 files changed, 121 insertions, 93 deletions
diff --git a/src/macros-alpha.h b/src/macros-alpha.h
index be5ec20..f4efaf8 100644
--- a/src/macros-alpha.h
+++ b/src/macros-alpha.h
@@ -1,10 +1,10 @@
/*
-
+
This file is part of FFTS -- The Fastest Fourier Transform in the South
-
- Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
+
+ Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
-
+
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -31,177 +31,205 @@
*/
-#ifndef __MACROS_ALPHA_H__
-#define __MACROS_ALPHA_H__
+#ifndef FFTS_MACROS_ALPHA_H
+#define FFTS_MACROS_ALPHA_H
-#include <math.h>
+#include <string.h>
-#ifdef __alpha__
-#define restrict
-#endif
-
-typedef struct {float r1, i1, r2, i2;} V;
+typedef union {
+ struct {
+ float r1;
+ float i1;
+ float r2;
+ float i2;
+ } r;
+ uint32_t u[4];
+} V;
#define FFTS_MALLOC(d,a) malloc(d)
#define FFTS_FREE(d) free(d)
-#define VLIT4(f3,f2,f1,f0) ((V){f0,f1,f2,f3})
-
-static inline V VADD(V x, V y)
+static FFTS_ALWAYS_INLINE V VLIT4(float f3, float f2, float f1, float f0)
{
V z;
- z.r1 = x.r1 + y.r1;
- z.i1 = x.i1 + y.i1;
- z.r2 = x.r2 + y.r2;
- z.i2 = x.i2 + y.i2;
+
+ z.r.r1 = f0;
+ z.r.i1 = f1;
+ z.r.r2 = f2;
+ z.r.i2 = f3;
+
return z;
}
-
-static inline V VSUB(V x, V y)
+static FFTS_ALWAYS_INLINE V VADD(V x, V y)
{
V z;
- z.r1 = x.r1 - y.r1;
- z.i1 = x.i1 - y.i1;
- z.r2 = x.r2 - y.r2;
- z.i2 = x.i2 - y.i2;
+
+ z.r.r1 = x.r.r1 + y.r.r1;
+ z.r.i1 = x.r.i1 + y.r.i1;
+ z.r.r2 = x.r.r2 + y.r.r2;
+ z.r.i2 = x.r.i2 + y.r.i2;
+
return z;
}
-
-static inline V VMUL(V x, V y)
+static FFTS_ALWAYS_INLINE V VSUB(V x, V y)
{
V z;
- z.r1 = x.r1 * y.r1;
- z.i1 = x.i1 * y.i1;
- z.r2 = x.r2 * y.r2;
- z.i2 = x.i2 * y.i2;
+
+ z.r.r1 = x.r.r1 - y.r.r1;
+ z.r.i1 = x.r.i1 - y.r.i1;
+ z.r.r2 = x.r.r2 - y.r.r2;
+ z.r.i2 = x.r.i2 - y.r.i2;
+
return z;
}
-static inline V VXOR(V x, V y)
+static FFTS_ALWAYS_INLINE V VMUL(V x, V y)
{
- V r;
- r.r1 = (uint32_t)x.r1 ^ (uint32_t)y.r1;
- r.i1 = (uint32_t)x.i1 ^ (uint32_t)y.i1;
- r.r2 = (uint32_t)x.r2 ^ (uint32_t)y.r2;
- r.i2 = (uint32_t)x.i2 ^ (uint32_t)y.i2;
- return r;
+ V z;
+
+ z.r.r1 = x.r.r1 * y.r.r1;
+ z.r.i1 = x.r.i1 * y.r.i1;
+ z.r.r2 = x.r.r2 * y.r.r2;
+ z.r.i2 = x.r.i2 * y.r.i2;
+
+ return z;
}
-static inline V VSWAPPAIRS(V x)
+static FFTS_ALWAYS_INLINE V VXOR(V x, V y)
{
V z;
- z.r1 = x.i1;
- z.i1 = x.r1;
- z.r2 = x.i2;
- z.i2 = x.r2;
+
+ z.u[0] = x.u[0] ^ y.u[0];
+ z.u[1] = x.u[1] ^ y.u[1];
+ z.u[2] = x.u[2] ^ y.u[2];
+ z.u[3] = x.u[3] ^ y.u[3];
+
return z;
}
+static FFTS_ALWAYS_INLINE V VSWAPPAIRS(V x)
+{
+ V z;
+
+ z.r.r1 = x.r.i1;
+ z.r.i1 = x.r.r1;
+ z.r.r2 = x.r.i2;
+ z.r.i2 = x.r.r2;
+
+ return z;
+}
-static inline V VBLEND(V x, V y)
+static FFTS_ALWAYS_INLINE V VBLEND(V x, V y)
{
V z;
- z.r1 = x.r1;
- z.i1 = x.i1;
- z.r2 = y.r2;
- z.i2 = y.i2;
+
+ z.r.r1 = x.r.r1;
+ z.r.i1 = x.r.i1;
+ z.r.r2 = y.r.r2;
+ z.r.i2 = y.r.i2;
+
return z;
}
-static inline V VUNPACKHI(V x, V y)
+static FFTS_ALWAYS_INLINE V VUNPACKHI(V x, V y)
{
V z;
- z.r1 = x.r2;
- z.i1 = x.i2;
- z.r2 = y.r2;
- z.i2 = y.i2;
+
+ z.r.r1 = x.r.r2;
+ z.r.i1 = x.r.i2;
+ z.r.r2 = y.r.r2;
+ z.r.i2 = y.r.i2;
+
return z;
}
-static inline V VUNPACKLO(V x, V y)
+static FFTS_ALWAYS_INLINE V VUNPACKLO(V x, V y)
{
V z;
- z.r1 = x.r1;
- z.i1 = x.i1;
- z.r2 = y.r1;
- z.i2 = y.i1;
+
+ z.r.r1 = x.r.r1;
+ z.r.i1 = x.r.i1;
+ z.r.r2 = y.r.r1;
+ z.r.i2 = y.r.i1;
+
return z;
}
-static inline V VDUPRE(V x)
+static FFTS_ALWAYS_INLINE V VDUPRE(V x)
{
V z;
- z.r1 = x.r1;
- z.i1 = x.r1;
- z.r2 = x.r2;
- z.i2 = x.r2;
+
+ z.r.r1 = x.r.r1;
+ z.r.i1 = x.r.r1;
+ z.r.r2 = x.r.r2;
+ z.r.i2 = x.r.r2;
+
return z;
}
-static inline V VDUPIM(V x)
+static FFTS_ALWAYS_INLINE V VDUPIM(V x)
{
V z;
- z.r1 = x.i1;
- z.i1 = x.i1;
- z.r2 = x.i2;
- z.i2 = x.i2;
+
+ z.r.r1 = x.r.i1;
+ z.r.i1 = x.r.i1;
+ z.r.r2 = x.r.i2;
+ z.r.i2 = x.r.i2;
+
return z;
}
-static inline V IMUL(V d, V re, V im)
+static FFTS_ALWAYS_INLINE V IMUL(V d, V re, V im)
{
re = VMUL(re, d);
im = VMUL(im, VSWAPPAIRS(d));
- return VSUB(re, im);
+ return VSUB(re, im);
}
-
-static inline V IMULJ(V d, V re, V im)
+static FFTS_ALWAYS_INLINE V IMULJ(V d, V re, V im)
{
re = VMUL(re, d);
im = VMUL(im, VSWAPPAIRS(d));
return VADD(re, im);
}
-static inline V MULI(int inv, V x)
+static FFTS_ALWAYS_INLINE V MULI(int inv, V x)
{
V z;
if (inv) {
- z.r1 = -x.r1;
- z.i1 = x.i1;
- z.r2 = -x.r2;
- z.i2 = x.i2;
- }else{
- z.r1 = x.r1;
- z.i1 = -x.i1;
- z.r2 = x.r2;
- z.i2 = -x.i2;
+ z.r.r1 = -x.r.r1;
+ z.r.i1 = x.r.i1;
+ z.r.r2 = -x.r.r2;
+ z.r.i2 = x.r.i2;
+ } else {
+ z.r.r1 = x.r.r1;
+ z.r.i1 = -x.r.i1;
+ z.r.r2 = x.r.r2;
+ z.r.i2 = -x.r.i2;
}
+
return z;
}
-
-static inline V IMULI(int inv, V x)
+static FFTS_ALWAYS_INLINE V IMULI(int inv, V x)
{
return VSWAPPAIRS(MULI(inv, x));
}
-
-static inline V VLD(const void *s)
+static FFTS_ALWAYS_INLINE V VLD(const void *s)
{
- V *d = (V *)s;
- return *d;
+ V z;
+ memcpy(&z, s, sizeof(z));
+ return z;
}
-
-static inline void VST(void *d, V s)
+static FFTS_ALWAYS_INLINE void VST(void *d, V s)
{
- V *r = (V *)d;
+ V *r = (V*) d;
*r = s;
}
-#endif
-// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3:
+#endif /* FFTS_MACROS_ALPHA_H */
\ No newline at end of file
OpenPOWER on IntegriCloud