diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-12-07 12:05:26 +0200 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2014-12-07 12:05:26 +0200 |
commit | d5496f9a78bb45192b176bd32d4b81c72fb576dd (patch) | |
tree | 4118ab4bc56a7b4a362fcf25df7d56315d6cf09d /src | |
parent | 91ac7ca9420aa77e79bd533b15536c8f98b162c5 (diff) | |
download | ffts-d5496f9a78bb45192b176bd32d4b81c72fb576dd.zip ffts-d5496f9a78bb45192b176bd32d4b81c72fb576dd.tar.gz |
Fix issue #29 "Make FFTS work on all architectures"
Modify macros-alpha.h to provide scalar operations on all platforms.
Use a union and memcpy to avoid strict-aliasing issues.
Diffstat (limited to 'src')
-rw-r--r-- | src/macros-alpha.h | 214 |
1 files changed, 121 insertions, 93 deletions
diff --git a/src/macros-alpha.h b/src/macros-alpha.h index be5ec20..f4efaf8 100644 --- a/src/macros-alpha.h +++ b/src/macros-alpha.h @@ -1,10 +1,10 @@ /* - + This file is part of FFTS -- The Fastest Fourier Transform in the South - - Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz> + + Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz> Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com> - + All rights reserved. Redistribution and use in source and binary forms, with or without @@ -31,177 +31,205 @@ */ -#ifndef __MACROS_ALPHA_H__ -#define __MACROS_ALPHA_H__ +#ifndef FFTS_MACROS_ALPHA_H +#define FFTS_MACROS_ALPHA_H -#include <math.h> +#include <string.h> -#ifdef __alpha__ -#define restrict -#endif - -typedef struct {float r1, i1, r2, i2;} V; +typedef union { + struct { + float r1; + float i1; + float r2; + float i2; + } r; + uint32_t u[4]; +} V; #define FFTS_MALLOC(d,a) malloc(d) #define FFTS_FREE(d) free(d) -#define VLIT4(f3,f2,f1,f0) ((V){f0,f1,f2,f3}) - -static inline V VADD(V x, V y) +static FFTS_ALWAYS_INLINE V VLIT4(float f3, float f2, float f1, float f0) { V z; - z.r1 = x.r1 + y.r1; - z.i1 = x.i1 + y.i1; - z.r2 = x.r2 + y.r2; - z.i2 = x.i2 + y.i2; + + z.r.r1 = f0; + z.r.i1 = f1; + z.r.r2 = f2; + z.r.i2 = f3; + return z; } - -static inline V VSUB(V x, V y) +static FFTS_ALWAYS_INLINE V VADD(V x, V y) { V z; - z.r1 = x.r1 - y.r1; - z.i1 = x.i1 - y.i1; - z.r2 = x.r2 - y.r2; - z.i2 = x.i2 - y.i2; + + z.r.r1 = x.r.r1 + y.r.r1; + z.r.i1 = x.r.i1 + y.r.i1; + z.r.r2 = x.r.r2 + y.r.r2; + z.r.i2 = x.r.i2 + y.r.i2; + return z; } - -static inline V VMUL(V x, V y) +static FFTS_ALWAYS_INLINE V VSUB(V x, V y) { V z; - z.r1 = x.r1 * y.r1; - z.i1 = x.i1 * y.i1; - z.r2 = x.r2 * y.r2; - z.i2 = x.i2 * y.i2; + + z.r.r1 = x.r.r1 - y.r.r1; + z.r.i1 = x.r.i1 - y.r.i1; + z.r.r2 = x.r.r2 - y.r.r2; + z.r.i2 = x.r.i2 - y.r.i2; + return z; } -static inline V VXOR(V x, V y) +static FFTS_ALWAYS_INLINE V VMUL(V x, V y) { - V r; - r.r1 = (uint32_t)x.r1 ^ (uint32_t)y.r1; 
- r.i1 = (uint32_t)x.i1 ^ (uint32_t)y.i1; - r.r2 = (uint32_t)x.r2 ^ (uint32_t)y.r2; - r.i2 = (uint32_t)x.i2 ^ (uint32_t)y.i2; - return r; + V z; + + z.r.r1 = x.r.r1 * y.r.r1; + z.r.i1 = x.r.i1 * y.r.i1; + z.r.r2 = x.r.r2 * y.r.r2; + z.r.i2 = x.r.i2 * y.r.i2; + + return z; } -static inline V VSWAPPAIRS(V x) +static FFTS_ALWAYS_INLINE V VXOR(V x, V y) { V z; - z.r1 = x.i1; - z.i1 = x.r1; - z.r2 = x.i2; - z.i2 = x.r2; + + z.u[0] = x.u[0] ^ y.u[0]; + z.u[1] = x.u[1] ^ y.u[1]; + z.u[2] = x.u[2] ^ y.u[2]; + z.u[3] = x.u[3] ^ y.u[3]; + return z; } +static FFTS_ALWAYS_INLINE V VSWAPPAIRS(V x) +{ + V z; + + z.r.r1 = x.r.i1; + z.r.i1 = x.r.r1; + z.r.r2 = x.r.i2; + z.r.i2 = x.r.r2; + + return z; +} -static inline V VBLEND(V x, V y) +static FFTS_ALWAYS_INLINE V VBLEND(V x, V y) { V z; - z.r1 = x.r1; - z.i1 = x.i1; - z.r2 = y.r2; - z.i2 = y.i2; + + z.r.r1 = x.r.r1; + z.r.i1 = x.r.i1; + z.r.r2 = y.r.r2; + z.r.i2 = y.r.i2; + return z; } -static inline V VUNPACKHI(V x, V y) +static FFTS_ALWAYS_INLINE V VUNPACKHI(V x, V y) { V z; - z.r1 = x.r2; - z.i1 = x.i2; - z.r2 = y.r2; - z.i2 = y.i2; + + z.r.r1 = x.r.r2; + z.r.i1 = x.r.i2; + z.r.r2 = y.r.r2; + z.r.i2 = y.r.i2; + return z; } -static inline V VUNPACKLO(V x, V y) +static FFTS_ALWAYS_INLINE V VUNPACKLO(V x, V y) { V z; - z.r1 = x.r1; - z.i1 = x.i1; - z.r2 = y.r1; - z.i2 = y.i1; + + z.r.r1 = x.r.r1; + z.r.i1 = x.r.i1; + z.r.r2 = y.r.r1; + z.r.i2 = y.r.i1; + return z; } -static inline V VDUPRE(V x) +static FFTS_ALWAYS_INLINE V VDUPRE(V x) { V z; - z.r1 = x.r1; - z.i1 = x.r1; - z.r2 = x.r2; - z.i2 = x.r2; + + z.r.r1 = x.r.r1; + z.r.i1 = x.r.r1; + z.r.r2 = x.r.r2; + z.r.i2 = x.r.r2; + return z; } -static inline V VDUPIM(V x) +static FFTS_ALWAYS_INLINE V VDUPIM(V x) { V z; - z.r1 = x.i1; - z.i1 = x.i1; - z.r2 = x.i2; - z.i2 = x.i2; + + z.r.r1 = x.r.i1; + z.r.i1 = x.r.i1; + z.r.r2 = x.r.i2; + z.r.i2 = x.r.i2; + return z; } -static inline V IMUL(V d, V re, V im) +static FFTS_ALWAYS_INLINE V IMUL(V d, V re, V im) { re = VMUL(re, d); im = 
VMUL(im, VSWAPPAIRS(d)); - return VSUB(re, im); + return VSUB(re, im); } - -static inline V IMULJ(V d, V re, V im) +static FFTS_ALWAYS_INLINE V IMULJ(V d, V re, V im) { re = VMUL(re, d); im = VMUL(im, VSWAPPAIRS(d)); return VADD(re, im); } -static inline V MULI(int inv, V x) +static FFTS_ALWAYS_INLINE V MULI(int inv, V x) { V z; if (inv) { - z.r1 = -x.r1; - z.i1 = x.i1; - z.r2 = -x.r2; - z.i2 = x.i2; - }else{ - z.r1 = x.r1; - z.i1 = -x.i1; - z.r2 = x.r2; - z.i2 = -x.i2; + z.r.r1 = -x.r.r1; + z.r.i1 = x.r.i1; + z.r.r2 = -x.r.r2; + z.r.i2 = x.r.i2; + } else { + z.r.r1 = x.r.r1; + z.r.i1 = -x.r.i1; + z.r.r2 = x.r.r2; + z.r.i2 = -x.r.i2; } + return z; } - -static inline V IMULI(int inv, V x) +static FFTS_ALWAYS_INLINE V IMULI(int inv, V x) { return VSWAPPAIRS(MULI(inv, x)); } - -static inline V VLD(const void *s) +static FFTS_ALWAYS_INLINE V VLD(const void *s) { - V *d = (V *)s; - return *d; + V z; + memcpy(&z, s, sizeof(z)); + return z; } - -static inline void VST(void *d, V s) +static FFTS_ALWAYS_INLINE void VST(void *d, V s) { - V *r = (V *)d; + V *r = (V*) d; *r = s; } -#endif -// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3: +#endif /* FFTS_MACROS_ALPHA_H */
\ No newline at end of file |