summaryrefslogtreecommitdiffstats
path: root/src/macros-altivec.h
diff options
context:
space:
mode:
authorAnthony Blake <anthonix@me.com>2013-04-22 17:21:47 +1200
committerAnthony Blake <anthonix@me.com>2013-04-22 17:21:47 +1200
commit752031ba2441f5fef3617b05b9cd2d36cb3b30c4 (patch)
tree6f96efd68262ea89a2948b82255c05536b32f6d5 /src/macros-altivec.h
parentbd43284b757bd62f9d9f1f1108703b134efc16d7 (diff)
downloadffts-752031ba2441f5fef3617b05b9cd2d36cb3b30c4.zip
ffts-752031ba2441f5fef3617b05b9cd2d36cb3b30c4.tar.gz
Included new files I forgot to commit earlier -- thanks Michael Cree
Diffstat (limited to 'src/macros-altivec.h')
-rw-r--r--src/macros-altivec.h137
1 files changed, 137 insertions, 0 deletions
diff --git a/src/macros-altivec.h b/src/macros-altivec.h
new file mode 100644
index 0000000..0d148a5
--- /dev/null
+++ b/src/macros-altivec.h
@@ -0,0 +1,137 @@
+/*
+
+ This file is part of FFTS -- The Fastest Fourier Transform in the South
+
+ Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
+ Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the organization nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef __MACROS_ALTIVEC_H__
+#define __MACROS_ALTIVEC_H__
+
+#include <math.h>
+#include <altivec.h>
+
+#define restrict
+
+typedef vector float V;
+typedef vector unsigned char VUC;
+
+#ifdef __apple__
+#define FFTS_MALLOC(d,a) vec_malloc(d)
+#define FFTS_FREE(d) vec_free(d)
+#else
+/* It appears vec_malloc() and friends are not implemented on Linux */
+#include <malloc.h>
+#define FFTS_MALLOC(d,a) memalign(16,d)
+#define FFTS_FREE(d) free(d)
+#endif
+
+#define VLIT4(f0,f1,f2,f3) ((V){f0, f1, f2, f3})
+
+#define VADD(x,y) vec_add(x,y)
+#define VSUB(x,y) vec_sub(x,y)
+#define VMUL(x,y) vec_madd(x,y,(V){0})
+#define VMULADD(x,y,z) vec_madd(x,y,z)
+#define VNMULSUB(x,y,z) vec_nmsub(x,y,z)
+#define VXOR(x,y) vec_xor((x),(y))
+#define VSWAPPAIRS(x) \
+ vec_perm(x,x,(VUC){0x04,0x05,0x06,0x07,0x00,0x01,0x02,0x03, \
+ 0x0c,0x0d,0x0e,0x0f,0x08,0x09,0x0a,0x0b})
+
+#define VBLEND(x,y) \
+ vec_perm(x,y,(VUC){0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, \
+ 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f})
+
+#define VUNPACKHI(x,y) \
+ vec_perm(x,y,(VUC){0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, \
+ 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f})
+
+#define VUNPACKLO(x,y) \
+ vec_perm(x,y,(VUC){0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, \
+ 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17})
+
+#define VDUPRE(x) \
+ vec_perm(x,x,(VUC){0x00,0x01,0x02,0x03,0x00,0x01,0x02,0x03, \
+ 0x18,0x19,0x1a,0x1b,0x18,0x19,0x1a,0x1b})
+
+#define VDUPIM(x) \
+ vec_perm(x,x,(VUC){0x04,0x05,0x06,0x07,0x04,0x05,0x06,0x07, \
+ 0x1c,0x1d,0x1e,0x1f,0x1c,0x1d,0x1e,0x1f})
+
+
+static inline V IMUL(V d, V re, V im)
+{
+ im = VMUL(im, VSWAPPAIRS(d));
+ re = VMUL(re, d);
+ return VSUB(re, im);
+}
+
+
+static inline V IMULJ(V d, V re, V im)
+{
+ im = VMUL(im, VSWAPPAIRS(d));
+ return VMULADD(re, d, im);
+}
+
+#ifndef __GNUC__
+/* gcc (4.6 and 4.7) ICEs on this code! */
+static inline V MULI(int inv, V x)
+{
+ return VXOR(x, inv ? VLIT4(-0.0f,0.0f,-0.0f,0.0f) : VLIT4(0.0f,-0.0f,0.0f,-0.0f));
+}
+#else
+/* but compiles this fine... */
+static inline V MULI(int inv, V x)
+{
+ V t;
+ t = inv ? VLIT4(-0.0f,0.0f,-0.0f,0.0f) : VLIT4(0.0f,-0.0f,0.0f,-0.0f);
+ return VXOR(x, t);
+}
+#endif
+
+
+static inline V IMULI(int inv, V x)
+{
+ return VSWAPPAIRS(MULI(inv, x));
+}
+
+
+static inline V VLD(const void *s)
+{
+ V *d = (V *)s;
+ return *d;
+}
+
+
+static inline void VST(void *d, V s)
+{
+ V *r = (V *)d;
+ *r = s;
+}
+#endif
OpenPOWER on IntegriCloud