summary refs log tree commit diff stats
path: root/src/macros.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/macros.h')
-rw-r--r-- src/macros.h 308
1 files changed, 175 insertions, 133 deletions
diff --git a/src/macros.h b/src/macros.h
index 08029a3..b755775 100644
--- a/src/macros.h
+++ b/src/macros.h
@@ -1,162 +1,204 @@
/*
-
- This file is part of FFTS -- The Fastest Fourier Transform in the South
-
- Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
- Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
-
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the organization nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+This file is part of FFTS -- The Fastest Fourier Transform in the South
+
+Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
+Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of the organization nor the
+names of its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef __MACROS_H__
-#define __MACROS_H__
+#ifndef FFTS_MACROS_H
+#define FFTS_MACROS_H
+
+#if defined (_MSC_VER) && (_MSC_VER >= 1020)
+#pragma once
+#endif
+/*
+ * Pull in the platform-specific vector primitives. The first matching branch
+ * wins; macros-alpha.h is the fallback when no other branch is selected.
+ * Feature macros are tested with defined() (matching the #ifdef used for
+ * HAVE_NEON) so that an empty definition such as "#define HAVE_SSE" selects
+ * the branch instead of leaving the #elif without an expression.
+ */
#ifdef HAVE_NEON
#include "macros-neon.h"
+#elif defined(HAVE_SSE)
+#include "macros-sse.h"
+#elif defined(__powerpc__)
+#include "macros-altivec.h"
#else
-#ifdef __alpha__
#include "macros-alpha.h"
-#else
-#ifdef __powerpc__
-#include "macros-altivec.h"
-#endif
#endif
-#endif
+/*
+ * V4SF_TX2 - in-place unpack of a vector pair.
+ *
+ * On return *a holds V4SF_UNPACK_LO(*a, *b) and *b holds
+ * V4SF_UNPACK_HI(*a, *b), both evaluated on the values the pointers held
+ * on entry.
+ *
+ * V4SF and the V4SF_UNPACK_* primitives are not defined in this header;
+ * presumably they come from the macros-*.h header included above -- confirm.
+ */
+static FFTS_INLINE void
+V4SF_TX2(V4SF *a, V4SF *b)
+{
+ /* Compute both results first so that neither store feeds the other. */
+ V4SF t0 = V4SF_UNPACK_LO(*a, *b);
+ V4SF t1 = V4SF_UNPACK_HI(*a, *b);
+ *a = t0;
+ *b = t1;
+}
+/*
+ * V4SF_K_N - combine four vectors in place using the factors (re, im).
+ *
+ * With uk = *r0, uk2 = *r1, zk_p = V4SF_IMUL(*r2, re, im) and
+ * zk_n = V4SF_IMULJ(*r3, re, im), the function writes:
+ *
+ *   *r0 = uk  + (zk_p + zk_n)
+ *   *r2 = uk  - (zk_p + zk_n)
+ *   *r1 = uk2 - V4SF_IMULI(inv, zk_p - zk_n)
+ *   *r3 = uk2 + V4SF_IMULI(inv, zk_p - zk_n)
+ *
+ * inv is only forwarded to V4SF_IMULI.
+ *
+ * NOTE(review): this looks like a radix-4 butterfly where (re, im) is the
+ * twiddle factor, V4SF_IMUL/V4SF_IMULJ multiply by the twiddle and its
+ * conjugate, and inv selects the transform direction -- confirm against the
+ * primitive definitions in the included macros-*.h header.
+ */
+static FFTS_INLINE void
+V4SF_K_N(int inv,
+ V4SF re,
+ V4SF im,
+ V4SF *r0,
+ V4SF *r1,
+ V4SF *r2,
+ V4SF *r3)
+{
+ V4SF uk, uk2, zk_p, zk_n, zk, zk_d;
-#ifdef HAVE_VFP
-#include "macros-alpha.h"
-#endif
-#ifdef HAVE_SSE
- #include "macros-sse.h"
-#endif
+ /* Save *r0 and *r1: both are overwritten below but needed for two outputs. */
+ uk = *r0;
+ uk2 = *r1;
-static inline void TX2(V *a, V *b)
-{
- V TX2_t0 = VUNPACKLO(*a, *b);
- V TX2_t1 = VUNPACKHI(*a, *b);
- *a = TX2_t0; *b = TX2_t1;
+ zk_p = V4SF_IMUL(*r2, re, im);
+ zk_n = V4SF_IMULJ(*r3, re, im);
+
+ zk = V4SF_ADD(zk_p, zk_n);
+ zk_d = V4SF_IMULI(inv, V4SF_SUB(zk_p, zk_n));
+
+ *r2 = V4SF_SUB(uk, zk);
+ *r0 = V4SF_ADD(uk, zk);
+ *r3 = V4SF_ADD(uk2, zk_d);
+ *r1 = V4SF_SUB(uk2, zk_d);
}
-static inline void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3)
+/*
+ * V4SF_L_2_4 - load four vectors and produce four combined output vectors.
+ *
+ * Loads one vector from each of i0..i3 with V4SF_LD, forms the pairwise
+ * sums and differences (t4 = t0 + t1, t5 = t0 - t1, t6 = t2 + t3,
+ * t7 = t2 - t3) and writes:
+ *
+ *   *r0 = V4SF_UNPACK_LO(t4, t5)
+ *   *r1 = V4SF_UNPACK_LO(t6, t7)
+ *   *r3 = V4SF_UNPACK_HI(t6 + t4, t7 - V4SF_IMULI(inv, t5))
+ *   *r2 = V4SF_UNPACK_HI(t6 - t4, t7 + V4SF_IMULI(inv, t5))
+ *
+ * *r0 and *r1 are built from t5 before V4SF_IMULI is applied to it.
+ * inv is only forwarded to V4SF_IMULI.
+ *
+ * NOTE(review): the name suggests a leaf step mixing size-2 and size-4
+ * sub-transforms; the lane layout depends on the V4SF_* primitives from
+ * the included macros-*.h header -- confirm there.
+ */
+static FFTS_INLINE void
+V4SF_L_2_4(int inv,
+ const float *FFTS_RESTRICT i0,
+ const float *FFTS_RESTRICT i1,
+ const float *FFTS_RESTRICT i2,
+ const float *FFTS_RESTRICT i3,
+ V4SF *r0,
+ V4SF *r1,
+ V4SF *r2,
+ V4SF *r3)
{
- V uk, uk2, zk_p, zk_n, zk, zk_d;
- uk = *r0; uk2 = *r1;
- zk_p = IMUL(*r2, re, im);
- zk_n = IMULJ(*r3, re, im);
-
- zk = VADD(zk_p, zk_n);
- zk_d = IMULI(inv, VSUB(zk_p, zk_n));
-
- *r2 = VSUB(uk, zk);
- *r0 = VADD(uk, zk);
- *r3 = VADD(uk2, zk_d);
- *r1 = VSUB(uk2, zk_d);
-}
+ V4SF t0, t1, t2, t3, t4, t5, t6, t7;
+ t0 = V4SF_LD(i0);
+ t1 = V4SF_LD(i1);
+ t2 = V4SF_LD(i2);
+ t3 = V4SF_LD(i3);
-static inline void S_4(V r0, V r1, V r2, V r3,
- data_t * restrict o0, data_t * restrict o1,
- data_t * restrict o2, data_t * restrict o3)
-{
- VST(o0, r0); VST(o1, r1); VST(o2, r2); VST(o3, r3);
-}
+ t4 = V4SF_ADD(t0, t1);
+ t5 = V4SF_SUB(t0, t1);
+ t6 = V4SF_ADD(t2, t3);
+ t7 = V4SF_SUB(t2, t3);
+ *r0 = V4SF_UNPACK_LO(t4, t5);
+ *r1 = V4SF_UNPACK_LO(t6, t7);
-static inline void L_2_4(int inv,
- const data_t * restrict i0, const data_t * restrict i1,
- const data_t * restrict i2, const data_t * restrict i3,
- V *r0, V *r1, V *r2, V *r3)
-{
- V t0, t1, t2, t3, t4, t5, t6, t7;
-
- t0 = VLD(i0); t1 = VLD(i1); t2 = VLD(i2); t3 = VLD(i3);
- t4 = VADD(t0, t1);
- t5 = VSUB(t0, t1);
- t6 = VADD(t2, t3);
- t7 = VSUB(t2, t3);
- *r0 = VUNPACKLO(t4, t5);
- *r1 = VUNPACKLO(t6, t7);
- t5 = IMULI(inv, t5);
- t0 = VADD(t6, t4);
- t2 = VSUB(t6, t4);
- t1 = VSUB(t7, t5);
- t3 = VADD(t7, t5);
- *r3 = VUNPACKHI(t0, t1);
- *r2 = VUNPACKHI(t2, t3);
-}
+ t5 = V4SF_IMULI(inv, t5);
+ t0 = V4SF_ADD(t6, t4);
+ t2 = V4SF_SUB(t6, t4);
+ t1 = V4SF_SUB(t7, t5);
+ t3 = V4SF_ADD(t7, t5);
-static inline void L_4_4(int inv,
- const data_t * restrict i0, const data_t * restrict i1,
- const data_t * restrict i2, const data_t * restrict i3,
- V *r0, V *r1, V *r2, V *r3)
-{
- V t0, t1, t2, t3, t4, t5, t6, t7;
-
- t0 = VLD(i0); t1 = VLD(i1); t2 = VLD(i2); t3 = VLD(i3);
- t4 = VADD(t0, t1);
- t5 = VSUB(t0, t1);
- t6 = VADD(t2, t3);
- t7 = IMULI(inv, VSUB(t2, t3));
- t0 = VADD(t4, t6);
- t2 = VSUB(t4, t6);
- t1 = VSUB(t5, t7);
- t3 = VADD(t5, t7);
- TX2(&t0, &t1);
- TX2(&t2, &t3);
- *r0 = t0; *r2 = t1; *r1 = t2; *r3 = t3;
+ *r3 = V4SF_UNPACK_HI(t0, t1);
+ *r2 = V4SF_UNPACK_HI(t2, t3);
}
+/*
+ * V4SF_L_4_4 - load four vectors, combine them and unpack the results.
+ *
+ * Loads one vector from each of i0..i3 with V4SF_LD and computes
+ *
+ *   t4 = t0 + t1, t5 = t0 - t1, t6 = t2 + t3,
+ *   t7 = V4SF_IMULI(inv, t2 - t3)
+ *
+ * followed by t4 + t6, t5 - t7, t4 - t6 and t5 + t7. The four results are
+ * unpacked pairwise with V4SF_TX2 and stored through r0..r3.
+ * inv is only forwarded to V4SF_IMULI.
+ *
+ * NOTE(review): presumably a leaf step for two size-4 sub-transforms;
+ * confirm against the V4SF_* primitive definitions in the included
+ * macros-*.h header.
+ */
+static FFTS_INLINE void
+V4SF_L_4_4(int inv,
+ const float *FFTS_RESTRICT i0,
+ const float *FFTS_RESTRICT i1,
+ const float *FFTS_RESTRICT i2,
+ const float *FFTS_RESTRICT i3,
+ V4SF *r0,
+ V4SF *r1,
+ V4SF *r2,
+ V4SF *r3)
+{
+ V4SF t0, t1, t2, t3, t4, t5, t6, t7;
+
+ t0 = V4SF_LD(i0);
+ t1 = V4SF_LD(i1);
+ t2 = V4SF_LD(i2);
+ t3 = V4SF_LD(i3);
+ t4 = V4SF_ADD(t0, t1);
+ t5 = V4SF_SUB(t0, t1);
+ t6 = V4SF_ADD(t2, t3);
+
+ t7 = V4SF_IMULI(inv, V4SF_SUB(t2, t3));
+
+ t0 = V4SF_ADD(t4, t6);
+ t2 = V4SF_SUB(t4, t6);
+ t1 = V4SF_SUB(t5, t7);
+ t3 = V4SF_ADD(t5, t7);
+
+ V4SF_TX2(&t0, &t1);
+ V4SF_TX2(&t2, &t3);
+
+ /* Output order is r0, r2, r1, r3: t1 goes to *r2 and t2 goes to *r1. */
+ *r0 = t0;
+ *r2 = t1;
+ *r1 = t2;
+ *r3 = t3;
+}
-static inline void L_4_2(int inv,
- const data_t * restrict i0, const data_t * restrict i1,
- const data_t * restrict i2, const data_t * restrict i3,
- V *r0, V *r1, V *r2, V *r3)
+/*
+ * V4SF_L_4_2 - load four vectors, blend the last two, and combine.
+ *
+ * Loads one vector from each of i0..i3 with V4SF_LD. The i2/i3 vectors are
+ * first mixed with V4SF_BLEND in both argument orders (giving t2 and t3).
+ * With t4 = t0 + t1, t5 = t0 - t1, t6 = t2 + t3, t7 = t2 - t3 the function
+ * writes:
+ *
+ *   *r2 = V4SF_UNPACK_HI(t4, t5)
+ *   *r3 = V4SF_UNPACK_HI(t6, t7)
+ *   *r0 = V4SF_UNPACK_LO(t4 + t6, t5 - V4SF_IMULI(inv, t7))
+ *   *r1 = V4SF_UNPACK_LO(t4 - t6, t5 + V4SF_IMULI(inv, t7))
+ *
+ * *r3 is built from t7 before V4SF_IMULI is applied to it.
+ * inv is only forwarded to V4SF_IMULI.
+ *
+ * NOTE(review): the name suggests a leaf step mixing size-4 and size-2
+ * sub-transforms; which lanes V4SF_BLEND selects is defined in the
+ * included macros-*.h header -- confirm there.
+ */
+static FFTS_INLINE void
+V4SF_L_4_2(int inv,
+ const float *FFTS_RESTRICT i0,
+ const float *FFTS_RESTRICT i1,
+ const float *FFTS_RESTRICT i2,
+ const float *FFTS_RESTRICT i3,
+ V4SF *r0,
+ V4SF *r1,
+ V4SF *r2,
+ V4SF *r3)
{
- V t0, t1, t2, t3, t4, t5, t6, t7;
-
- t0 = VLD(i0); t1 = VLD(i1); t6 = VLD(i2); t7 = VLD(i3);
- t2 = VBLEND(t6, t7);
- t3 = VBLEND(t7, t6);
- t4 = VADD(t0, t1);
- t5 = VSUB(t0, t1);
- t6 = VADD(t2, t3);
- t7 = VSUB(t2, t3);
- *r2 = VUNPACKHI(t4, t5);
- *r3 = VUNPACKHI(t6, t7);
- t7 = IMULI(inv, t7);
- t0 = VADD(t4, t6);
- t2 = VSUB(t4, t6);
- t1 = VSUB(t5, t7);
- t3 = VADD(t5, t7);
- *r0 = VUNPACKLO(t0, t1);
- *r1 = VUNPACKLO(t2, t3);
+ V4SF t0, t1, t2, t3, t4, t5, t6, t7;
+
+ t0 = V4SF_LD(i0);
+ t1 = V4SF_LD(i1);
+ t6 = V4SF_LD(i2);
+ t7 = V4SF_LD(i3);
+
+ t2 = V4SF_BLEND(t6, t7);
+ t3 = V4SF_BLEND(t7, t6);
+
+ t4 = V4SF_ADD(t0, t1);
+ t5 = V4SF_SUB(t0, t1);
+ t6 = V4SF_ADD(t2, t3);
+ t7 = V4SF_SUB(t2, t3);
+
+ *r2 = V4SF_UNPACK_HI(t4, t5);
+ *r3 = V4SF_UNPACK_HI(t6, t7);
+
+ t7 = V4SF_IMULI(inv, t7);
+
+ t0 = V4SF_ADD(t4, t6);
+ t2 = V4SF_SUB(t4, t6);
+ t1 = V4SF_SUB(t5, t7);
+ t3 = V4SF_ADD(t5, t7);
+
+ *r0 = V4SF_UNPACK_LO(t0, t1);
+ *r1 = V4SF_UNPACK_LO(t2, t3);
}
-#endif
-// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3:
+
+/*
+ * V4SF_S_4 - store four vectors: r0 to o0, r1 to o1, r2 to o2, r3 to o3,
+ * each through V4SF_ST. Every argument is expanded exactly once.
+ *
+ * NOTE(review): the expansion is four separate statements and already ends
+ * in a semicolon, so using it as the sole body of an unbraced if/else/loop
+ * would guard only the first store. Wrapping it in do { ... } while (0)
+ * would fix that, but needs a check of the call sites first.
+ */
+#define V4SF_S_4(r0, r1, r2, r3, o0, o1, o2, o3) \
+ V4SF_ST(o0, r0); V4SF_ST(o1, r1); V4SF_ST(o2, r2); V4SF_ST(o3, r3);
+
+#endif /* FFTS_MACROS_H */
\ No newline at end of file
OpenPOWER on IntegriCloud