diff options
Diffstat (limited to 'src/macros-sse.h')
-rw-r--r-- | src/macros-sse.h | 141 |
1 files changed, 78 insertions, 63 deletions
diff --git a/src/macros-sse.h b/src/macros-sse.h index d845734..827aa67 100644 --- a/src/macros-sse.h +++ b/src/macros-sse.h @@ -1,85 +1,100 @@ /* - - This file is part of FFTS -- The Fastest Fourier Transform in the South - - Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com> - Copyright (c) 2012, The University of Waikato - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the organization nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +This file is part of FFTS -- The Fastest Fourier Transform in the South + +Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com> +Copyright (c) 2012, The University of Waikato + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the organization nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __SSE_FLOAT_H__ -#define __SSE_FLOAT_H__ +#ifndef FFTS_MACROS_SSE_H +#define FFTS_MACROS_SSE_H + +#if defined (_MSC_VER) && (_MSC_VER >= 1020) +#pragma once +#endif #include <xmmintrin.h> -//#define VL 4 +#define FFTS_MALLOC(d,a) (_mm_malloc(d,a)) +#define FFTS_FREE(d) (_mm_free(d)) -typedef __m128 V; +typedef __m128 V4SF; -#define VADD _mm_add_ps -#define VSUB _mm_sub_ps -#define VMUL _mm_mul_ps -//#define VLIT4 _mm_set_ps -#define VXOR _mm_xor_ps -#define VST _mm_store_ps -#define VLD _mm_load_ps +#define V4SF_ADD _mm_add_ps +#define V4SF_SUB _mm_sub_ps +#define V4SF_MUL _mm_mul_ps +#define V4SF_LIT4 _mm_set_ps +#define V4SF_XOR _mm_xor_ps +#define V4SF_ST _mm_store_ps +#define V4SF_LD _mm_load_ps -#define VSWAPPAIRS(x) (_mm_shuffle_ps(x,x,_MM_SHUFFLE(2,3,0,1))) +#define V4SF_SWAP_PAIRS(x) \ + (_mm_shuffle_ps(x, x, _MM_SHUFFLE(2,3,0,1))) -#define VUNPACKHI(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,3,2))) -#define VUNPACKLO(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(1,0,1,0))) +#define V4SF_UNPACK_HI(x,y) \ + (_mm_shuffle_ps(x, y, _MM_SHUFFLE(3,2,3,2))) -#define VBLEND(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,1,0))) +#define V4SF_UNPACK_LO(x,y) \ + (_mm_movelh_ps(x, y)) -#define VLIT4 _mm_set_ps +#define V4SF_BLEND(x, y) \ + (_mm_shuffle_ps(x, y, _MM_SHUFFLE(3,2,1,0))) -#define VDUPRE(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(2,2,0,0))) -#define VDUPIM(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(3,3,1,1))) +#define V4SF_DUPLICATE_RE(r) \ + (_mm_shuffle_ps(r, r, _MM_SHUFFLE(2,2,0,0))) -#define FFTS_MALLOC(d,a) (_mm_malloc(d,a)) -#define FFTS_FREE(d) (_mm_free(d)) +#define V4SF_DUPLICATE_IM(r) \ + (_mm_shuffle_ps(r, r, _MM_SHUFFLE(3,3,1,1))) -__INLINE V IMULI(int inv, V a) { - if(inv) return VSWAPPAIRS(VXOR(a, VLIT4(0.0f, -0.0f, 0.0f, -0.0f))); - else return VSWAPPAIRS(VXOR(a, VLIT4(-0.0f, 0.0f, -0.0f, 0.0f))); +static FFTS_ALWAYS_INLINE V4SF +V4SF_IMULI(int inv, V4SF a) +{ + if (inv) { + return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f))); + } else { + return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(-0.0f, 0.0f, -0.0f, 0.0f))); + } } - -__INLINE V IMUL(V d, V re, V im) { - re = VMUL(re, d); - im = VMUL(im, VSWAPPAIRS(d)); - return VSUB(re, im); +static FFTS_ALWAYS_INLINE V4SF +V4SF_IMUL(V4SF d, V4SF re, V4SF im) +{ + re = V4SF_MUL(re, d); + im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d)); + return V4SF_SUB(re, im); } -__INLINE V IMULJ(V d, V re, V im) { - re = VMUL(re, d); - im = VMUL(im, VSWAPPAIRS(d)); - return VADD(re, im); +static FFTS_ALWAYS_INLINE V4SF +V4SF_IMULJ(V4SF d, V4SF re, V4SF im) +{ + re = V4SF_MUL(re, d); + im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d)); + return V4SF_ADD(re, im); } -#endif -// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3: +#endif /* FFTS_MACROS_SSE_H */ |