diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-09-02 18:24:25 -0400 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-09-02 22:15:56 -0400 |
commit | fad83c572e8ef9674e3fe5a217195b5934972730 (patch) | |
tree | 6e6a53dca5d3a629ee2257cb8f9deef1bee5f4a2 /vec_sse_float4.h | |
parent | ea1b0d10679852bafcf2ecff2fb49ffc0bddebfd (diff) | |
download | vecmathlib-fad83c572e8ef9674e3fe5a217195b5934972730.zip vecmathlib-fad83c572e8ef9674e3fe5a217195b5934972730.tar.gz |
SSE: Check whether abs/max/min are available as intrinsics before using them
Diffstat (limited to 'vec_sse_float4.h')
-rw-r--r-- | vec_sse_float4.h | 36 |
1 files changed, 33 insertions, 3 deletions
diff --git a/vec_sse_float4.h b/vec_sse_float4.h
index ee4bde8..816e9d3 100644
--- a/vec_sse_float4.h
+++ b/vec_sse_float4.h
@@ -14,6 +14,9 @@
 #ifdef __SSE3__ // Intel's SSE 3
 # include <pmmintrin.h>
 #endif
+#ifdef __SSSE3__ // Intel's SSSE 3 (header used for _mm_abs_epi32 below)
+# include <tmmintrin.h>
+#endif
 #if defined __SSE4_1__ // Intel's SSE 4.1
 # include <smmintrin.h>
 #endif
@@ -313,10 +316,10 @@ namespace vecmathlib {
       return ! (*this < x);
     }
 
-    intvec_t abs() const { return _mm_abs_epi32(v); }
+    intvec_t abs() const;           // defined out of line; guarded by __SSSE3__
     boolvec_t isignbit() const { return as_bool(); }
-    intvec_t max(intvec_t x) const { return _mm_max_epi32(v, x.v); }
-    intvec_t min(intvec_t x) const { return _mm_min_epi32(v, x.v); }
+    intvec_t max(intvec_t x) const; // defined out of line; guarded by __SSE4_1__
+    intvec_t min(intvec_t x) const; // defined out of line; guarded by __SSE4_1__
   };
 
 
@@ -695,6 +698,15 @@ namespace vecmathlib {
 
   // intvec definitions
 
+  inline intvec<float,4> intvec<float,4>::abs() const
+  {
+#ifdef __SSSE3__
+    return _mm_abs_epi32(v);        // intrinsic path, only compiled when __SSSE3__ is defined
+#else
+    return MF::vml_abs(*this);      // fallback through the MF helper when the intrinsic is unavailable
+#endif
+  }
+
   inline realvec<float,4> intvec<float,4>::as_float() const
   {
     return _mm_castsi128_ps(v);
@@ -716,6 +728,24 @@ namespace vecmathlib {
     return _mm_cvtepi32_ps(v);
   }
 
+  inline intvec<float,4> intvec<float,4>::max(intvec_t x) const
+  {
+#ifdef __SSE4_1__
+    return _mm_max_epi32(v, x.v);   // intrinsic path, only compiled when __SSE4_1__ is defined
+#else
+    return MF::vml_max(*this, x);   // fallback: compare against the argument x, not our own v
+#endif
+  }
+
+  inline intvec<float,4> intvec<float,4>::min(intvec_t x) const
+  {
+#ifdef __SSE4_1__
+    return _mm_min_epi32(v, x.v);   // intrinsic path, only compiled when __SSE4_1__ is defined
+#else
+    return MF::vml_min(*this, x);   // fallback: compare against the argument x, not our own v
+#endif
+  }
+
   inline intvec<float,4> intvec<float,4>::popcount() const
   {
     return MF::vml_popcount(*this);