summaryrefslogtreecommitdiffstats
path: root/vec_sse_float4.h
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@gmail.com> 2013-09-02 18:24:25 -0400
committer Erik Schnetter <schnetter@gmail.com> 2013-09-02 22:15:56 -0400
commit fad83c572e8ef9674e3fe5a217195b5934972730 (patch)
tree 6e6a53dca5d3a629ee2257cb8f9deef1bee5f4a2 /vec_sse_float4.h
parent ea1b0d10679852bafcf2ecff2fb49ffc0bddebfd (diff)
download vecmathlib-fad83c572e8ef9674e3fe5a217195b5934972730.zip
vecmathlib-fad83c572e8ef9674e3fe5a217195b5934972730.tar.gz
SSE: Check whether abs/max/min are available as intrinsics before using them
Diffstat (limited to 'vec_sse_float4.h')
-rw-r--r--vec_sse_float4.h36
1 files changed, 33 insertions, 3 deletions
diff --git a/vec_sse_float4.h b/vec_sse_float4.h
index ee4bde8..816e9d3 100644
--- a/vec_sse_float4.h
+++ b/vec_sse_float4.h
@@ -14,6 +14,9 @@
#ifdef __SSE3__ // Intel's SSE 3
# include <pmmintrin.h>
#endif
+#ifdef __SSSE3__ // Intel's SSSE 3
+# include <tmmintrin.h>
+#endif
#if defined __SSE4_1__ // Intel's SSE 4.1
# include <smmintrin.h>
#endif
@@ -313,10 +316,10 @@ namespace vecmathlib {
return ! (*this < x);
}
- intvec_t abs() const { return _mm_abs_epi32(v); }
+ intvec_t abs() const;
boolvec_t isignbit() const { return as_bool(); }
- intvec_t max(intvec_t x) const { return _mm_max_epi32(v, x.v); }
- intvec_t min(intvec_t x) const { return _mm_min_epi32(v, x.v); }
+ intvec_t max(intvec_t x) const;
+ intvec_t min(intvec_t x) const;
};
@@ -695,6 +698,15 @@ namespace vecmathlib {
// intvec definitions
+  // Lane-wise absolute value of the four 32-bit integer elements.
+  inline intvec<float,4> intvec<float,4>::abs() const
+  {
+#if defined __SSSE3__
+    // SSSE3 is available: use the native intrinsic from <tmmintrin.h>.
+    return _mm_abs_epi32(v);
+#else
+    // No SSSE3: defer to the generic MF::vml_abs helper (defined elsewhere).
+    return MF::vml_abs(*this);
+#endif
+  }
+
inline realvec<float,4> intvec<float,4>::as_float() const
{
return _mm_castsi128_ps(v);
@@ -716,6 +728,24 @@ namespace vecmathlib {
return _mm_cvtepi32_ps(v);
}
+  // Lane-wise maximum of this vector and x (signed 32-bit elements).
+  inline intvec<float,4> intvec<float,4>::max(intvec_t x) const
+  {
+#ifdef __SSE4_1__
+    // SSE4.1 provides a native signed 32-bit maximum.
+    return _mm_max_epi32(v, x.v);
+#else
+    // No SSE4.1: defer to the generic MF::vml_max helper (defined elsewhere).
+    // The second operand must be the argument x; passing this object's own
+    // register v would compare the vector with itself and ignore x.
+    return MF::vml_max(*this, x);
+#endif
+  }
+
+  // Lane-wise minimum of this vector and x (signed 32-bit elements).
+  inline intvec<float,4> intvec<float,4>::min(intvec_t x) const
+  {
+#ifdef __SSE4_1__
+    // SSE4.1 provides a native signed 32-bit minimum.
+    return _mm_min_epi32(v, x.v);
+#else
+    // No SSE4.1: defer to the generic MF::vml_min helper (defined elsewhere).
+    // The second operand must be the argument x; passing this object's own
+    // register v would compare the vector with itself and ignore x.
+    return MF::vml_min(*this, x);
+#endif
+  }
+
inline intvec<float,4> intvec<float,4>::popcount() const
{
return MF::vml_popcount(*this);
OpenPOWER on IntegriCloud