summary | refs | log | tree | commit | diff | stats
path: root/vec_sse_float4.h
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@gmail.com> 2013-07-04 15:35:35 -0400
committer Erik Schnetter <schnetter@gmail.com> 2013-07-04 15:35:35 -0400
commit 1a456c91a43494036111dea2db89ae656aa9c987 (patch)
tree e784c81254e8ab8139435c8ead79821060eb51bb /vec_sse_float4.h
parent 00578fbb97c8b839ea8675400eae41e761d2a8e2 (diff)
download vecmathlib-1a456c91a43494036111dea2db89ae656aa9c987.zip
vecmathlib-1a456c91a43494036111dea2db89ae656aa9c987.tar.gz
Optimize prod and sum
Diffstat (limited to 'vec_sse_float4.h')
-rw-r--r-- vec_sse_float4.h 12
1 files changed, 10 insertions, 2 deletions
diff --git a/vec_sse_float4.h b/vec_sse_float4.h
index 1466013..dc3ecb6 100644
--- a/vec_sse_float4.h
+++ b/vec_sse_float4.h
@@ -499,7 +499,11 @@ namespace vecmathlib {
}
real_t prod() const
{
- return (*this)[0] * (*this)[1] * (*this)[2] * (*this)[3];
+ // return (*this)[0] * (*this)[1] * (*this)[2] * (*this)[3];
+ realvec_t x0123 = *this;
+ realvec_t x1032 = _mm_shuffle_ps(x0123, x0123, 0b10110001);
+ realvec_t y0022 = x0123 * x1032;
+ return y0022[0] * y0022[2];
}
real_t sum() const
{
@@ -509,7 +513,11 @@ namespace vecmathlib {
x = _mm_hadd_ps(x.v, x.v);
return x[0];
#else
- return (*this)[0] + (*this)[1] + (*this)[2] + (*this)[3];
+ // return (*this)[0] + (*this)[1] + (*this)[2] + (*this)[3];
+ realvec_t x0123 = *this;
+ realvec_t x1032 = _mm_shuffle_ps(x0123, x0123, 0b10110001);
+ realvec_t y0022 = x0123 + x1032;
+ return y0022[0] + y0022[2];
#endif
}
OpenPOWER on IntegriCloud