summaryrefslogtreecommitdiffstats
path: root/instantiations.cc
diff options
context:
space:
mode:
Diffstat (limited to 'instantiations.cc')
-rw-r--r--instantiations.cc239
1 files changed, 109 insertions, 130 deletions
diff --git a/instantiations.cc b/instantiations.cc
index 9bd5351..956e1b9 100644
--- a/instantiations.cc
+++ b/instantiations.cc
@@ -7,84 +7,105 @@
#include "vecmathlib.h"
+namespace vecmathlib {
+template <typename realvec_t, int n>
+typename realvec_t::real_t get_elt(realvec_t x) {
+ return x[n];
+}
+template <typename realvec_t, int n>
+realvec_t set_elt(realvec_t x, typename realvec_t::real_t a) {
+ return x.set_elt(n, a);
+}
+
+// template realbuiltinvec<float,1> fabs(realbuiltinvec<float,1> x);
+// template realbuiltinvec<float,1> fmin(realbuiltinvec<float,1> x,
+// realbuiltinvec<float,1> y);
+// template intbuiltinvec<float,1> lsr(intbuiltinvec<float,1> x,
+// intbuiltinvec<float,1>::int_t n);
+// template intbuiltinvec<double,1> lsr(intbuiltinvec<double,1> x,
+// intbuiltinvec<double,1>::int_t n);
+// template intbuiltinvec<double,2> lsr(intbuiltinvec<double,2> x,
+// intbuiltinvec<double,2>::int_t n);
+// template intbuiltinvec<double,2> lsr(intbuiltinvec<double,2> x,
+// intbuiltinvec<double,2> n);
+// template realbuiltinvec<float,1> ifthen(realbuiltinvec<float,1>::boolvec_t c,
+// realbuiltinvec<float,1> x, realbuiltinvec<float,1> y);
+// template realbuiltinvec<double,1> ifthen(realbuiltinvec<double,1>::boolvec_t
+// c, realbuiltinvec<double,1> x, realbuiltinvec<double,1> y);
+// template realbuiltinvec<float,4> ifthen(realbuiltinvec<float,4>::boolvec_t c,
+// realbuiltinvec<float,4> x, realbuiltinvec<float,4> y);
+// template realbuiltinvec<double,2> ifthen(realbuiltinvec<double,2>::boolvec_t
+// c, realbuiltinvec<double,2> x, realbuiltinvec<double,2> y);
-namespace vecmathlib {
-
- template<typename realvec_t, int n>
- typename realvec_t::real_t get_elt(realvec_t x)
- {
- return x[n];
- }
- template<typename realvec_t, int n>
- realvec_t set_elt(realvec_t x, typename realvec_t::real_t a)
- {
- return x.set_elt(n, a);
- }
-
- // template realbuiltinvec<float,1> fabs(realbuiltinvec<float,1> x);
- // template realbuiltinvec<float,1> fmin(realbuiltinvec<float,1> x, realbuiltinvec<float,1> y);
- // template intbuiltinvec<float,1> lsr(intbuiltinvec<float,1> x, intbuiltinvec<float,1>::int_t n);
- // template intbuiltinvec<double,1> lsr(intbuiltinvec<double,1> x, intbuiltinvec<double,1>::int_t n);
- // template intbuiltinvec<double,2> lsr(intbuiltinvec<double,2> x, intbuiltinvec<double,2>::int_t n);
- // template intbuiltinvec<double,2> lsr(intbuiltinvec<double,2> x, intbuiltinvec<double,2> n);
- // template realbuiltinvec<float,1> ifthen(realbuiltinvec<float,1>::boolvec_t c, realbuiltinvec<float,1> x, realbuiltinvec<float,1> y);
- // template realbuiltinvec<double,1> ifthen(realbuiltinvec<double,1>::boolvec_t c, realbuiltinvec<double,1> x, realbuiltinvec<double,1> y);
- // template realbuiltinvec<float,4> ifthen(realbuiltinvec<float,4>::boolvec_t c, realbuiltinvec<float,4> x, realbuiltinvec<float,4> y);
- // template realbuiltinvec<double,2> ifthen(realbuiltinvec<double,2>::boolvec_t c, realbuiltinvec<double,2> x, realbuiltinvec<double,2> y);
-
#ifdef VECMATHLIB_HAVE_VEC_FLOAT_1
- template realvec<float,1> round(realvec<float,1> x);
+template realvec<float, 1> round(realvec<float, 1> x);
#endif
-
+
#ifdef VECMATHLIB_HAVE_VEC_FLOAT_8
- template intvec<float,8> popcount(intvec<float,8>);
+template intvec<float, 8> popcount(intvec<float, 8>);
#endif
-
+
#ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1
- template realvec<double,1> exp(realvec<double,1> x);
- template realvec<double,1> log(realvec<double,1> x);
- template realvec<double,1> sin(realvec<double,1> x);
- template realvec<double,1> sqrt(realvec<double,1> x);
- template realvec<double,1>::real_t get_elt<realvec<double,1>,0>(realvec<double,1> x);
- template realvec<double,1> set_elt<realvec<double,1>,0>(realvec<double,1> x, realvec<double,1>::real_t a);
+template realvec<double, 1> exp(realvec<double, 1> x);
+template realvec<double, 1> log(realvec<double, 1> x);
+template realvec<double, 1> sin(realvec<double, 1> x);
+template realvec<double, 1> sqrt(realvec<double, 1> x);
+template realvec<double, 1>::real_t
+get_elt<realvec<double, 1>, 0>(realvec<double, 1> x);
+template realvec<double, 1>
+set_elt<realvec<double, 1>, 0>(realvec<double, 1> x,
+ realvec<double, 1>::real_t a);
#endif
-
+
#ifdef VECMATHLIB_HAVE_VEC_DOUBLE_2
- template realvec<double,2> exp(realvec<double,2> x);
- template realvec<double,2> log(realvec<double,2> x);
- template realvec<double,2> sin(realvec<double,2> x);
- template realvec<double,2> sqrt(realvec<double,2> x);
- template realvec<double,2>::real_t get_elt<realvec<double,2>,0>(realvec<double,2>);
- template realvec<double,2>::real_t get_elt<realvec<double,2>,1>(realvec<double,2>);
- template realvec<double,2> set_elt<realvec<double,2>,0>(realvec<double,2> x, realvec<double,2>::real_t a);
- template realvec<double,2> set_elt<realvec<double,2>,1>(realvec<double,2> x, realvec<double,2>::real_t a);
+template realvec<double, 2> exp(realvec<double, 2> x);
+template realvec<double, 2> log(realvec<double, 2> x);
+template realvec<double, 2> sin(realvec<double, 2> x);
+template realvec<double, 2> sqrt(realvec<double, 2> x);
+template realvec<double, 2>::real_t
+get_elt<realvec<double, 2>, 0>(realvec<double, 2>);
+template realvec<double, 2>::real_t
+get_elt<realvec<double, 2>, 1>(realvec<double, 2>);
+template realvec<double, 2>
+set_elt<realvec<double, 2>, 0>(realvec<double, 2> x,
+ realvec<double, 2>::real_t a);
+template realvec<double, 2>
+set_elt<realvec<double, 2>, 1>(realvec<double, 2> x,
+ realvec<double, 2>::real_t a);
#endif
-
+
#ifdef VECMATHLIB_HAVE_VEC_DOUBLE_4
- template realvec<double,4> exp(realvec<double,4> x);
- template realvec<double,4> log(realvec<double,4> x);
- template realvec<double,4> sin(realvec<double,4> x);
- template realvec<double,4> sqrt(realvec<double,4> x);
- template realvec<double,4>::real_t get_elt<realvec<double,4>,0>(realvec<double,4>);
- template realvec<double,4>::real_t get_elt<realvec<double,4>,1>(realvec<double,4>);
- template realvec<double,4>::real_t get_elt<realvec<double,4>,2>(realvec<double,4>);
- template realvec<double,4>::real_t get_elt<realvec<double,4>,3>(realvec<double,4>);
- template realvec<double,4> set_elt<realvec<double,4>,0>(realvec<double,4> x, realvec<double,4>::real_t a);
- template realvec<double,4> set_elt<realvec<double,4>,1>(realvec<double,4> x, realvec<double,4>::real_t a);
- template realvec<double,4> set_elt<realvec<double,4>,2>(realvec<double,4> x, realvec<double,4>::real_t a);
- template realvec<double,4> set_elt<realvec<double,4>,3>(realvec<double,4> x, realvec<double,4>::real_t a);
- template intvec<double,4> popcount(intvec<double,4>);
+template realvec<double, 4> exp(realvec<double, 4> x);
+template realvec<double, 4> log(realvec<double, 4> x);
+template realvec<double, 4> sin(realvec<double, 4> x);
+template realvec<double, 4> sqrt(realvec<double, 4> x);
+template realvec<double, 4>::real_t
+get_elt<realvec<double, 4>, 0>(realvec<double, 4>);
+template realvec<double, 4>::real_t
+get_elt<realvec<double, 4>, 1>(realvec<double, 4>);
+template realvec<double, 4>::real_t
+get_elt<realvec<double, 4>, 2>(realvec<double, 4>);
+template realvec<double, 4>::real_t
+get_elt<realvec<double, 4>, 3>(realvec<double, 4>);
+template realvec<double, 4>
+set_elt<realvec<double, 4>, 0>(realvec<double, 4> x,
+ realvec<double, 4>::real_t a);
+template realvec<double, 4>
+set_elt<realvec<double, 4>, 1>(realvec<double, 4> x,
+ realvec<double, 4>::real_t a);
+template realvec<double, 4>
+set_elt<realvec<double, 4>, 2>(realvec<double, 4> x,
+ realvec<double, 4>::real_t a);
+template realvec<double, 4>
+set_elt<realvec<double, 4>, 3>(realvec<double, 4> x,
+ realvec<double, 4>::real_t a);
+template intvec<double, 4> popcount(intvec<double, 4>);
#endif
-
}
-
-
// Various tests to detect auto-vectorization features
-
-
#include <cassert>
#include <cstdlib>
using namespace std;
@@ -92,32 +113,25 @@ using namespace std;
using namespace vecmathlib;
#if defined VECMATHLIB_HAVE_VEC_DOUBLE_4
-typedef realvec<double,4> realV;
+typedef realvec<double, 4> realV;
#elif defined VECMATHLIB_HAVE_VEC_DOUBLE_2
-typedef realvec<double,2> realV;
+typedef realvec<double, 2> realV;
#elif defined VECMATHLIB_HAVE_VEC_FLOAT_8
-typedef realvec<float,8> realV;
+typedef realvec<float, 8> realV;
#elif defined VECMATHLIB_HAVE_VEC_FLOAT_4
-typedef realvec<float,4> realV;
+typedef realvec<float, 4> realV;
#elif defined VECMATHLIB_HAVE_VEC_FLOAT_2
-typedef realvec<float,2> realV;
+typedef realvec<float, 2> realV;
#else
-# error "There are no vector types"
+#error "There are no vector types"
#endif
typedef realV::scalar_t real;
const int vecsize = realV::size;
-
-
// Simple, naive loop adding two arrays
-extern "C"
-void loop_add(real* a,
- real* b,
- real* c,
- ptrdiff_t n)
-{
- for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_add(real *a, real *b, real *c, ptrdiff_t n) {
+ for (ptrdiff_t i = 0; i < n; i += vecsize) {
realV tmpb = realV::loadu(&b[i]);
realV tmpc = realV::loadu(&c[i]);
realV tmpa = tmpb + tmpc;
@@ -125,16 +139,10 @@ void loop_add(real* a,
}
}
-
-
// Declare pointers as restrict
-extern "C"
-void loop_add_restrict(real *restrict a,
- real *restrict b,
- real *restrict c,
- ptrdiff_t n)
-{
- for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_add_restrict(real *restrict a, real *restrict b,
+ real *restrict c, ptrdiff_t n) {
+ for (ptrdiff_t i = 0; i < n; i += vecsize) {
realV tmpb = realV::loadu(&b[i]);
realV tmpc = realV::loadu(&c[i]);
realV tmpa = tmpb + tmpc;
@@ -142,16 +150,10 @@ void loop_add_restrict(real *restrict a,
}
}
-
-
// Declare pointers as restrict and aligned
-extern "C"
-void loop_add_aligned(real *restrict a,
- real *restrict b,
- real *restrict c,
- ptrdiff_t n)
-{
- for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_add_aligned(real *restrict a, real *restrict b,
+ real *restrict c, ptrdiff_t n) {
+ for (ptrdiff_t i = 0; i < n; i += vecsize) {
realV tmpb = realV::loada(&b[i]);
realV tmpc = realV::loada(&c[i]);
realV tmpa = tmpb + tmpc;
@@ -159,16 +161,11 @@ void loop_add_aligned(real *restrict a,
}
}
-
-
// Reduction loop
-extern "C"
-real loop_dot_reduce(real *restrict a,
- real *restrict b,
- ptrdiff_t n)
-{
+extern "C" real loop_dot_reduce(real *restrict a, real *restrict b,
+ ptrdiff_t n) {
realV sumV = 0.0;
- for (ptrdiff_t i=0; i<n; i+=vecsize) {
+ for (ptrdiff_t i = 0; i < n; i += vecsize) {
realV tmpa = realV::loada(&a[i]);
realV tmpb = realV::loada(&b[i]);
sumV += tmpa * tmpb;
@@ -176,16 +173,10 @@ real loop_dot_reduce(real *restrict a,
return sum(sumV);
}
-
-
// Loop with a simple if condition (fmax)
-extern "C"
-void loop_if_simple(real *restrict a,
- real *restrict b,
- real *restrict c,
- ptrdiff_t n)
-{
- for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_if_simple(real *restrict a, real *restrict b,
+ real *restrict c, ptrdiff_t n) {
+ for (ptrdiff_t i = 0; i < n; i += vecsize) {
realV tmpb = realV::loada(&b[i]);
realV tmpc = realV::loada(&c[i]);
realV tmpa = ifthen(tmpb > tmpc, tmpb, tmpc);
@@ -193,16 +184,10 @@ void loop_if_simple(real *restrict a,
}
}
-
-
// Loop with a complex if condition (select)
-extern "C"
-void loop_if(real *restrict a,
- real *restrict b,
- real *restrict c,
- ptrdiff_t n)
-{
- for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_if(real *restrict a, real *restrict b, real *restrict c,
+ ptrdiff_t n) {
+ for (ptrdiff_t i = 0; i < n; i += vecsize) {
realV tmpb = realV::loada(&b[i]);
realV tmpc = realV::loada(&c[i]);
realV tmpa = ifthen(tmpb > realV(0.0), tmpb * tmpc, realV(1.0));
@@ -210,16 +195,10 @@ void loop_if(real *restrict a,
}
}
-
-
// Skip ghost points
-extern "C"
-void loop_add_masked(real *restrict a,
- real *restrict b,
- real *restrict c,
- ptrdiff_t n)
-{
- for (realV::mask_t mask(1, n-1, 0); mask; ++mask) {
+extern "C" void loop_add_masked(real *restrict a, real *restrict b,
+ real *restrict c, ptrdiff_t n) {
+ for (realV::mask_t mask(1, n - 1, 0); mask; ++mask) {
ptrdiff_t i = mask.index();
realV tmpb = realV::loada(&b[i]);
realV tmpc = realV::loada(&c[i]);
OpenPOWER on IntegriCloud