1 files changed, 109 insertions, 130 deletions
diff --git a/instantiations.cc b/instantiations.cc
index 9bd5351..956e1b9 100644
--- a/instantiations.cc
+++ b/instantiations.cc
@@ -7,84 +7,105 @@
 
 #include "vecmathlib.h"
 
+namespace vecmathlib {
 
+template <typename realvec_t, int n>
+typename realvec_t::real_t get_elt(realvec_t x) {
+  return x[n];
+}
+template <typename realvec_t, int n>
+realvec_t set_elt(realvec_t x, typename realvec_t::real_t a) {
+  return x.set_elt(n, a);
+}
+
+// template realbuiltinvec<float,1> fabs(realbuiltinvec<float,1> x);
+// template realbuiltinvec<float,1> fmin(realbuiltinvec<float,1> x,
+// realbuiltinvec<float,1> y);
+// template intbuiltinvec<float,1> lsr(intbuiltinvec<float,1> x,
+// intbuiltinvec<float,1>::int_t n);
+// template intbuiltinvec<double,1> lsr(intbuiltinvec<double,1> x,
+// intbuiltinvec<double,1>::int_t n);
+// template intbuiltinvec<double,2> lsr(intbuiltinvec<double,2> x,
+// intbuiltinvec<double,2>::int_t n);
+// template intbuiltinvec<double,2> lsr(intbuiltinvec<double,2> x,
+// intbuiltinvec<double,2> n);
+// template realbuiltinvec<float,1> ifthen(realbuiltinvec<float,1>::boolvec_t c,
+// realbuiltinvec<float,1> x, realbuiltinvec<float,1> y);
+// template realbuiltinvec<double,1> ifthen(realbuiltinvec<double,1>::boolvec_t
+// c, realbuiltinvec<double,1> x, realbuiltinvec<double,1> y);
+// template realbuiltinvec<float,4> ifthen(realbuiltinvec<float,4>::boolvec_t c,
+// realbuiltinvec<float,4> x, realbuiltinvec<float,4> y);
+// template realbuiltinvec<double,2> ifthen(realbuiltinvec<double,2>::boolvec_t
+// c, realbuiltinvec<double,2> x, realbuiltinvec<double,2> y);
 
-namespace vecmathlib {
-  
-  template<typename realvec_t, int n>
-  typename realvec_t::real_t get_elt(realvec_t x)
-  {
-    return x[n];
-  }
-  template<typename realvec_t, int n>
-  realvec_t set_elt(realvec_t x, typename realvec_t::real_t a)
-  {
-    return x.set_elt(n, a);
-  }
-  
-  // template realbuiltinvec<float,1> fabs(realbuiltinvec<float,1> x);
-  // template realbuiltinvec<float,1> fmin(realbuiltinvec<float,1> x, realbuiltinvec<float,1> y);
-  // template intbuiltinvec<float,1> lsr(intbuiltinvec<float,1> x, intbuiltinvec<float,1>::int_t n);
-  // template intbuiltinvec<double,1> lsr(intbuiltinvec<double,1> x, intbuiltinvec<double,1>::int_t n);
-  // template intbuiltinvec<double,2> lsr(intbuiltinvec<double,2> x, intbuiltinvec<double,2>::int_t n);
-  // template intbuiltinvec<double,2> lsr(intbuiltinvec<double,2> x, intbuiltinvec<double,2> n);
-  // template realbuiltinvec<float,1> ifthen(realbuiltinvec<float,1>::boolvec_t c, realbuiltinvec<float,1> x, realbuiltinvec<float,1> y);
-  // template realbuiltinvec<double,1> ifthen(realbuiltinvec<double,1>::boolvec_t c, realbuiltinvec<double,1> x, realbuiltinvec<double,1> y);
-  // template realbuiltinvec<float,4> ifthen(realbuiltinvec<float,4>::boolvec_t c, realbuiltinvec<float,4> x, realbuiltinvec<float,4> y);
-  // template realbuiltinvec<double,2> ifthen(realbuiltinvec<double,2>::boolvec_t c, realbuiltinvec<double,2> x, realbuiltinvec<double,2> y);
-  
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_1
-  template realvec<float,1> round(realvec<float,1> x);
+template realvec<float, 1> round(realvec<float, 1> x);
 #endif
-  
+
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_8
-  template intvec<float,8> popcount(intvec<float,8>);
+template intvec<float, 8> popcount(intvec<float, 8>);
 #endif
-  
+
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1
-  template realvec<double,1> exp(realvec<double,1> x);
-  template realvec<double,1> log(realvec<double,1> x);
-  template realvec<double,1> sin(realvec<double,1> x);
-  template realvec<double,1> sqrt(realvec<double,1> x);
-  template realvec<double,1>::real_t get_elt<realvec<double,1>,0>(realvec<double,1> x);
-  template realvec<double,1> set_elt<realvec<double,1>,0>(realvec<double,1> x, realvec<double,1>::real_t a);
+template realvec<double, 1> exp(realvec<double, 1> x);
+template realvec<double, 1> log(realvec<double, 1> x);
+template realvec<double, 1> sin(realvec<double, 1> x);
+template realvec<double, 1> sqrt(realvec<double, 1> x);
+template realvec<double, 1>::real_t
+get_elt<realvec<double, 1>, 0>(realvec<double, 1> x);
+template realvec<double, 1>
+set_elt<realvec<double, 1>, 0>(realvec<double, 1> x,
+                               realvec<double, 1>::real_t a);
 #endif
-  
+
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_2
-  template realvec<double,2> exp(realvec<double,2> x);
-  template realvec<double,2> log(realvec<double,2> x);
-  template realvec<double,2> sin(realvec<double,2> x);
-  template realvec<double,2> sqrt(realvec<double,2> x);
-  template realvec<double,2>::real_t get_elt<realvec<double,2>,0>(realvec<double,2>);
-  template realvec<double,2>::real_t get_elt<realvec<double,2>,1>(realvec<double,2>);
-  template realvec<double,2> set_elt<realvec<double,2>,0>(realvec<double,2> x, realvec<double,2>::real_t a);
-  template realvec<double,2> set_elt<realvec<double,2>,1>(realvec<double,2> x, realvec<double,2>::real_t a);
+template realvec<double, 2> exp(realvec<double, 2> x);
+template realvec<double, 2> log(realvec<double, 2> x);
+template realvec<double, 2> sin(realvec<double, 2> x);
+template realvec<double, 2> sqrt(realvec<double, 2> x);
+template realvec<double, 2>::real_t
+get_elt<realvec<double, 2>, 0>(realvec<double, 2>);
+template realvec<double, 2>::real_t
+get_elt<realvec<double, 2>, 1>(realvec<double, 2>);
+template realvec<double, 2>
+set_elt<realvec<double, 2>, 0>(realvec<double, 2> x,
+                               realvec<double, 2>::real_t a);
+template realvec<double, 2>
+set_elt<realvec<double, 2>, 1>(realvec<double, 2> x,
+                               realvec<double, 2>::real_t a);
 #endif
-  
+
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_4
-  template realvec<double,4> exp(realvec<double,4> x);
-  template realvec<double,4> log(realvec<double,4> x);
-  template realvec<double,4> sin(realvec<double,4> x);
-  template realvec<double,4> sqrt(realvec<double,4> x);
-  template realvec<double,4>::real_t get_elt<realvec<double,4>,0>(realvec<double,4>);
-  template realvec<double,4>::real_t get_elt<realvec<double,4>,1>(realvec<double,4>);
-  template realvec<double,4>::real_t get_elt<realvec<double,4>,2>(realvec<double,4>);
-  template realvec<double,4>::real_t get_elt<realvec<double,4>,3>(realvec<double,4>);
-  template realvec<double,4> set_elt<realvec<double,4>,0>(realvec<double,4> x, realvec<double,4>::real_t a);
-  template realvec<double,4> set_elt<realvec<double,4>,1>(realvec<double,4> x, realvec<double,4>::real_t a);
-  template realvec<double,4> set_elt<realvec<double,4>,2>(realvec<double,4> x, realvec<double,4>::real_t a);
-  template realvec<double,4> set_elt<realvec<double,4>,3>(realvec<double,4> x, realvec<double,4>::real_t a);
-  template intvec<double,4> popcount(intvec<double,4>);
+template realvec<double, 4> exp(realvec<double, 4> x);
+template realvec<double, 4> log(realvec<double, 4> x);
+template realvec<double, 4> sin(realvec<double, 4> x);
+template realvec<double, 4> sqrt(realvec<double, 4> x);
+template realvec<double, 4>::real_t
+get_elt<realvec<double, 4>, 0>(realvec<double, 4>);
+template realvec<double, 4>::real_t
+get_elt<realvec<double, 4>, 1>(realvec<double, 4>);
+template realvec<double, 4>::real_t
+get_elt<realvec<double, 4>, 2>(realvec<double, 4>);
+template realvec<double, 4>::real_t
+get_elt<realvec<double, 4>, 3>(realvec<double, 4>);
+template realvec<double, 4>
+set_elt<realvec<double, 4>, 0>(realvec<double, 4> x,
+                               realvec<double, 4>::real_t a);
+template realvec<double, 4>
+set_elt<realvec<double, 4>, 1>(realvec<double, 4> x,
+                               realvec<double, 4>::real_t a);
+template realvec<double, 4>
+set_elt<realvec<double, 4>, 2>(realvec<double, 4> x,
+                               realvec<double, 4>::real_t a);
+template realvec<double, 4>
+set_elt<realvec<double, 4>, 3>(realvec<double, 4> x,
+                               realvec<double, 4>::real_t a);
+template intvec<double, 4> popcount(intvec<double, 4>);
 #endif
-  
 }
 
-
-
 // Various tests to detect auto-vectorization features
 
-
-
 #include <cassert>
 #include <cstdlib>
 using namespace std;
@@ -92,32 +113,25 @@ using namespace std;
 using namespace vecmathlib;
 
 #if defined VECMATHLIB_HAVE_VEC_DOUBLE_4
-typedef realvec<double,4> realV;
+typedef realvec<double, 4> realV;
 #elif defined VECMATHLIB_HAVE_VEC_DOUBLE_2
-typedef realvec<double,2> realV;
+typedef realvec<double, 2> realV;
 #elif defined VECMATHLIB_HAVE_VEC_FLOAT_8
-typedef realvec<float,8> realV;
+typedef realvec<float, 8> realV;
 #elif defined VECMATHLIB_HAVE_VEC_FLOAT_4
-typedef realvec<float,4> realV;
+typedef realvec<float, 4> realV;
 #elif defined VECMATHLIB_HAVE_VEC_FLOAT_2
-typedef realvec<float,2> realV;
+typedef realvec<float, 2> realV;
 #else
-#  error "There are no vector types"
+#error "There are no vector types"
 #endif
 
 typedef realV::scalar_t real;
 const int vecsize = realV::size;
 
-
-
 // Simple, naive loop adding two arrays
-extern "C"
-void loop_add(real* a,
-              real* b,
-              real* c,
-              ptrdiff_t n)
-{
-  for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_add(real *a, real *b, real *c, ptrdiff_t n) {
+  for (ptrdiff_t i = 0; i < n; i += vecsize) {
     realV tmpb = realV::loadu(&b[i]);
     realV tmpc = realV::loadu(&c[i]);
     realV tmpa = tmpb + tmpc;
@@ -125,16 +139,10 @@ void loop_add(real* a,
   }
 }
 
-
-
 // Declare pointers as restrict
-extern "C"
-void loop_add_restrict(real *restrict a,
-                       real *restrict b,
-                       real *restrict c,
-                       ptrdiff_t n)
-{
-  for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_add_restrict(real *restrict a, real *restrict b,
+                                  real *restrict c, ptrdiff_t n) {
+  for (ptrdiff_t i = 0; i < n; i += vecsize) {
     realV tmpb = realV::loadu(&b[i]);
     realV tmpc = realV::loadu(&c[i]);
     realV tmpa = tmpb + tmpc;
@@ -142,16 +150,10 @@ void loop_add_restrict(real *restrict a,
   }
 }
 
-
-
 // Declare pointers as restrict and aligned
-extern "C"
-void loop_add_aligned(real *restrict a,
-                      real *restrict b,
-                      real *restrict c,
-                      ptrdiff_t n)
-{
-  for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_add_aligned(real *restrict a, real *restrict b,
+                                 real *restrict c, ptrdiff_t n) {
+  for (ptrdiff_t i = 0; i < n; i += vecsize) {
     realV tmpb = realV::loada(&b[i]);
     realV tmpc = realV::loada(&c[i]);
     realV tmpa = tmpb + tmpc;
@@ -159,16 +161,11 @@ void loop_add_aligned(real *restrict a,
   }
 }
 
-
-
 // Reduction loop
-extern "C"
-real loop_dot_reduce(real *restrict a,
-                     real *restrict b,
-                     ptrdiff_t n)
-{
+extern "C" real loop_dot_reduce(real *restrict a, real *restrict b,
+                                ptrdiff_t n) {
   realV sumV = 0.0;
-  for (ptrdiff_t i=0; i<n; i+=vecsize) {
+  for (ptrdiff_t i = 0; i < n; i += vecsize) {
     realV tmpa = realV::loada(&a[i]);
     realV tmpb = realV::loada(&b[i]);
     sumV += tmpa * tmpb;
@@ -176,16 +173,10 @@ real loop_dot_reduce(real *restrict a,
   return sum(sumV);
 }
 
-
-
 // Loop with a simple if condition (fmax)
-extern "C"
-void loop_if_simple(real *restrict a,
-                    real *restrict b,
-                    real *restrict c,
-                    ptrdiff_t n)
-{
-  for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_if_simple(real *restrict a, real *restrict b,
+                               real *restrict c, ptrdiff_t n) {
+  for (ptrdiff_t i = 0; i < n; i += vecsize) {
     realV tmpb = realV::loada(&b[i]);
     realV tmpc = realV::loada(&c[i]);
     realV tmpa = ifthen(tmpb > tmpc, tmpb, tmpc);
@@ -193,16 +184,10 @@ void loop_if_simple(real *restrict a,
   }
 }
 
-
-
 // Loop with a complex if condition (select)
-extern "C"
-void loop_if(real *restrict a,
-             real *restrict b,
-             real *restrict c,
-             ptrdiff_t n)
-{
-  for (ptrdiff_t i=0; i<n; i+=vecsize) {
+extern "C" void loop_if(real *restrict a, real *restrict b, real *restrict c,
+                        ptrdiff_t n) {
+  for (ptrdiff_t i = 0; i < n; i += vecsize) {
     realV tmpb = realV::loada(&b[i]);
     realV tmpc = realV::loada(&c[i]);
     realV tmpa = ifthen(tmpb > realV(0.0), tmpb * tmpc, realV(1.0));
@@ -210,16 +195,10 @@ void loop_if(real *restrict a,
   }
 }
 
-
-
 // Skip ghost points
-extern "C"
-void loop_add_masked(real *restrict a,
-                     real *restrict b,
-                     real *restrict c,
-                     ptrdiff_t n)
-{
-  for (realV::mask_t mask(1, n-1, 0); mask; ++mask) {
+extern "C" void loop_add_masked(real *restrict a, real *restrict b,
+                                real *restrict c, ptrdiff_t n) {
+  for (realV::mask_t mask(1, n - 1, 0); mask; ++mask) {
     ptrdiff_t i = mask.index();
     realV tmpb = realV::loada(&b[i]);
     realV tmpc = realV::loada(&c[i]);