1 file changed, 115 insertions, 159 deletions
diff --git a/bench.cc b/bench.cc
index e795985..ac3eb46 100644
--- a/bench.cc
+++ b/bench.cc
@@ -16,47 +16,38 @@
 using namespace std;
 using namespace vecmathlib;
 
-
-
 #ifndef __has_builtin
-#  define __has_builtin(x) 0 // Compatibility with non-clang compilers
+#define __has_builtin(x) 0 // Compatibility with non-clang compilers
 #endif
 
-
-
 typedef unsigned long long ticks;
-inline ticks getticks()
-{
+inline ticks getticks() {
 #if __has_builtin(__builtin_readcyclecounter)
   return __builtin_readcyclecounter();
 #elif defined __x86_64__
   ticks a, d;
-  asm volatile("rdtsc" : "=a" (a), "=d" (d));
+  asm volatile("rdtsc" : "=a"(a), "=d"(d));
   return a | (d << 32);
 #elif defined __powerpc__
   unsigned int tbl, tbu, tbu1;
   do {
-    asm volatile("mftbu %0": "=r"(tbu));
-    asm volatile("mftb %0": "=r"(tbl));
-    asm volatile("mftbu %0": "=r"(tbu1));
+    asm volatile("mftbu %0" : "=r"(tbu));
+    asm volatile("mftb %0" : "=r"(tbl));
+    asm volatile("mftbu %0" : "=r"(tbu1));
   } while (tbu != tbu1);
   return ((unsigned long long)tbu << 32) | tbl;
 #else
   timeval tv;
   gettimeofday(&tv, NULL);
   return 1000000ULL * tv.tv_sec + tv.tv_usec;
-  // timespec ts;
-  // clock_gettime(CLOCK_REALTIME, &ts);
-  // return 1000000000ULL * ts.tv_sec + ts.tv_nsec;
+// timespec ts;
+// clock_gettime(CLOCK_REALTIME, &ts);
+// return 1000000000ULL * ts.tv_sec + ts.tv_nsec;
 #endif
 }
-inline double elapsed(ticks t1, ticks t0)
-{
-  return t1-t0;
-}
+inline double elapsed(ticks t1, ticks t0) { return t1 - t0; }
 
-double get_sys_time()
-{
+double get_sys_time() {
   timeval tv;
   gettimeofday(&tv, NULL);
   return tv.tv_sec + 1.0e-6 * tv.tv_usec;
@@ -65,8 +56,7 @@ double get_sys_time()
   // return ts.tv_sec + 1.0e-9 * ts.tv_nsec;
 }
 
-double measure_tick()
-{
+double measure_tick() {
   ticks const rstart = getticks();
   double const wstart = get_sys_time();
   while (get_sys_time() - wstart < 0.1) {
@@ -74,124 +64,103 @@ double measure_tick()
   }
   ticks const rend = getticks();
   double const wend = get_sys_time();
-  assert(wend-wstart >= 0.09);
+  assert(wend - wstart >= 0.09);
   return (wend - wstart) / elapsed(rend, rstart);
 }
 
-
-
 double global_result = 0.0;
-template<typename realvec_t>
-void save_result(realvec_t result)
-{
-  for (int i=0; i<realvec_t::size; ++i) {
+template <typename realvec_t> void save_result(realvec_t result) {
+  for (int i = 0; i < realvec_t::size; ++i) {
     global_result += result[i];
   }
   // Check global accumulator to prevent optimisation
-  if (! vml_std::isfinite(global_result)) {
+  if (!vml_std::isfinite(global_result)) {
     cout << "\n"
          << "WARNING: Global accumulator is not finite\n";
   }
 }
 
+template <typename T> inline T nop(T x) { return x; }
 
+template <typename T> inline T fneg(T x) { return -x; }
 
-template<typename T> inline T nop(T x) { return x; }
-
-template<typename T> inline T fneg(T x) { return -x; }
+template <typename T> inline T fadd(T x, T y) { return x + y; }
+template <typename T> inline T fsub(T x, T y) { return x - y; }
+template <typename T> inline T fmul(T x, T y) { return x * y; }
+template <typename T> inline T fdiv(T x, T y) { return x / y; }
 
-template<typename T> inline T fadd(T x, T y) { return x+y; }
-template<typename T> inline T fsub(T x, T y) { return x-y; }
-template<typename T> inline T fmul(T x, T y) { return x*y; }
-template<typename T> inline T fdiv(T x, T y) { return x/y; }
-
-template<typename T> inline T frexp0(T x)
-{
+template <typename T> inline T frexp0(T x) {
   typename T::intvec_t ir;
   return frexp(x, &ir);
 }
-template<typename T> inline typename T::intvec_t frexp1(T x)
-{
+template <typename T> inline typename T::intvec_t frexp1(T x) {
   typename T::intvec_t ir;
   frexp(x, &ir);
   return ir;
 }
 
-template<typename T> inline T ldexps(T x, T y)
-{
+template <typename T> inline T ldexps(T x, T y) {
   typename T::intvec_t iy = convert_int(y);
   return ldexp(x, iy[0]);
 }
-template<typename T> inline T ldexpv(T x, T y)
-{
+template <typename T> inline T ldexpv(T x, T y) {
   typename T::intvec_t iy = convert_int(y);
   return ldexp(x, iy);
 }
 
-
-
-#define DECLARE_FUNCTOR(FUNC, XMIN, XMAX)                       \
-  template<typename T>                                          \
-  struct functor_##FUNC {                                       \
-    static typename T::real_t get_xmin() { return XMIN; }       \
-    static typename T::real_t get_xmax() { return XMAX; }       \
-    static const char* name() { return #FUNC; }                 \
-    T operator()(T x) {                                         \
-      return FUNC(x);                                           \
-    }                                                           \
+#define DECLARE_FUNCTOR(FUNC, XMIN, XMAX)                                      \
+  template <typename T> struct functor_##FUNC {                                \
+    static typename T::real_t get_xmin() { return XMIN; }                      \
+    static typename T::real_t get_xmax() { return XMAX; }                      \
+    static const char *name() { return #FUNC; }                                \
+    T operator()(T x) { return FUNC(x); }                                      \
   }
 
-#define DECLARE_BFUNCTOR(FUNC, XMIN, XMAX)                      \
-  template<typename T>                                          \
-  struct functor_##FUNC {                                       \
-    static typename T::real_t get_xmin() { return XMIN; }       \
-    static typename T::real_t get_xmax() { return XMAX; }       \
-    static const char* name() { return #FUNC; }                 \
-    T operator()(T x) {                                         \
-      typename T::boolvec_t res = FUNC(x);                      \
-      return convert_float(convert_int(res));                   \
-    }                                                           \
+#define DECLARE_BFUNCTOR(FUNC, XMIN, XMAX)                                     \
+  template <typename T> struct functor_##FUNC {                                \
+    static typename T::real_t get_xmin() { return XMIN; }                      \
+    static typename T::real_t get_xmax() { return XMAX; }                      \
+    static const char *name() { return #FUNC; }                                \
+    T operator()(T x) {                                                        \
+      typename T::boolvec_t res = FUNC(x);                                     \
+      return convert_float(convert_int(res));                                  \
+    }                                                                          \
   }
 
-#define DECLARE_IFUNCTOR(FUNC, XMIN, XMAX)                      \
-  template<typename T>                                          \
-  struct functor_##FUNC {                                       \
-    static typename T::real_t get_xmin() { return XMIN; }       \
-    static typename T::real_t get_xmax() { return XMAX; }       \
-    static const char* name() { return #FUNC; }                 \
-    T operator()(T x) {                                         \
-      typename T::intvec_t res = FUNC(x);                       \
-      return convert_float(res);                                \
-    }                                                           \
+#define DECLARE_IFUNCTOR(FUNC, XMIN, XMAX)                                     \
+  template <typename T> struct functor_##FUNC {                                \
+    static typename T::real_t get_xmin() { return XMIN; }                      \
+    static typename T::real_t get_xmax() { return XMAX; }                      \
+    static const char *name() { return #FUNC; }                                \
+    T operator()(T x) {                                                        \
+      typename T::intvec_t res = FUNC(x);                                      \
+      return convert_float(res);                                               \
+    }                                                                          \
   }
 
-#define DECLARE_FUNCTOR2(FUNC, XMIN, XMAX, YOFFSET)             \
-  template<typename T>                                          \
-  struct functor_##FUNC {                                       \
-    static typename T::real_t get_xmin() { return XMIN; }       \
-    static typename T::real_t get_xmax() { return XMAX; }       \
-    static const char* name() { return #FUNC; }                 \
-    T operator()(T x) {                                         \
-      const typename T::real_t yoffset = YOFFSET;               \
-      return FUNC(x, x + T(yoffset));                           \
-    }                                                           \
+#define DECLARE_FUNCTOR2(FUNC, XMIN, XMAX, YOFFSET)                            \
+  template <typename T> struct functor_##FUNC {                                \
+    static typename T::real_t get_xmin() { return XMIN; }                      \
+    static typename T::real_t get_xmax() { return XMAX; }                      \
+    static const char *name() { return #FUNC; }                                \
+    T operator()(T x) {                                                        \
+      const typename T::real_t yoffset = YOFFSET;                              \
+      return FUNC(x, x + T(yoffset));                                          \
+    }                                                                          \
   }
 
-#define DECLARE_FUNCTOR3(FUNC, XMIN, XMAX, YOFFSET, ZOFFSET)    \
-  template<typename T>                                          \
-  struct functor_##FUNC {                                       \
-    static typename T::real_t get_xmin() { return XMIN; }       \
-    static typename T::real_t get_xmax() { return XMAX; }       \
-    static const char* name() { return #FUNC; }                 \
-    T operator()(T x) {                                         \
-      const typename T::real_t yoffset = YOFFSET;               \
-      const typename T::real_t zoffset = ZOFFSET;               \
-      return FUNC(x, x + T(yoffset), x + T(zoffset));           \
-    }                                                           \
+#define DECLARE_FUNCTOR3(FUNC, XMIN, XMAX, YOFFSET, ZOFFSET)                   \
+  template <typename T> struct functor_##FUNC {                                \
+    static typename T::real_t get_xmin() { return XMIN; }                      \
+    static typename T::real_t get_xmax() { return XMAX; }                      \
+    static const char *name() { return #FUNC; }                                \
+    T operator()(T x) {                                                        \
+      const typename T::real_t yoffset = YOFFSET;                              \
+      const typename T::real_t zoffset = ZOFFSET;                              \
+      return FUNC(x, x + T(yoffset), x + T(zoffset));                          \
+    }                                                                          \
   }
 
-
-
 DECLARE_FUNCTOR(nop, 0.0, 1.0);
 
 DECLARE_FUNCTOR(fneg, 0.0, 1.0);
@@ -252,137 +221,127 @@ DECLARE_FUNCTOR(tan, 0.0, 1.0);
 DECLARE_FUNCTOR(tanh, -1.0, +1.0);
 DECLARE_FUNCTOR(trunc, -1.0, +1.0);
 
-
-
-template<typename realvec_t, template<typename> class func_t>
-double run_bench()
-{
+template <typename realvec_t, template <typename> class func_t>
+double run_bench() {
   const int numiters = 1000000;
-  
+
   typedef typename realvec_t::real_t real_t;
   const real_t xmin = func_t<realvec_t>::get_xmin();
   const real_t xmax = func_t<realvec_t>::get_xmax();
   realvec_t x0, dx;
-  for (int i=0; i<realvec_t::size; ++i) {
+  for (int i = 0; i < realvec_t::size; ++i) {
     x0.set_elt(i, xmin + (xmax - xmin) / numiters * i / realvec_t::size);
     dx.set_elt(i, (xmax - xmin) / numiters);
   }
   realvec_t x, y;
   ticks t0, t1;
   double const cycles_per_tick = 1.0; // measure_tick();
-  
+
   func_t<realvec_t> func;
   t0 = getticks();
   x = y = x0;
-  for (int n=0; n<numiters; ++n) {
+  for (int n = 0; n < numiters; ++n) {
     y += func(x);
     x += dx;
   }
   t1 = getticks();
   save_result(y);
-  
-  return cycles_per_tick * elapsed(t1,t0) * realvec_t::size / numiters;
+
+  return cycles_per_tick * elapsed(t1, t0) * realvec_t::size / numiters;
 }
 
-template<typename realvec_t, template<typename> class func_t>
-void bench_type_func()
-{
-  cout << "   "
-       << setw(-5) << func_t<realvec_t>::name() << " "
-       << setw(18) << realvec_t::name() << ": " << flush;
+template <typename realvec_t, template <typename> class func_t>
+void bench_type_func() {
+  cout << "   " << setw(-5) << func_t<realvec_t>::name() << " " << setw(18)
+       << realvec_t::name() << ": " << flush;
   double const cycles = run_bench<realvec_t, func_t>();
   cout << cycles << " cycles\n" << flush;
 }
 
-template<template<typename> class func_t>
-void bench_func()
-{
+template <template <typename> class func_t> void bench_func() {
   cout << "\n"
        << "Benchmarking " << func_t<float32_vec>().name() << ":\n";
-  
+
   // Note: We benchmark neither testvec (since this is known to be
   // slow), nor builtinvec (since this has about the same performance
   // as pseudovec, and is also not very efficient).
-  
-  bench_type_func<realpseudovec<float,1>, func_t>();
+
+  bench_type_func<realpseudovec<float, 1>, func_t>();
 #ifdef __clang__
-  bench_type_func<realbuiltinvec<float,1>, func_t>();
+  bench_type_func<realbuiltinvec<float, 1>, func_t>();
 #endif
-  bench_type_func<realtestvec<float,1>, func_t>();
+  bench_type_func<realtestvec<float, 1>, func_t>();
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_1
-  bench_type_func<realvec<float,1>, func_t>();
+  bench_type_func<realvec<float, 1>, func_t>();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_2
-  bench_type_func<realpseudovec<float,2>, func_t>();
+  bench_type_func<realpseudovec<float, 2>, func_t>();
 #ifdef __clang__
-  bench_type_func<realbuiltinvec<float,2>, func_t>();
+  bench_type_func<realbuiltinvec<float, 2>, func_t>();
 #endif
   // bench_type_func<realtestvec<float,2>, func_t>();
-  bench_type_func<realvec<float,2>, func_t>();
+  bench_type_func<realvec<float, 2>, func_t>();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_4
-  bench_type_func<realpseudovec<float,4>, func_t>();
+  bench_type_func<realpseudovec<float, 4>, func_t>();
 #ifdef __clang__
-  bench_type_func<realbuiltinvec<float,4>, func_t>();
+  bench_type_func<realbuiltinvec<float, 4>, func_t>();
 #endif
   // bench_type_func<realtestvec<float,4>, func_t>();
-  bench_type_func<realvec<float,4>, func_t>();
+  bench_type_func<realvec<float, 4>, func_t>();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_8
-  bench_type_func<realpseudovec<float,8>, func_t>();
+  bench_type_func<realpseudovec<float, 8>, func_t>();
 #ifdef __clang__
-  bench_type_func<realbuiltinvec<float,8>, func_t>();
+  bench_type_func<realbuiltinvec<float, 8>, func_t>();
 #endif
   // bench_type_func<realtestvec<float,8>, func_t>();
-  bench_type_func<realvec<float,8>, func_t>();
+  bench_type_func<realvec<float, 8>, func_t>();
 #endif
-  
-  bench_type_func<realpseudovec<double,1>, func_t>();
+
+  bench_type_func<realpseudovec<double, 1>, func_t>();
 #ifdef __clang__
-  bench_type_func<realbuiltinvec<double,1>, func_t>();
+  bench_type_func<realbuiltinvec<double, 1>, func_t>();
 #endif
-  bench_type_func<realtestvec<double,1>, func_t>();
+  bench_type_func<realtestvec<double, 1>, func_t>();
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1
-  bench_type_func<realvec<double,1>, func_t>();
+  bench_type_func<realvec<double, 1>, func_t>();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_2
-  bench_type_func<realpseudovec<double,2>, func_t>();
+  bench_type_func<realpseudovec<double, 2>, func_t>();
 #ifdef __clang__
-  bench_type_func<realbuiltinvec<double,2>, func_t>();
+  bench_type_func<realbuiltinvec<double, 2>, func_t>();
 #endif
   // bench_type_func<realtestvec<double,2>, func_t>();
-  bench_type_func<realvec<double,2>, func_t>();
+  bench_type_func<realvec<double, 2>, func_t>();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_4
-  bench_type_func<realpseudovec<double,4>, func_t>();
+  bench_type_func<realpseudovec<double, 4>, func_t>();
 #ifdef __clang__
-  bench_type_func<realbuiltinvec<double,4>, func_t>();
+  bench_type_func<realbuiltinvec<double, 4>, func_t>();
 #endif
   // bench_type_func<realtestvec<double,4>, func_t>();
-  bench_type_func<realvec<double,4>, func_t>();
+  bench_type_func<realvec<double, 4>, func_t>();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_8
-  bench_type_func<realpseudovec<double,8>, func_t>();
+  bench_type_func<realpseudovec<double, 8>, func_t>();
 #ifdef __clang__
-  bench_type_func<realbuiltinvec<double,8>, func_t>();
+  bench_type_func<realbuiltinvec<double, 8>, func_t>();
 #endif
   // bench_type_func<realtestvec<double,8>, func_t>();
-  bench_type_func<realvec<double,8>, func_t>();
+  bench_type_func<realvec<double, 8>, func_t>();
 #endif
 }
 
-
-
-void bench()
-{
+void bench() {
   bench_func<functor_nop>();
-  
+
   bench_func<functor_fneg>();
   bench_func<functor_fadd>();
   bench_func<functor_fsub>();
   bench_func<functor_fmul>();
   bench_func<functor_fdiv>();
-  
+
   bench_func<functor_acos>();
   bench_func<functor_acosh>();
   bench_func<functor_asin>();
@@ -436,10 +395,7 @@ void bench()
   bench_func<functor_trunc>();
 }
 
-
-
-int main(int argc, char** argv)
-{
+int main(int argc, char **argv) {
   cout << "Benchmarking math functions:\n";
   bench();
   return 0;