1 files changed, 877 insertions, 864 deletions
diff --git a/selftest.cc b/selftest.cc
index 4296f14..334d95f 100644
--- a/selftest.cc
+++ b/selftest.cc
@@ -14,22 +14,17 @@
 
 using namespace std;
 
-
-  
 int num_errors = 0;
 
+template <typename realvec_t> struct vecmathlib_test {
 
-
-template<typename realvec_t>
-struct vecmathlib_test {
-  
   typedef typename realvec_t::boolvec_t boolvec_t;
   typedef typename realvec_t::intvec_t intvec_t;
-  
+
   typedef typename realvec_t::int_t int_t;
   typedef typename realvec_t::uint_t uint_t;
   typedef typename realvec_t::real_t real_t;
-  
+
   // Short names for type casts
   typedef real_t R;
   typedef int_t I;
@@ -37,16 +32,13 @@ struct vecmathlib_test {
   typedef realvec_t RV;
   typedef intvec_t IV;
   typedef boolvec_t BV;
-  
+
   typedef vecmathlib::floatprops<real_t> FP;
   typedef vecmathlib::mathfuncs<realvec_t> MF;
-  
-  
-  
+
   // Test each function with this many random values
   static const int imax = 10000;
-  static real_t accuracy(real_t ulp = R(0.5))
-  {
+  static real_t accuracy(real_t ulp = R(0.5)) {
 #ifdef VML_HAVE_FP_CONTRACT
     // Require that 100% of the digits are correct
     // real_t digit_fraction = 1.0;
@@ -56,526 +48,451 @@ struct vecmathlib_test {
     // Require that 80% of the digits are correct
     real_t digit_fraction = 0.8;
 #endif
-    digit_fraction *= 0.95;     // some lenience for testing (why?)
+    digit_fraction *= 0.95; // some lenience for testing (why?)
     return pow(ulp * realvec_t::epsilon(), digit_fraction);
   }
-  
-  
-  
-  static realvec_t random(const real_t xmin, const real_t xmax)
-  {
+
+  static realvec_t random(const real_t xmin, const real_t xmax) {
     realvec_t x;
-    for (int i=0; i<realvec_t::size; ++i) {
-      const real_t r =
-        (xmax - xmin) * FP::convert_float(rand()) / FP::convert_float(RAND_MAX);
+    for (int i = 0; i < realvec_t::size; ++i) {
+      const real_t r = (xmax - xmin) * FP::convert_float(rand()) /
+                       FP::convert_float(RAND_MAX);
       x.set_elt(i, xmin + r);
     }
     return x;
   }
-  
-  static intvec_t random(const int_t nmin, const int_t nmax)
-  {
+
+  static intvec_t random(const int_t nmin, const int_t nmax) {
     intvec_t n;
-    for (int i=0; i<intvec_t::size; ++i) {
-      const real_t r =
-        R(nmax - nmin + 1) * R(rand()) / (R(RAND_MAX) + R(1.0));
+    for (int i = 0; i < intvec_t::size; ++i) {
+      const real_t r = R(nmax - nmin + 1) * R(rand()) / (R(RAND_MAX) + R(1.0));
       n.set_elt(i, nmin + FP::convert_int(floor(r)));
     }
     return n;
   }
-  
-  
-  
-  static bool is_big_endian()
-  {
+
+  static bool is_big_endian() {
     const int i = 1;
     unsigned char cs[sizeof i];
     memcpy(cs, &i, sizeof i);
-    return cs[0]==0;
+    return cs[0] == 0;
   }
-  
-  template<typename T>
-  static string hex(const T x)
-  {
+
+  template <typename T> static string hex(const T x) {
     unsigned char cs[sizeof x];
     memcpy(cs, &x, sizeof x);
     ostringstream buf;
     buf << "0x";
-    const char* const hexdigits = "0123456789abcdef";
+    const char *const hexdigits = "0123456789abcdef";
     const int n0 = is_big_endian() ? 0 : sizeof x - 1;
     const int dn = is_big_endian() ? +1 : -1;
     const int n1 = n0 + sizeof x * dn;
-    for (int n=n0; n!=n1; n+=dn) {
-      buf << hexdigits[cs[n]>>4] << hexdigits[cs[n]&15];
+    for (int n = n0; n != n1; n += dn) {
+      buf << hexdigits[cs[n] >> 4] << hexdigits[cs[n] & 15];
     }
     return buf.str();
   }
-  
-  
-  
-  static boolvec_t supported(realvec_t x)
-  {
-    return x==RV(0.0) || MF::vml_ieee_isnormal(x)
+
+  static boolvec_t supported(realvec_t x) {
+    return x == RV(0.0) || MF::vml_ieee_isnormal(x)
 #ifdef VML_HAVE_DENORMALS
-      || MF::vml_ieee_isfinite(x)
+           || MF::vml_ieee_isfinite(x)
 #endif
 #ifdef VML_HAVE_INF
-      || MF::vml_ieee_isinf(x)
+           || MF::vml_ieee_isinf(x)
 #endif
 #ifdef VML_HAVE_NAN
-      || MF::vml_ieee_isnan(x)
+           || MF::vml_ieee_isnan(x)
 #endif
-      ;
-  }
-  
-  static boolvec_t supported(intvec_t x)
-  {
-    return true;
-  }
-  
-  static boolvec_t supported(boolvec_t x)
-  {
-    return true;
+        ;
   }
-  
-  
-  
+
+  static boolvec_t supported(intvec_t x) { return true; }
+
+  static boolvec_t supported(boolvec_t x) { return true; }
+
   // Check load memory access
-  static void check_mem(const char* const func,
-                        const realvec_t x,
-                        const real_t* const p,
-                        const realvec_t xold,
-                        const int mval)
-  {
+  static void check_mem(const char *const func, const realvec_t x,
+                        const real_t *const p, const realvec_t xold,
+                        const int mval) {
     realvec_t xwant;
-    for (int i=0; i<realvec_t::size; ++i) {
-      xwant.set_elt(i, mval & (1<<i) ? p[i] : xold[i]);
+    for (int i = 0; i < realvec_t::size; ++i) {
+      xwant.set_elt(i, mval & (1 << i) ? p[i] : xold[i]);
     }
     const boolvec_t isbad = x != xwant;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   found=" << x << " [" << hex(x) << "]\n"
            << "   expected=" << xwant << " [" << hex(xwant) << "]\n"
            << "   mval=" << mval << " [" << hex(mval) << "]\n"
-           << "   isbad=" << isbad << "\n"
-           << flush;
+           << "   isbad=" << isbad << "\n" << flush;
     }
   }
-  
+
   // Check store memory access
-  static void check_mem(const char* const func,
-                        const real_t* const p,
-                        const realvec_t x,
-                        const real_t* const pold,
-                        const int mval)
-  {
+  static void check_mem(const char *const func, const real_t *const p,
+                        const realvec_t x, const real_t *const pold,
+                        const int mval) {
     realvec_t pv, pvwant;
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       pv.set_elt(i, p[i]);
-      pvwant.set_elt(i, mval & (1<<i) ? x[i] : pold[i]);
+      pvwant.set_elt(i, mval & (1 << i) ? x[i] : pold[i]);
     }
     const boolvec_t isbad = pv != pvwant;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   found=" << pv << " [" << hex(pv) << "]\n"
            << "   expected=" << pvwant << " [" << hex(pvwant) << "]\n"
-           << "   isbad=" << isbad << "\n"
-           << flush;
+           << "   isbad=" << isbad << "\n" << flush;
     }
   }
-  
-  static void check_bool(const char* const func,
-                         const bool rstd, const bool rvml)
-  {
+
+  static void check_bool(const char *const func, const bool rstd,
+                         const bool rvml) {
     const bool dr = rstd ^ rvml;
     const bool isbad = dr;
     if (isbad) {
-      ++ num_errors;
+      ++num_errors;
       cout << "Error in " << func << ":\n"
            << "   fstd()=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml()=" << rvml << " [" << hex(rvml) << "]\n"
-           << "   isbad()=" << isbad << "\n"
-           << flush;
+           << "   isbad()=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A>
-  static void check_bool(const char* const func,
-                         const bool rstd, const bool rvml, const A x)
-  {
+
+  template <typename A>
+  static void check_bool(const char *const func, const bool rstd,
+                         const bool rvml, const A x) {
     const bool dr = rstd ^ rvml;
     const bool isbad = dr;
     if (isbad) {
-      ++ num_errors;
+      ++num_errors;
       cout << "Error in " << func << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   fstd(x)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x)=" << rvml << " [" << hex(rvml) << "]\n"
-           << "   isbad(x)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A>
-  static void check_bool(const char* const func,
-                         const boolvec_t rstd, const boolvec_t rvml,
-                         const A x)
-  {
+
+  template <typename A>
+  static void check_bool(const char *const func, const boolvec_t rstd,
+                         const boolvec_t rvml, const A x) {
     boolvec_t dr;
     bool isbad = false;
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       dr.set_elt(i, rstd[i] ^ rvml[i]);
       isbad |= dr[i];
     }
     if (isbad) {
-      ++ num_errors;
+      ++num_errors;
       cout << "Error in " << func << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   fstd(x)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x)=" << dr << " [" << hex(rvml) << "]\n"
-           << "   isbad(x)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B>
-  static void check_bool(const char* const func,
-                         const boolvec_t rstd, const boolvec_t rvml,
-                         const A x, const B y)
-  {
+
+  template <typename A, typename B>
+  static void check_bool(const char *const func, const boolvec_t rstd,
+                         const boolvec_t rvml, const A x, const B y) {
     boolvec_t dr;
     bool isbad = false;
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       dr.set_elt(i, rstd[i] ^ rvml[i]);
       isbad |= dr[i];
     }
     if (isbad) {
-      ++ num_errors;
+      ++num_errors;
       cout << "Error in " << func << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x,y)=" << dr << " [" << hex(rvml) << "]\n"
-           << "   isbad(x,y)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x,y)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A>
-  static void check_bool(const char* const func,
-                         bool fstd(typename A::scalar_t x),
-                         boolvec_t fvml(A x),
-                         const A x)
-  {
+
+  template <typename A>
+  static void check_bool(const char *const func,
+                         bool fstd(typename A::scalar_t x), boolvec_t fvml(A x),
+                         const A x) {
     boolvec_t rstd;
-    for (int i=0; i<boolvec_t::size; ++i) {
+    for (int i = 0; i < boolvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i]));
     }
     const boolvec_t rvml = fvml(x);
     const boolvec_t dr = rstd != rvml;
     const boolvec_t isbad = supported(x) && supported(rstd) && dr;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   fstd(x)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x)=" << dr << " [" << hex(dr) << "]\n"
-           << "   isbad(x)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B>
-  static void check_bool(const char* const func,
+
+  template <typename A, typename B>
+  static void check_bool(const char *const func,
                          bool fstd(typename A::scalar_t x,
                                    typename B::scalar_t y),
-                         boolvec_t fvml(A x, B y),
-                         const A x, const B y)
-  {
+                         boolvec_t fvml(A x, B y), const A x, const B y) {
     boolvec_t rstd;
-    for (int i=0; i<boolvec_t::size; ++i) {
+    for (int i = 0; i < boolvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i], y[i]));
     }
     const boolvec_t rvml = fvml(x, y);
     const boolvec_t dr = rstd != rvml;
     const boolvec_t isbad = supported(x) && supported(rstd) && dr;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x,y)=" << dr << " [" << hex(dr) << "]\n"
-           << "   isbad(x,y)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x,y)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B, typename C>
-  static void check_bool(const char* const func,
-                         bool fstd(typename A::scalar_t x,
-                                   typename B::scalar_t y,
-                                   typename C::scalar_t z),
-                         boolvec_t fvml(A x, B y, C z),
-                         const A x, const B y, const C z)
-  {
+
+  template <typename A, typename B, typename C>
+  static void
+  check_bool(const char *const func,
+             bool fstd(typename A::scalar_t x, typename B::scalar_t y,
+                       typename C::scalar_t z),
+             boolvec_t fvml(A x, B y, C z), const A x, const B y, const C z) {
     boolvec_t rstd;
-    for (int i=0; i<boolvec_t::size; ++i) {
+    for (int i = 0; i < boolvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i], y[i], z[i]));
     }
     const boolvec_t rvml = fvml(x, y, z);
     const boolvec_t dr = rstd != rvml;
     const boolvec_t isbad = supported(x) && supported(rstd) && dr;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   z=" << z << " [" << hex(z) << "]\n"
            << "   fstd(x,y,z)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x,y,z)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x,y,z)=" << dr << " [" << hex(dr) << "]\n"
-           << "   isbad(x,y,z)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x,y,z)=" << isbad << "\n" << flush;
     }
   }
-  
-  static void check_int(const char* const func,
-                        const int_t rstd, const int_t rvml)
-  {
+
+  static void check_int(const char *const func, const int_t rstd,
+                        const int_t rvml) {
     const int_t dr = rstd - rvml;
     const bool isbad = dr;
     if (isbad) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   fstd()=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml()=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error()=" << dr << " [" << hex(dr) << "]\n"
-           << "   isbad()=" << isbad << "\n"
-           << flush;
+           << "   isbad()=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A>
-  static void check_int(const char* const func,
-                        int_t fstd(typename A::scalar_t x),
-                        intvec_t fvml(A x),
-                        const A x)
-  {
+
+  template <typename A>
+  static void check_int(const char *const func,
+                        int_t fstd(typename A::scalar_t x), intvec_t fvml(A x),
+                        const A x) {
     intvec_t rstd;
-    for (int i=0; i<intvec_t::size; ++i) {
+    for (int i = 0; i < intvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i]));
     }
     const intvec_t rvml = fvml(x);
     const intvec_t dr = rstd - rvml;
     const boolvec_t isbad = supported(x) && supported(rstd) && convert_bool(dr);
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   fstd(x)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x)=" << dr << " [" << hex(dr) << "]\n"
-           << "   isbad(x)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B>
-  static void check_int(const char* const func,
+
+  template <typename A, typename B>
+  static void check_int(const char *const func,
                         int_t fstd(typename A::scalar_t x, B y),
-                        intvec_t fvml(A x, B y),
-                        const A x, const B y)
-  {
+                        intvec_t fvml(A x, B y), const A x, const B y) {
     intvec_t rstd;
-    for (int i=0; i<intvec_t::size; ++i) {
+    for (int i = 0; i < intvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i], y));
     }
     const intvec_t rvml = fvml(x, y);
     const intvec_t dr = rstd - rvml;
     const boolvec_t isbad = supported(x) && supported(rstd) && convert_bool(dr);
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x,y)=" << dr << " [" << hex(dr) << "]\n"
-           << "   isbad(x,y)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x,y)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B>
-  static void check_int(const char* const func,
+
+  template <typename A, typename B>
+  static void check_int(const char *const func,
                         int_t fstd(typename A::scalar_t x,
                                    typename B::scalar_t y),
-                        intvec_t fvml(A x, B y),
-                        const A x, const B y)
-  {
+                        intvec_t fvml(A x, B y), const A x, const B y) {
     intvec_t rstd;
-    for (int i=0; i<intvec_t::size; ++i) {
+    for (int i = 0; i < intvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i], y[i]));
     }
     const intvec_t rvml = fvml(x, y);
     const intvec_t dr = rstd - rvml;
     const boolvec_t isbad =
-      supported(x) && supported(y) && supported(rstd) && convert_bool(dr);
+        supported(x) && supported(y) && supported(rstd) && convert_bool(dr);
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x,y)=" << dr << " [" << hex(dr) << "]\n"
-           << "   isbad(x,y)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x,y)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B, typename C>
-  static void check_int(const char* const func,
-                        int_t fstd(typename A::scalar_t x,
-                                   typename B::scalar_t y,
-                                   typename C::scalar_t z),
-                        intvec_t fvml(A x, B y, C z),
-                        const A x, const B y, const C z)
-  {
+
+  template <typename A, typename B, typename C>
+  static void
+  check_int(const char *const func,
+            int_t fstd(typename A::scalar_t x, typename B::scalar_t y,
+                       typename C::scalar_t z),
+            intvec_t fvml(A x, B y, C z), const A x, const B y, const C z) {
     intvec_t rstd;
-    for (int i=0; i<intvec_t::size; ++i) {
+    for (int i = 0; i < intvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i], y[i], z[i]));
     }
     const intvec_t rvml = fvml(x, y, z);
     const intvec_t dr = rstd - rvml;
-    const boolvec_t isbad =
-      supported(x) && supported(y) && supported(z) && supported(rstd) &&
-      convert_bool(dr);
+    const boolvec_t isbad = supported(x) && supported(y) && supported(z) &&
+                            supported(rstd) && convert_bool(dr);
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   z=" << z << " [" << hex(z) << "]\n"
            << "   fstd(x,y,z)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x,y,z)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x,y,z)=" << dr << " [" << hex(dr) << "]\n"
-           << "   isbad(x,y,z)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x,y,z)=" << isbad << "\n" << flush;
     }
   }
-  
-  static void check_real(const char* const func,
-                         const real_t rstd, const real_t rvml)
-  {
+
+  static void check_real(const char *const func, const real_t rstd,
+                         const real_t rvml) {
     const real_t dr = rstd - rvml;
     const bool isbad = dr != R(0.0);
     if (isbad) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << "():\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << "():\n"
            << "   fstd()=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml()=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error()=" << dr << "\n"
-           << "   isbad()=" << isbad << "\n"
-           << flush;
+           << "   isbad()=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A>
-  static void check_real(const char* const func,
-                         const real_t rstd, const real_t rvml, const A x,
-                         const real_t accuracy)
-  {
+
+  template <typename A>
+  static void check_real(const char *const func, const real_t rstd,
+                         const real_t rvml, const A x, const real_t accuracy) {
     const real_t dr = rstd - rvml;
     real_t maxabs = 0.0;
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       maxabs = vml_std::fmax(maxabs, vml_std::fabs(x[i]));
     }
     const real_t scale = fabs(rstd) + fabs(rvml) + fabs(maxabs) + R(1.0);
     const bool isbad = fabs(dr) > accuracy * scale;
     if (isbad) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << "():\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << "():\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   fstd(x)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   error(x)=" << dr << "\n"
-           << "   isbad(x)=" << isbad << "\n"
-           << flush;
+           << "   isbad(x)=" << isbad << "\n" << flush;
     }
   }
-  
-  template<typename A>
-  static void check_real(const char* const func,
-                         real_t fstd(typename A::scalar_t x),
-                         realvec_t fvml(A x),
-                         const A x,
-                         const real_t accuracy)
-  {
+
+  template <typename A>
+  static void
+  check_real(const char *const func, real_t fstd(typename A::scalar_t x),
+             realvec_t fvml(A x), const A x, const real_t accuracy) {
     realvec_t rstd;
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i]));
     }
     const realvec_t rvml = fvml(x);
     const realvec_t dr = rstd - rvml;
     const realvec_t scale = fabs(rstd) + fabs(rvml) + realvec_t(1.0);
-    const boolvec_t isbad =
-      supported(x) && supported(rstd) &&
-      fabs(dr) > realvec_t(accuracy) * scale;
+    const boolvec_t isbad = supported(x) && supported(rstd) &&
+                            fabs(dr) > realvec_t(accuracy) * scale;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   fstd(x)=" << rstd << " [" << hex(rstd) << "]\n"
            << "   fvml(x)=" << rvml << " [" << hex(rvml) << "]\n"
            << "   abs-error(x)=" << fabs(dr) << "\n"
            << "   rel-error(x)=" << fabs(dr) / scale << "\n"
            << "   isbad(x)=" << isbad << "\n"
-           << "   accuracy=" << accuracy << "\n"
-           << flush;
+           << "   accuracy=" << accuracy << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B>
-  static void check_real(const char* const func,
+
+  template <typename A, typename B>
+  static void check_real(const char *const func,
                          real_t fstd(typename A::scalar_t x, B y),
-                         realvec_t fvml(A x, B y),
-                         const A x, const B y,
-                         const real_t accuracy)
-  {
+                         realvec_t fvml(A x, B y), const A x, const B y,
+                         const real_t accuracy) {
     realvec_t rstd;
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i], y));
     }
     const realvec_t rvml = fvml(x, y);
     const realvec_t dr = rstd - rvml;
     const realvec_t scale = fabs(rstd) + fabs(rvml) + realvec_t(1.0);
-    const boolvec_t isbad =
-      supported(x) && supported(rstd) && fabs(dr) > realvec_t(accuracy) * scale;
+    const boolvec_t isbad = supported(x) && supported(rstd) &&
+                            fabs(dr) > realvec_t(accuracy) * scale;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n"
@@ -583,38 +500,32 @@ struct vecmathlib_test {
            << "   abs-error(x,y)=" << fabs(dr) << "\n"
            << "   rel-error(x,y)=" << fabs(dr) / scale << "\n"
            << "   isbad(x,y)=" << isbad << "\n"
-           << "   accuracy=" << accuracy << "\n"
-           << flush;
+           << "   accuracy=" << accuracy << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B>
-  static void check_real(const char* const func,
-                         real_t fstd(typename A::scalar_t x,
-                                     typename B::scalar_t y),
-                         realvec_t fvml(A x, B y),
-                         const A x, const B y,
-                         const real_t accuracy,
-                         const realvec_t offset = RV(0.0))
-  {
+
+  template <typename A, typename B>
+  static void
+  check_real(const char *const func,
+             real_t fstd(typename A::scalar_t x, typename B::scalar_t y),
+             realvec_t fvml(A x, B y), const A x, const B y,
+             const real_t accuracy, const realvec_t offset = RV(0.0)) {
     realvec_t rstd;
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i], y[i]));
     }
     realvec_t rvml = fvml(x, y);
     // Fix up rvml by adding/subtracting the offset
-    rvml = ifthen(fabs(rstd-rvml)>fabs(offset/RV(2.0)),
-                  rvml + copysign(offset, rstd-rvml),
-                  rvml);
+    rvml = ifthen(fabs(rstd - rvml) > fabs(offset / RV(2.0)),
+                  rvml + copysign(offset, rstd - rvml), rvml);
     const realvec_t dr = rstd - rvml;
     const realvec_t scale = fabs(rstd) + fabs(rvml) + realvec_t(1.0);
-    const boolvec_t isbad =
-      supported(x) && supported(y) && supported(rstd) &&
-      fabs(dr) > realvec_t(accuracy) * scale;
+    const boolvec_t isbad = supported(x) && supported(y) && supported(rstd) &&
+                            fabs(dr) > realvec_t(accuracy) * scale;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n"
@@ -622,34 +533,31 @@ struct vecmathlib_test {
            << "   abs-error(x,y)=" << fabs(dr) << "\n"
            << "   rel-error(x,y)=" << fabs(dr) / scale << "\n"
            << "   isbad(x,y)=" << isbad << "\n"
-           << "   accuracy=" << accuracy << "\n"
-           << flush;
+           << "   accuracy=" << accuracy << "\n" << flush;
     }
   }
-  
-  template<typename A, typename B, typename C>
-  static void check_real(const char* const func,
+
+  template <typename A, typename B, typename C>
+  static void check_real(const char *const func,
                          real_t fstd(typename A::scalar_t x,
                                      typename B::scalar_t y,
                                      typename C::scalar_t z),
-                         realvec_t fvml(A x, B y, C z),
-                         const A x, const B y, C const z,
-                         const real_t accuracy)
-  {
+                         realvec_t fvml(A x, B y, C z), const A x, const B y,
+                         C const z, const real_t accuracy) {
     realvec_t rstd;
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       rstd.set_elt(i, fstd(x[i], y[i], z[i]));
     }
     const realvec_t rvml = fvml(x, y, z);
     const realvec_t dr = rstd - rvml;
     const realvec_t scale = fabs(rstd) + fabs(rvml) + realvec_t(1.0);
-    const boolvec_t isbad =
-      supported(x) && supported(y) && supported(z) && supported(rstd) &&
-      fabs(dr) > realvec_t(accuracy) * scale;
+    const boolvec_t isbad = supported(x) && supported(y) && supported(z) &&
+                            supported(rstd) &&
+                            fabs(dr) > realvec_t(accuracy) * scale;
     if (any(isbad)) {
-      ++ num_errors;
-      cout << setprecision(realvec_t::digits10+2)
-           << "Error in " << func << ":\n"
+      ++num_errors;
+      cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func
+           << ":\n"
            << "   x=" << x << " [" << hex(x) << "]\n"
            << "   y=" << y << " [" << hex(y) << "]\n"
            << "   z=" << z << " [" << hex(z) << "]\n"
@@ -658,61 +566,57 @@ struct vecmathlib_test {
            << "   abs-error(x,y,z)=" << fabs(dr) << "\n"
            << "   rel-error(x,y,z)=" << fabs(dr) / scale << "\n"
            << "   isbad(x,y,z)=" << isbad << "\n"
-           << "   accuracy=" << accuracy << "\n"
-           << flush;
+           << "   accuracy=" << accuracy << "\n" << flush;
     }
   }
-  
-  
-  
-  static real_t* align_mem(real_t* p)
-  {
+
+  static real_t *align_mem(real_t *p) {
     const ptrdiff_t alignment = sizeof(realvec_t);
-    p = (real_t*)((intptr_t(p) + alignment-1) & -alignment);
+    p = (real_t *)((intptr_t(p) + alignment - 1) & -alignment);
     assert(intptr_t(p) % alignment == 0);
     return p;
   }
-  static string add_suffix(const char* str, int i)
-  {
+  static string add_suffix(const char *str, int i) {
     ostringstream buf;
     buf << str << "." << i;
     return buf.str();
   }
-  static void test_mem()
-  {
-    cout << "   testing loada loadu storea storeu (errors may lead to segfaults)...\n" << flush;
+  static void test_mem() {
+    cout << "   testing loada loadu storea storeu (errors may lead to "
+            "segfaults)...\n"
+         << flush;
     const int n = 4;
     const int sz = realvec_t::size;
-    const int nbytes = n*sz*sizeof(real_t);
-    real_t* const x = align_mem(new real_t[(n+1)*sz]);
-    real_t* const xnew = align_mem(new real_t[(n+1)*sz]);
-    for (int i=0; i<n; ++i) {
+    const int nbytes = n * sz * sizeof(real_t);
+    real_t *const x = align_mem(new real_t[(n + 1) * sz]);
+    real_t *const xnew = align_mem(new real_t[(n + 1) * sz]);
+    for (int i = 0; i < n; ++i) {
       realvec_t xv = random(R(-10.0), R(+10.0));
-      memcpy(&x[i*sz], &xv, sizeof xv);
+      memcpy(&x[i * sz], &xv, sizeof xv);
     }
     const realvec_t z = random(R(-10.0), R(+10.0));
-    
+
     // loada
     {
       const real_t *p = &x[sz];
       realvec_t y = realvec_t::loada(p);
       check_mem("loada", y, p, z, ~0);
     }
-    
+
     // loadu
-    for (ptrdiff_t i=0; i<realvec_t::size; ++i) {
+    for (ptrdiff_t i = 0; i < realvec_t::size; ++i) {
       const real_t *p = &x[sz];
-      realvec_t y = realvec_t::loadu(p+i);
-      check_mem(add_suffix("loadu", i).c_str(), y, p+i, z, ~0);
+      realvec_t y = realvec_t::loadu(p + i);
+      check_mem(add_suffix("loadu", i).c_str(), y, p + i, z, ~0);
     }
-    
+
     // loadu(ioff)
-    for (ptrdiff_t ioff=0; ioff<realvec_t::size; ++ioff) {
+    for (ptrdiff_t ioff = 0; ioff < realvec_t::size; ++ioff) {
       const real_t *p = &x[sz];
       realvec_t y = realvec_t::loadu(p, ioff);
-      check_mem(add_suffix("loadu(ioff)", ioff).c_str(), y, p+ioff, z, ~0);
+      check_mem(add_suffix("loadu(ioff)", ioff).c_str(), y, p + ioff, z, ~0);
     }
-    
+
     // storea
     {
       memcpy(xnew, x, nbytes);
@@ -720,50 +624,51 @@ struct vecmathlib_test {
       storea(z, p);
       check_mem("storea", p, z, &x[sz], ~0);
     }
-    
+
     // storeu
-    for (ptrdiff_t i=0; i<realvec_t::size; ++i) {
+    for (ptrdiff_t i = 0; i < realvec_t::size; ++i) {
       memcpy(xnew, x, nbytes);
       real_t *p = &xnew[sz];
-      storeu(z, p+i);
-      check_mem(add_suffix("storeu", i).c_str(), p+i, z, &x[sz+i], ~0);
+      storeu(z, p + i);
+      check_mem(add_suffix("storeu", i).c_str(), p + i, z, &x[sz + i], ~0);
     }
-    
+
     // storeu
-    for (ptrdiff_t ioff=0; ioff<realvec_t::size; ++ioff) {
+    for (ptrdiff_t ioff = 0; ioff < realvec_t::size; ++ioff) {
       memcpy(xnew, x, nbytes);
       real_t *p = &xnew[sz];
       storeu(z, p, ioff);
-      check_mem(add_suffix("storeu(ioff)", ioff).c_str(),
-                p+ioff, z, &x[sz+ioff], ~0);
+      check_mem(add_suffix("storeu(ioff)", ioff).c_str(), p + ioff, z,
+                &x[sz + ioff], ~0);
     }
-    
-    for (int mval=0; mval<(1<<realvec_t::size); ++mval) {
+
+    for (int mval = 0; mval < (1 << realvec_t::size); ++mval) {
       boolvec_t mbool;
-      for (int i=0; i<realvec_t::size; ++i) mbool.set_elt(i, mval & (1<<i));
+      for (int i = 0; i < realvec_t::size; ++i)
+        mbool.set_elt(i, mval & (1 << i));
       typename realvec_t::mask_t mask(mbool);
-      
+
       // loada(mask)
       {
         const real_t *p = &x[sz];
         realvec_t y = loada(p, z, mask);
         check_mem("loada(mask)", y, p, z, mval);
       }
-      
+
       // loadu(mask)
-      for (ptrdiff_t i=0; i<realvec_t::size; ++i) {
+      for (ptrdiff_t i = 0; i < realvec_t::size; ++i) {
         const real_t *p = &x[sz];
-        realvec_t y = loadu(p+i, z, mask);
-        check_mem("loadu(mask)", y, p+i, z, mval);
+        realvec_t y = loadu(p + i, z, mask);
+        check_mem("loadu(mask)", y, p + i, z, mval);
       }
-      
+
       // loadu(ioff, mask)
-      for (ptrdiff_t ioff=0; ioff<realvec_t::size; ++ioff) {
+      for (ptrdiff_t ioff = 0; ioff < realvec_t::size; ++ioff) {
         const real_t *p = &x[sz];
         realvec_t y = loadu(p, ioff, z, mask);
-        check_mem("loadu(ioff,mask)", y, p+ioff, z, mval);
+        check_mem("loadu(ioff,mask)", y, p + ioff, z, mval);
       }
-      
+
       // storea
       {
         memcpy(xnew, x, nbytes);
@@ -771,37 +676,35 @@ struct vecmathlib_test {
         storea(z, p, mask);
         check_mem("storea(mask)", p, z, &x[sz], mval);
       }
-      
+
       // storeu
-      for (ptrdiff_t i=0; i<realvec_t::size; ++i) {
+      for (ptrdiff_t i = 0; i < realvec_t::size; ++i) {
         memcpy(xnew, x, nbytes);
         real_t *p = &xnew[sz];
-        storeu(z, p+i, mask);
-        check_mem("storeu(mask)", p+i, z, &x[sz+i], mval);
+        storeu(z, p + i, mask);
+        check_mem("storeu(mask)", p + i, z, &x[sz + i], mval);
       }
-      
+
       // storeu
-      for (ptrdiff_t ioff=0; ioff<realvec_t::size; ++ioff) {
+      for (ptrdiff_t ioff = 0; ioff < realvec_t::size; ++ioff) {
         memcpy(xnew, x, nbytes);
         real_t *p = &xnew[sz];
         storeu(z, p, ioff, mask);
-        check_mem("storeu(ioff,mask)", p+ioff, z, &x[sz+ioff], mval);
+        check_mem("storeu(ioff,mask)", p + ioff, z, &x[sz + ioff], mval);
       }
-      
+
     } // for mval
   }
-  
-  
-  
-  template<typename T>
-  static T local_ifthen(bool b, T x, T y) { return b ? x : y; }
-  static void test_bool()
-  {
+
+  template <typename T> static T local_ifthen(bool b, T x, T y) {
+    return b ? x : y;
+  }
+  static void test_bool() {
     cout << "   testing boolean operations...\n" << flush;
-    
+
     const boolvec_t bf = boolvec_t(false);
     const boolvec_t bt = boolvec_t(true);
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       check_bool("false", false, bf[i]);
       check_bool("true", true, bt[i]);
     }
@@ -809,32 +712,32 @@ struct vecmathlib_test {
     check_bool("all", true, all(bt), true);
     check_bool("any", false, any(bf), false);
     check_bool("any", true, any(bt), true);
-    
+
     boolvec_t b0 = bt;
     boolvec_t b1 = bf;
-    for (int n=0; n<realvec_t::size; ++n) {
+    for (int n = 0; n < realvec_t::size; ++n) {
       b0.set_elt(n, false);
       b1.set_elt(n, true);
-      for (int i=0; i<realvec_t::size; ++i) {
-        check_bool("set_elt", i<=n ? false : true, b0[i], false);
-        check_bool("set_elt", i<=n ? true : false, b1[i], true);
+      for (int i = 0; i < realvec_t::size; ++i) {
+        check_bool("set_elt", i <= n ? false : true, b0[i], false);
+        check_bool("set_elt", i <= n ? true : false, b1[i], true);
       }
     }
-    
-    for (int n=0; n<(1<<realvec_t::size); ++n) {
+
+    for (int n = 0; n < (1 << realvec_t::size); ++n) {
       boolvec_t x;
-      for (int i=0; i<realvec_t::size; ++i) {
-        x.set_elt(i, n & (1<<i));
+      for (int i = 0; i < realvec_t::size; ++i) {
+        x.set_elt(i, n & (1 << i));
       }
-      for (int i=0; i<realvec_t::size; ++i) {
-        bool rstd = n & (1<<i);
+      for (int i = 0; i < realvec_t::size; ++i) {
+        bool rstd = n & (1 << i);
         bool rvml = x[i];
         check_bool("[]", rstd, rvml, x);
       }
-      
+
       {
         boolvec_t rstd;
-        for (int i=0; i<realvec_t::size; ++i) {
+        for (int i = 0; i < realvec_t::size; ++i) {
           rstd.set_elt(i, !x[i]);
         }
         boolvec_t rvml = !x;
@@ -842,7 +745,7 @@ struct vecmathlib_test {
       }
       {
         bool rstd = x[0];
-        for (int i=1; i<realvec_t::size; ++i) {
+        for (int i = 1; i < realvec_t::size; ++i) {
           rstd &= x[i];
         }
         bool rvml = all(x);
@@ -850,39 +753,36 @@ struct vecmathlib_test {
       }
       {
         bool rstd = x[0];
-        for (int i=1; i<realvec_t::size; ++i) {
+        for (int i = 1; i < realvec_t::size; ++i) {
           rstd |= x[i];
         }
         bool rvml = any(x);
         check_bool("any", rstd, rvml, x);
       }
-      check_bool
-        ("ifthen(bool)",
-         local_ifthen<bool>,
-         (boolvec_t(*)(boolvec_t,boolvec_t,boolvec_t))vecmathlib::ifthen,
-         x, BV(false), BV(true));
-      check_int("ifthen(int)",
-                local_ifthen<int_t>,
-                (intvec_t(*)(boolvec_t,intvec_t,intvec_t))vecmathlib::ifthen,
+      check_bool(
+          "ifthen(bool)", local_ifthen<bool>,
+          (boolvec_t (*)(boolvec_t, boolvec_t, boolvec_t))vecmathlib::ifthen, x,
+          BV(false), BV(true));
+      check_int("ifthen(int)", local_ifthen<int_t>,
+                (intvec_t (*)(boolvec_t, intvec_t, intvec_t))vecmathlib::ifthen,
                 x, IV(I(1)), IV(I(2)));
-      check_real("ifthen(real)",
-                 local_ifthen<real_t>,
-                 ((realvec_t(*)(boolvec_t,realvec_t,realvec_t))
-                  vecmathlib::ifthen),
-                x, RV(1.0), RV(2.0), R(0.0));
-    }
-    
-    for (int n=0; n<(1<<realvec_t::size); ++n) {
-      for (int m=0; m<(1<<realvec_t::size); ++m) {
+      check_real(
+          "ifthen(real)", local_ifthen<real_t>,
+          ((realvec_t (*)(boolvec_t, realvec_t, realvec_t))vecmathlib::ifthen),
+          x, RV(1.0), RV(2.0), R(0.0));
+    }
+
+    for (int n = 0; n < (1 << realvec_t::size); ++n) {
+      for (int m = 0; m < (1 << realvec_t::size); ++m) {
         boolvec_t x, y;
-        for (int i=0; i<realvec_t::size; ++i) {
-          x.set_elt(i, n & (1<<i));
-          y.set_elt(i, m & (1<<i));
+        for (int i = 0; i < realvec_t::size; ++i) {
+          x.set_elt(i, n & (1 << i));
+          y.set_elt(i, m & (1 << i));
         }
-        
+
         {
           boolvec_t rstd;
-          for (int i=0; i<realvec_t::size; ++i) {
+          for (int i = 0; i < realvec_t::size; ++i) {
             rstd.set_elt(i, x[i] && y[i]);
           }
           boolvec_t rvml = x && y;
@@ -890,7 +790,7 @@ struct vecmathlib_test {
         }
         {
           boolvec_t rstd;
-          for (int i=0; i<realvec_t::size; ++i) {
+          for (int i = 0; i < realvec_t::size; ++i) {
             rstd.set_elt(i, x[i] || y[i]);
           }
           boolvec_t rvml = x || y;
@@ -898,7 +798,7 @@ struct vecmathlib_test {
         }
         {
           boolvec_t rstd;
-          for (int i=0; i<realvec_t::size; ++i) {
+          for (int i = 0; i < realvec_t::size; ++i) {
             rstd.set_elt(i, x[i] == y[i]);
           }
           boolvec_t rvml = x == y;
@@ -906,7 +806,7 @@ struct vecmathlib_test {
         }
         {
           boolvec_t rstd;
-          for (int i=0; i<realvec_t::size; ++i) {
+          for (int i = 0; i < realvec_t::size; ++i) {
             rstd.set_elt(i, x[i] != y[i]);
           }
           boolvec_t rvml = x != y;
@@ -915,322 +815,374 @@ struct vecmathlib_test {
       }
     }
   }
-  
-  
-  
+
   static bool local_convert_bool(int_t x) { return x; }
   static int_t local_convert_int(bool x) { return x; }
-  template<typename T> static T local_pos(T x) { return +x; }
-  template<typename T> static T local_neg(T x) { return -x; }
-  template<typename T> static T local_not(T x) { return ~x; }
-  template<typename T> static T local_add(T x, T y) { return x+y; }
-  template<typename T> static T local_sub(T x, T y) { return x-y; }
-  template<typename T> static T local_mul(T x, T y) { return x*y; }
-  template<typename T> static T local_div(T x, T y) { return x/y; }
-  template<typename T> static T local_mod(T x, T y) { return x%y; }
-  template<typename T> static T local_and(T x, T y) { return x&y; }
-  template<typename T> static T local_or(T x, T y) { return x|y; }
-  template<typename T> static T local_xor(T x, T y) { return x^y; }
-  
-  static int_t local_lsr(int_t x, int_t y) { return uint_t(x)>>uint_t(y); }
-  template<typename T> static T local_srs(T x, typename T::scalar_t y)
-  {
-    return x>>y;
+  template <typename T> static T local_pos(T x) { return +x; }
+  template <typename T> static T local_neg(T x) { return -x; }
+  template <typename T> static T local_not(T x) { return ~x; }
+  template <typename T> static T local_add(T x, T y) { return x + y; }
+  template <typename T> static T local_sub(T x, T y) { return x - y; }
+  template <typename T> static T local_mul(T x, T y) { return x * y; }
+  template <typename T> static T local_div(T x, T y) { return x / y; }
+  template <typename T> static T local_mod(T x, T y) { return x % y; }
+  template <typename T> static T local_and(T x, T y) { return x & y; }
+  template <typename T> static T local_or(T x, T y) { return x | y; }
+  template <typename T> static T local_xor(T x, T y) { return x ^ y; }
+
+  static int_t local_lsr(int_t x, int_t y) { return uint_t(x) >> uint_t(y); }
+  template <typename T> static T local_srs(T x, typename T::scalar_t y) {
+    return x >> y;
   }
-  template<typename T> static T local_sls(T x, typename T::scalar_t y)
-  {
-    return x<<y;
+  template <typename T> static T local_sls(T x, typename T::scalar_t y) {
+    return x << y;
   }
-  template<typename T> static T local_sr(T x, T y) { return x>>y; }
-  template<typename T> static T local_sl(T x, T y) { return x<<y; }
-
-  template<typename T> static bool local_isignbit(T x) { return x<0; }
-  template<typename T> static bool local_eq(T x, T y) { return x==y; }
-  template<typename T> static bool local_ne(T x, T y) { return x!=y; }
-  template<typename T> static bool local_lt(T x, T y) { return x<y; }
-  template<typename T> static bool local_le(T x, T y) { return x<=y; }
-  template<typename T> static bool local_gt(T x, T y) { return x>y; }
-  template<typename T> static bool local_ge(T x, T y) { return x>=y; }
-  template<typename T> static boolvec_t local_veq(T x, T y) { return x==y; }
-  template<typename T> static boolvec_t local_vne(T x, T y) { return x!=y; }
-  template<typename T> static boolvec_t local_vlt(T x, T y) { return x<y; }
-  template<typename T> static boolvec_t local_vle(T x, T y) { return x<=y; }
-  template<typename T> static boolvec_t local_vgt(T x, T y) { return x>y; }
-  template<typename T> static boolvec_t local_vge(T x, T y) { return x>=y; }
-  static void test_int()
-  {
+  template <typename T> static T local_sr(T x, T y) { return x >> y; }
+  template <typename T> static T local_sl(T x, T y) { return x << y; }
+
+  template <typename T> static bool local_isignbit(T x) { return x < 0; }
+  template <typename T> static bool local_eq(T x, T y) { return x == y; }
+  template <typename T> static bool local_ne(T x, T y) { return x != y; }
+  template <typename T> static bool local_lt(T x, T y) { return x < y; }
+  template <typename T> static bool local_le(T x, T y) { return x <= y; }
+  template <typename T> static bool local_gt(T x, T y) { return x > y; }
+  template <typename T> static bool local_ge(T x, T y) { return x >= y; }
+  template <typename T> static boolvec_t local_veq(T x, T y) { return x == y; }
+  template <typename T> static boolvec_t local_vne(T x, T y) { return x != y; }
+  template <typename T> static boolvec_t local_vlt(T x, T y) { return x < y; }
+  template <typename T> static boolvec_t local_vle(T x, T y) { return x <= y; }
+  template <typename T> static boolvec_t local_vgt(T x, T y) { return x > y; }
+  template <typename T> static boolvec_t local_vge(T x, T y) { return x >= y; }
+  static void test_int() {
     cout << "   testing integer operations...\n" << flush;
-    
+
     intvec_t i0 = intvec_t(I(0));
     intvec_t i1 = intvec_t(I(1));
     intvec_t iiota = intvec_t::iota();
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       check_int("0", 0, i0[i]);
       check_int("1", 1, i1[i]);
       check_int("iota", i, iiota[i]);
     }
-    
+
     i0 = intvec_t(I(1));
     i1 = intvec_t(I(0));
-    for (int n=0; n<realvec_t::size; ++n) {
+    for (int n = 0; n < realvec_t::size; ++n) {
       i0.set_elt(n, 0);
       i1.set_elt(n, 1);
-      for (int i=0; i<realvec_t::size; ++i) {
-        check_bool("set_elt", i<=n ? 0 : 1, i0[i], 0);
-        check_bool("set_elt", i<=n ? 1 : 0, i1[i], 1);
+      for (int i = 0; i < realvec_t::size; ++i) {
+        check_bool("set_elt", i <= n ? 0 : 1, i0[i], 0);
+        check_bool("set_elt", i <= n ? 1 : 0, i1[i], 1);
       }
     }
-    
+
     const int_t int_min = std::numeric_limits<int_t>::min();
     const int_t int_max = std::numeric_limits<int_t>::max();
     const int_t values[] = {
-      0, 1, 2, 3, -1, -2, -3,
-      int_min, int_min+1, int_min+2, int_min+3,
-      int_max, int_max-1, int_max-2, int_max-3,
+        0,           1,       2,           3,           -1,
+        -2,          -3,      int_min,     int_min + 1, int_min + 2,
+        int_min + 3, int_max, int_max - 1, int_max - 2, int_max - 3,
     };
     const int nvalues = sizeof values / sizeof *values;
 
-    for (int i=0; i<nvalues*nvalues+2*imax; ++i) {
+    for (int i = 0; i < nvalues * nvalues + 2 * imax; ++i) {
       intvec_t x, y;
-      if (i<nvalues*nvalues) {
-        x = values[i%nvalues];
-        y = values[i/nvalues];
-      } else if (i<nvalues*nvalues+imax) {
+      if (i < nvalues * nvalues) {
+        x = values[i % nvalues];
+        y = values[i / nvalues];
+      } else if (i < nvalues * nvalues + imax) {
         x = random(I(-100), I(+100));
         y = random(I(-100), I(+100));
       } else {
-        x = random(int_min/2, int_max/2);
-        y = random(int_min/2, int_max/2);
+        x = random(int_min / 2, int_max / 2);
+        y = random(int_min / 2, int_max / 2);
       }
       boolvec_t b = convert_bool(random(I(0), I(1)));
-      
-      check_bool<IV>("convert_bool(int)",
-                     local_convert_bool, vecmathlib::convert_bool, x);
-      check_int<BV>("convert_int(bool)",
-                    local_convert_int, vecmathlib::convert_int, b);
-      
+
+      check_bool<IV>("convert_bool(int)", local_convert_bool,
+                     vecmathlib::convert_bool, x);
+      check_int<BV>("convert_int(bool)", local_convert_int,
+                    vecmathlib::convert_int, b);
+
       check_int<IV>("+", local_pos, local_pos, x);
       check_int<IV>("-", local_neg, local_neg, x);
       check_int<IV>("~", local_not, local_not, x);
-      
-      check_int<IV,IV>("+", local_add, local_add, x, y);
-      check_int<IV,IV>("-", local_sub, local_sub, x, y);
-      check_int<IV,IV>("&", local_and, local_and, x, y);
-      check_int<IV,IV>("|", local_or, local_or, x, y);
-      check_int<IV,IV>("^", local_xor, local_xor, x, y);
-      
-      const int_t bits = 8*sizeof(int_t);
-      check_int<IV,I>("lsr", local_lsr, vecmathlib::lsr, x, y[0] & (bits-1));
-      check_int<IV,I>(">>", local_sr, local_srs, x, y[0] & (bits-1));
-      check_int<IV,I>("<<", local_sl, local_sls, x, y[0] & (bits-1));
-      check_int<IV,IV>("lsr", local_lsr, vecmathlib::lsr, x, y & IV(bits-1));
-      check_int<IV,IV>(">>", local_sr, local_sr, x, y & IV(bits-1));
-      check_int<IV,IV>("<<", local_sl, local_sl, x, y & IV(bits-1));
-      
+
+      check_int<IV, IV>("+", local_add, local_add, x, y);
+      check_int<IV, IV>("-", local_sub, local_sub, x, y);
+      check_int<IV, IV>("&", local_and, local_and, x, y);
+      check_int<IV, IV>("|", local_or, local_or, x, y);
+      check_int<IV, IV>("^", local_xor, local_xor, x, y);
+
+      const int_t bits = 8 * sizeof(int_t);
+      check_int<IV, I>("lsr", local_lsr, vecmathlib::lsr, x, y[0] & (bits - 1));
+      check_int<IV, I>(">>", local_sr, local_srs, x, y[0] & (bits - 1));
+      check_int<IV, I>("<<", local_sl, local_sls, x, y[0] & (bits - 1));
+      check_int<IV, IV>("lsr", local_lsr, vecmathlib::lsr, x, y & IV(bits - 1));
+      check_int<IV, IV>(">>", local_sr, local_sr, x, y & IV(bits - 1));
+      check_int<IV, IV>("<<", local_sl, local_sl, x, y & IV(bits - 1));
+
       check_bool<IV>("isignbit", local_isignbit, vecmathlib::isignbit, x);
-      check_bool<IV,IV>("==", local_eq, local_veq, x, y);
-      check_bool<IV,IV>("!=", local_ne, local_vne, x, y);
-      check_bool<IV,IV>("<", local_lt, local_vlt, x, y);
-      check_bool<IV,IV>("<=", local_le, local_vle, x, y);
-      check_bool<IV,IV>(">", local_gt, local_vgt, x, y);
-      check_bool<IV,IV>(">=", local_ge, local_vge, x, y);
+      check_bool<IV, IV>("==", local_eq, local_veq, x, y);
+      check_bool<IV, IV>("!=", local_ne, local_vne, x, y);
+      check_bool<IV, IV>("<", local_lt, local_vlt, x, y);
+      check_bool<IV, IV>("<=", local_le, local_vle, x, y);
+      check_bool<IV, IV>(">", local_gt, local_vgt, x, y);
+      check_bool<IV, IV>(">=", local_ge, local_vge, x, y);
     }
   }
-  
-  static void test_real()
-  {
+
+  static void test_real() {
     cout << "   testing real operations...\n" << flush;
-    
+
     realvec_t r0 = realvec_t(0.0);
     realvec_t r1 = realvec_t(1.0);
-    for (int i=0; i<realvec_t::size; ++i) {
+    for (int i = 0; i < realvec_t::size; ++i) {
       check_real("0.0", R(0.0), r0[i]);
       check_real("1.0", R(1.0), r1[i]);
     }
-    
+
     r0 = realvec_t(1.0);
     r1 = realvec_t(0.0);
-    for (int n=0; n<realvec_t::size; ++n) {
+    for (int n = 0; n < realvec_t::size; ++n) {
       r0.set_elt(n, R(0.0));
       r1.set_elt(n, R(1.0));
-      for (int i=0; i<realvec_t::size; ++i) {
-        check_bool("set_elt", i<=n ? R(0.0) : R(1.0), r0[i], R(0.0));
-        check_bool("set_elt", i<=n ? R(1.0) : R(0.0), r1[i], R(1.0));
+      for (int i = 0; i < realvec_t::size; ++i) {
+        check_bool("set_elt", i <= n ? R(0.0) : R(1.0), r0[i], R(0.0));
+        check_bool("set_elt", i <= n ? R(1.0) : R(0.0), r1[i], R(1.0));
       }
     }
-    
+
     // barrier
     realvec_t rcancel = r1;
     rcancel += RV(R(FP::max() / 2));
     rcancel.barrier();
     rcancel -= RV(R(FP::max() / 2));
     check_real("barrier", R(0.0), rcancel[0]);
-    
+
     // rounding (break ties to even, or break ties away from zero?)
     realvec_t rbase = RV(R(1.0));
-    rbase += RV(FP::epsilon()/2);
+    rbase += RV(FP::epsilon() / 2);
     check_real("flt_rounds", R(1.0), rbase[0]);
     rbase = RV(R(1.0) + FP::epsilon());
-    rbase += RV(FP::epsilon()/2);
-    check_real("flt_rounds", R(1.0) + 2*FP::epsilon(), rbase[0]);
+    rbase += RV(FP::epsilon() / 2);
+    check_real("flt_rounds", R(1.0) + 2 * FP::epsilon(), rbase[0]);
   }
-  
-  static int_t local_bitifthen(int_t x, int_t y, int_t z)
-  {
+
+  static int_t local_bitifthen(int_t x, int_t y, int_t z) {
     return (x & y) | (~x & z);
   }
-  static int_t local_clz(int_t x)
-  {
+  static int_t local_clz(int_t x) {
     int bits = CHAR_BIT * sizeof(x);
     int res = 0;
-    for (; res<bits; ++res) {
-      if (x & (I(1) << (bits-res-1))) break;
+    for (; res < bits; ++res) {
+      if (x & (I(1) << (bits - res - 1)))
+        break;
     }
     return res;
   }
-  static int_t local_max(int_t x, int_t y)
-  {
-    return std::max(x, y);
-  }
-  static int_t local_min(int_t x, int_t y)
-  {
-    return std::min(x, y);
-  }
-  static int_t local_popcount(int_t x)
-  {
+  static int_t local_max(int_t x, int_t y) { return std::max(x, y); }
+  static int_t local_min(int_t x, int_t y) { return std::min(x, y); }
+  static int_t local_popcount(int_t x) {
     int bits = CHAR_BIT * sizeof(x);
     int res = 0;
-    for (int d=0; d<bits; ++d) {
-      if (x & (I(1) << d)) ++res;
+    for (int d = 0; d < bits; ++d) {
+      if (x & (I(1) << d))
+        ++res;
     }
     return res;
   }
-  static int_t local_rotate(int_t x, int_t n)
-  {
+  static int_t local_rotate(int_t x, int_t n) {
     int_t mask = CHAR_BIT * sizeof(int_t) - 1;
     int_t left = x << (n & mask);
     int_t right = I(U(x) >> U(-n & mask));
     return left | right;
   }
-  static void test_abs()
-  {
-    cout << "   testing abs bitifthen clz isignbit max min popcount rotate...\n" << flush;
-        
-    for (int i=0; i<imax; ++i) {
+  static void test_abs() {
+    cout << "   testing abs bitifthen clz isignbit max min popcount rotate...\n"
+         << flush;
+
+    for (int i = 0; i < imax; ++i) {
       const intvec_t x = random(I(-1000000), I(+1000000));
       const intvec_t y = random(I(-1000000), I(+1000000));
       const intvec_t z = random(I(-1000000), I(+1000000));
-      
+
       check_int<IV>("abs", std::abs, vecmathlib::abs, x);
-      check_int<IV,IV,IV>("bitifthen",
-                          local_bitifthen, vecmathlib::bitifthen, x, y, z);
+      check_int<IV, IV, IV>("bitifthen", local_bitifthen, vecmathlib::bitifthen,
+                            x, y, z);
       check_int<IV>("clz", local_clz, vecmathlib::clz, x);
-      check_int<IV,IV>("max", local_max, vecmathlib::max, x, y);
-      check_int<IV,IV>("min", local_min, vecmathlib::min, x, y);
+      check_int<IV, IV>("max", local_max, vecmathlib::max, x, y);
+      check_int<IV, IV>("min", local_min, vecmathlib::min, x, y);
       check_int<IV>("popcount", local_popcount, vecmathlib::popcount, x);
-      check_int<IV,IV>("rotate", local_rotate, vecmathlib::rotate, x, y[0]);
-      check_int<IV,IV>("rotate", local_rotate, vecmathlib::rotate, x, y);
+      check_int<IV, IV>("rotate", local_rotate, vecmathlib::rotate, x, y[0]);
+      check_int<IV, IV>("rotate", local_rotate, vecmathlib::rotate, x, y);
     }
   }
-  
+
   // Change signature: "int" -> "int_t"
-  static real_t local_frexp0(real_t x)
-  {
+  static real_t local_frexp0(real_t x) {
     int r;
     return vml_std::frexp(x, &r);
   }
-  static int_t local_frexp1(real_t x)
-  {
-    if (vml_std::isinf(x)) return std::numeric_limits<int_t>::max();
-    if (vml_std::isnan(x)) return std::numeric_limits<int_t>::min();
+  static int_t local_frexp1(real_t x) {
+    if (vml_std::isinf(x))
+      return std::numeric_limits<int_t>::max();
+    if (vml_std::isnan(x))
+      return std::numeric_limits<int_t>::min();
     int r;
     vml_std::frexp(x, &r);
     return r;
   }
-  static realvec_t local_vfrexp0(realvec_t x)
-  {
+  static realvec_t local_vfrexp0(realvec_t x) {
     intvec_t r;
     return vecmathlib::frexp(x, &r);
   }
-  static intvec_t local_vfrexp1(realvec_t x)
-  {
+  static intvec_t local_vfrexp1(realvec_t x) {
     intvec_t r;
     vecmathlib::frexp(x, &r);
     return r;
   }
-  static int_t local_ilogb(real_t x)
-  {
-    if (x==R(0.0)) return std::numeric_limits<int_t>::min();
-    if (vml_std::isinf(x)) return std::numeric_limits<int_t>::max();
-    if (vml_std::isnan(x)) return std::numeric_limits<int_t>::min();
+  static int_t local_ilogb(real_t x) {
+    if (x == R(0.0))
+      return std::numeric_limits<int_t>::min();
+    if (vml_std::isinf(x))
+      return std::numeric_limits<int_t>::max();
+    if (vml_std::isnan(x))
+      return std::numeric_limits<int_t>::min();
     return vml_std::ilogb(x);
   }
   static real_t local_ldexp(real_t x, int_t n) { return ldexp(x, n); }
-  static real_t local_mad(real_t x, real_t y, real_t z) { return x*y+z; }
-  static void test_fabs()
-  {
-    cout << "   testing + - + - * == != < <= > >= copysign fabs fdim fma fmax fmin frexp ilogb isfinite isinf isnan isnormal ldexp mad nextafter signbit...\n" << flush;
-    
+  static real_t local_mad(real_t x, real_t y, real_t z) { return x * y + z; }
+  static void test_fabs() {
+    cout << "   testing + - + - * == != < <= > >= copysign fabs fdim fma fmax "
+            "fmin frexp ilogb isfinite isinf isnan isnormal ldexp mad "
+            "nextafter signbit...\n"
+         << flush;
+
     const real_t eps = FP::epsilon();
     const real_t int_min = R(std::numeric_limits<int_t>::min());
     const real_t int_max = R(std::numeric_limits<int_t>::max());
     const real_t uint_min = R(std::numeric_limits<uint_t>::min());
     const real_t uint_max = R(std::numeric_limits<uint_t>::max());
     const real_t values[] = {
-      R(+0.0), R(+0.1), R(+0.9), R(+1.0), R(+1.1),
-      R(-0.0), R(-0.1), R(-0.9), R(-1.0), R(-1.1),
-      R(+0.0)+eps, R(+0.1)+eps, R(+0.9)+eps, R(+1.0)+eps, R(+1.1)+eps,
-      R(-0.0)+eps, R(-0.1)+eps, R(-0.9)+eps, R(-1.0)+eps, R(-1.1)+eps,
-      R(+0.0)-eps, R(+0.1)-eps, R(+0.9)-eps, R(+1.0)-eps, R(+1.1)-eps,
-      R(-0.0)-eps, R(-0.1)-eps, R(-0.9)-eps, R(-1.0)-eps, R(-1.1)-eps,
+        R(+0.0),
+        R(+0.1),
+        R(+0.9),
+        R(+1.0),
+        R(+1.1),
+        R(-0.0),
+        R(-0.1),
+        R(-0.9),
+        R(-1.0),
+        R(-1.1),
+        R(+0.0) + eps,
+        R(+0.1) + eps,
+        R(+0.9) + eps,
+        R(+1.0) + eps,
+        R(+1.1) + eps,
+        R(-0.0) + eps,
+        R(-0.1) + eps,
+        R(-0.9) + eps,
+        R(-1.0) + eps,
+        R(-1.1) + eps,
+        R(+0.0) - eps,
+        R(+0.1) - eps,
+        R(+0.9) - eps,
+        R(+1.0) - eps,
+        R(+1.1) - eps,
+        R(-0.0) - eps,
+        R(-0.1) - eps,
+        R(-0.9) - eps,
+        R(-1.0) - eps,
+        R(-1.1) - eps,
 #ifdef VML_HAVE_DENORMALS
-      +FP::min(), +FP::min()*(R(1.0)+eps), +FP::min()*R(2.0),
-      -FP::min(), -FP::min()*(R(1.0)+eps), -FP::min()*R(2.0),
+        +FP::min(),
+        +FP::min() * (R(1.0) + eps),
+        +FP::min() * R(2.0),
+        -FP::min(),
+        -FP::min() * (R(1.0) + eps),
+        -FP::min() * R(2.0),
 #endif
-      +FP::max(), +FP::max()*(R(1.0)-eps), +FP::max()*(R(1.0)-R(2.0)*eps),
-      -FP::max(), -FP::max()*(R(1.0)-eps), -FP::max()*(R(1.0)-R(2.0)*eps),
-      +R(0.5)*FP::max(), +R(0.5)*FP::max()*(R(1.0)+eps),
-      -R(0.5)*FP::max(), -R(0.5)*FP::max()*(R(1.0)+eps),
+        +FP::max(),
+        +FP::max() * (R(1.0) - eps),
+        +FP::max() * (R(1.0) - R(2.0) * eps),
+        -FP::max(),
+        -FP::max() * (R(1.0) - eps),
+        -FP::max() * (R(1.0) - R(2.0) * eps),
+        +R(0.5) * FP::max(),
+        +R(0.5) * FP::max() * (R(1.0) + eps),
+        -R(0.5) * FP::max(),
+        -R(0.5) * FP::max() * (R(1.0) + eps),
 #ifdef VML_HAVE_INF
-      +R(1.0/0.0),              // +FP::infinity()
-      -R(1.0/0.0),              // -FP::infinity()
+        +R(1.0 / 0.0), // +FP::infinity()
+        -R(1.0 / 0.0), // -FP::infinity()
 #endif
 #ifdef VML_HAVE_NAN
-      R(0.0/0.0),               // FP::quiet_NaN()
+        R(0.0 / 0.0), // FP::quiet_NaN()
 #endif
-      +int_min, +int_max, +uint_min, +uint_max,
-      -int_min, -int_max, -uint_min, -uint_max,
-      +int_min+R(0.1), +int_max+R(0.1), +uint_min+R(0.1), +uint_max+R(0.1),
-      -int_min+R(0.1), -int_max+R(0.1), -uint_min+R(0.1), -uint_max+R(0.1),
-      +int_min-R(0.1), +int_max-R(0.1), +uint_min-R(0.1), +uint_max-R(0.1),
-      -int_min-R(0.1), -int_max-R(0.1), -uint_min-R(0.1), -uint_max-R(0.1),
-      +int_min+R(1.0), +int_max+R(1.0), +uint_min+R(1.0), +uint_max+R(1.0),
-      -int_min+R(1.0), -int_max+R(1.0), -uint_min+R(1.0), -uint_max+R(1.0),
-      +int_min-R(1.0), +int_max-R(1.0), +uint_min-R(1.0), +uint_max-R(1.0),
-      -int_min-R(1.0), -int_max-R(1.0), -uint_min-R(1.0), -uint_max-R(1.0),
-      -R(443.9999425),
+        +int_min,
+        +int_max,
+        +uint_min,
+        +uint_max,
+        -int_min,
+        -int_max,
+        -uint_min,
+        -uint_max,
+        +int_min + R(0.1),
+        +int_max + R(0.1),
+        +uint_min + R(0.1),
+        +uint_max + R(0.1),
+        -int_min + R(0.1),
+        -int_max + R(0.1),
+        -uint_min + R(0.1),
+        -uint_max + R(0.1),
+        +int_min - R(0.1),
+        +int_max - R(0.1),
+        +uint_min - R(0.1),
+        +uint_max - R(0.1),
+        -int_min - R(0.1),
+        -int_max - R(0.1),
+        -uint_min - R(0.1),
+        -uint_max - R(0.1),
+        +int_min + R(1.0),
+        +int_max + R(1.0),
+        +uint_min + R(1.0),
+        +uint_max + R(1.0),
+        -int_min + R(1.0),
+        -int_max + R(1.0),
+        -uint_min + R(1.0),
+        -uint_max + R(1.0),
+        +int_min - R(1.0),
+        +int_max - R(1.0),
+        +uint_min - R(1.0),
+        +uint_max - R(1.0),
+        -int_min - R(1.0),
+        -int_max - R(1.0),
+        -uint_min - R(1.0),
+        -uint_max - R(1.0),
+        -R(443.9999425),
     };
     const int nvalues = sizeof values / sizeof *values;
-    
-    for (int i=0; i<8*nvalues+imax; ++i) {
-      const realvec_t x =
-        i<8*nvalues && i&1 ? RV(values[i/8]) : random(R(-10.0), R(+10.0));
-      const realvec_t y =
-        i<8*nvalues && i&2 ? RV(values[i/8]) : random(R(-10.0), R(+10.0));
-      const realvec_t z =
-        i<8*nvalues && i&4 ? RV(values[i/8]) : random(R(-10.0), R(+10.0));
+
+    for (int i = 0; i < 8 * nvalues + imax; ++i) {
+      const realvec_t x = i < 8 * nvalues && i & 1 ? RV(values[i / 8])
+                                                   : random(R(-10.0), R(+10.0));
+      const realvec_t y = i < 8 * nvalues && i & 2 ? RV(values[i / 8])
+                                                   : random(R(-10.0), R(+10.0));
+      const realvec_t z = i < 8 * nvalues && i & 4 ? RV(values[i / 8])
+                                                   : random(R(-10.0), R(+10.0));
       const intvec_t n = random(int_t(-10), int_t(+10));
-      
+
       check_real<RV>("+", local_pos, local_pos, x, R(0.0));
       check_real<RV>("-", local_neg, local_neg, x, R(0.0));
-      
-      check_real<RV,RV>("+", local_add, local_add, x, y, R(0.0));
-      check_real<RV,RV>("-", local_sub, local_sub, x, y, R(0.0));
-      check_real<RV,RV>("*", local_mul, local_mul, x, y, R(0.0));
-      
+
+      check_real<RV, RV>("+", local_add, local_add, x, y, R(0.0));
+      check_real<RV, RV>("-", local_sub, local_sub, x, y, R(0.0));
+      check_real<RV, RV>("*", local_mul, local_mul, x, y, R(0.0));
+
       {
         real_t rstd = x[0];
-        for (int i=1; i<realvec_t::size; ++i) {
+        for (int i = 1; i < realvec_t::size; ++i) {
           rstd += x[i];
         }
         real_t rvml = sum(x);
@@ -1238,7 +1190,7 @@ struct vecmathlib_test {
       }
       {
         real_t rstd = x[0];
-        for (int i=1; i<realvec_t::size; ++i) {
+        for (int i = 1; i < realvec_t::size; ++i) {
           rstd *= x[i];
         }
         real_t rvml = prod(x);
@@ -1246,7 +1198,7 @@ struct vecmathlib_test {
       }
       {
         real_t rstd = x[0];
-        for (int i=1; i<realvec_t::size; ++i) {
+        for (int i = 1; i < realvec_t::size; ++i) {
           rstd = vml_std::fmax(rstd, x[i]);
         }
         real_t rvml = vecmathlib::maxval(x);
@@ -1254,34 +1206,33 @@ struct vecmathlib_test {
       }
       {
         real_t rstd = x[0];
-        for (int i=1; i<realvec_t::size; ++i) {
+        for (int i = 1; i < realvec_t::size; ++i) {
           rstd = vml_std::fmin(rstd, x[i]);
         }
         real_t rvml = vecmathlib::minval(x);
         check_real("minval", rstd, rvml, x, R(0.0));
       }
-      
-      check_bool<RV,RV>("==", local_eq, local_veq, x, y);
-      check_bool<RV,RV>("!=", local_ne, local_vne, x, y);
-      check_bool<RV,RV>("<", local_lt, local_vlt, x, y);
-      check_bool<RV,RV>("<=", local_le, local_vle, x, y);
-      check_bool<RV,RV>(">", local_gt, local_vgt, x, y);
-      check_bool<RV,RV>(">=", local_ge, local_vge, x, y);
-      
-      check_real<RV,RV>("copysign",
-                        vml_std::copysign, vecmathlib::copysign, x, y, 0.0);
+
+      check_bool<RV, RV>("==", local_eq, local_veq, x, y);
+      check_bool<RV, RV>("!=", local_ne, local_vne, x, y);
+      check_bool<RV, RV>("<", local_lt, local_vlt, x, y);
+      check_bool<RV, RV>("<=", local_le, local_vle, x, y);
+      check_bool<RV, RV>(">", local_gt, local_vgt, x, y);
+      check_bool<RV, RV>(">=", local_ge, local_vge, x, y);
+
+      check_real<RV, RV>("copysign", vml_std::copysign, vecmathlib::copysign, x,
+                         y, 0.0);
       check_real<RV>("fabs", vml_std::fabs, vecmathlib::fabs, x, 0.0);
-      check_real<RV,RV>("fdim",
-                        vml_std::fdim, vecmathlib::fdim, x, y, accuracy());
-      check_real<RV,RV,RV>("fma",
-                           vml_std::fma, vecmathlib::fma,
-                           x, y, z, R(10.0)*accuracy());
-      check_real<RV,RV>("fmax", vml_std::fmax, vecmathlib::fmax, x, y, 0.0);
-      check_real<RV,RV>("fmin", vml_std::fmin, vecmathlib::fmin, x, y, 0.0);
+      check_real<RV, RV>("fdim", vml_std::fdim, vecmathlib::fdim, x, y,
+                         accuracy());
+      check_real<RV, RV, RV>("fma", vml_std::fma, vecmathlib::fma, x, y, z,
+                             R(10.0) * accuracy());
+      check_real<RV, RV>("fmax", vml_std::fmax, vecmathlib::fmax, x, y, 0.0);
+      check_real<RV, RV>("fmin", vml_std::fmin, vecmathlib::fmin, x, y, 0.0);
       check_real<RV>("frexp0", local_frexp0, local_vfrexp0, x, 0.0);
       check_int<RV>("frexp1", local_frexp1, local_vfrexp1, x);
-      check_int<RV>("ilogb",
-                    local_ilogb, (intvec_t(*)(realvec_t))vecmathlib::ilogb, x);
+      check_int<RV>("ilogb", local_ilogb,
+                    (intvec_t (*)(realvec_t))vecmathlib::ilogb, x);
 #if defined VML_HAVE_INF || defined VML_HAVE_NAN
       check_bool<RV>("isfinite", vml_std::isfinite, vecmathlib::isfinite, x);
 #endif
@@ -1294,91 +1245,162 @@ struct vecmathlib_test {
 #ifdef VML_HAVE_DENORMALS
       check_bool<RV>("isnormal", vml_std::isnormal, vecmathlib::isnormal, x);
 #endif
-      check_real<RV,I>("ldexp", local_ldexp, vecmathlib::ldexp, x, n[0], 0.0);
-      check_real<RV,IV>("ldexp", local_ldexp, vecmathlib::ldexp, x, n, 0.0);
-      check_real<RV,RV,RV>("mad",
-                           local_mad, vecmathlib::mad,
-                           x, y, z, R(10.0)*accuracy());
-      check_real<RV,RV>("nextafter",
-                        vml_std::nextafter, vecmathlib::nextafter, x, y, 0.0);
+      check_real<RV, I>("ldexp", local_ldexp, vecmathlib::ldexp, x, n[0], 0.0);
+      check_real<RV, IV>("ldexp", local_ldexp, vecmathlib::ldexp, x, n, 0.0);
+      check_real<RV, RV, RV>("mad", local_mad, vecmathlib::mad, x, y, z,
+                             R(10.0) * accuracy());
+      check_real<RV, RV>("nextafter", vml_std::nextafter, vecmathlib::nextafter,
+                         x, y, 0.0);
       check_bool<RV>("signbit", vml_std::signbit, vecmathlib::signbit, x);
     }
   }
-  
-  static void test_convert()
-  {
-    cout << "   testing ceil convert_float convert_int floor rint round trunc...\n"
+
+  static void test_convert() {
+    cout << "   testing ceil convert_float convert_int floor rint round "
+            "trunc...\n"
          << flush;
-    
+
     const real_t eps = FP::epsilon();
     const real_t int_min = R(std::numeric_limits<int_t>::min());
     const real_t int_max = R(std::numeric_limits<int_t>::max());
     const real_t uint_min = R(std::numeric_limits<uint_t>::min());
     const real_t uint_max = R(std::numeric_limits<uint_t>::max());
-    const real_t mantissa_max = (U(1) << (FP::mantissa_bits+1)) - U(1);
-    const real_t real_max =
-      (((U(1) << (FP::mantissa_bits+1)) - U(1)) << (FP::exponent_bits-1)) +
-      (U(1) << (FP::exponent_bits-1)) - U(1);
+    const real_t mantissa_max = (U(1) << (FP::mantissa_bits + 1)) - U(1);
+    const real_t real_max = (((U(1) << (FP::mantissa_bits + 1)) - U(1))
+                             << (FP::exponent_bits - 1)) +
+                            (U(1) << (FP::exponent_bits - 1)) - U(1);
     const real_t values[] = {
-      R(+0.0), R(+0.1), R(+0.9), R(+1.0), R(+1.1),
-      R(-0.0), R(-0.1), R(-0.9), R(-1.0), R(-1.1),
-      R(+0.0)+eps, R(+0.1)+eps, R(+0.9)+eps, R(+1.0)+eps, R(+1.1)+eps,
-      R(-0.0)+eps, R(-0.1)+eps, R(-0.9)+eps, R(-1.0)+eps, R(-1.1)+eps,
-      R(+0.0)-eps, R(+0.1)-eps, R(+0.9)-eps, R(+1.0)-eps, R(+1.1)-eps,
-      R(-0.0)-eps, R(-0.1)-eps, R(-0.9)-eps, R(-1.0)-eps, R(-1.1)-eps,
+        R(+0.0),
+        R(+0.1),
+        R(+0.9),
+        R(+1.0),
+        R(+1.1),
+        R(-0.0),
+        R(-0.1),
+        R(-0.9),
+        R(-1.0),
+        R(-1.1),
+        R(+0.0) + eps,
+        R(+0.1) + eps,
+        R(+0.9) + eps,
+        R(+1.0) + eps,
+        R(+1.1) + eps,
+        R(-0.0) + eps,
+        R(-0.1) + eps,
+        R(-0.9) + eps,
+        R(-1.0) + eps,
+        R(-1.1) + eps,
+        R(+0.0) - eps,
+        R(+0.1) - eps,
+        R(+0.9) - eps,
+        R(+1.0) - eps,
+        R(+1.1) - eps,
+        R(-0.0) - eps,
+        R(-0.1) - eps,
+        R(-0.9) - eps,
+        R(-1.0) - eps,
+        R(-1.1) - eps,
 #ifdef VML_HAVE_DENORMALS
-      +FP::min(), +FP::min()*(R(1.0)+eps), +FP::min()*R(2.0),
-      -FP::min(), -FP::min()*(R(1.0)+eps), -FP::min()*R(2.0),
+        +FP::min(),
+        +FP::min() * (R(1.0) + eps),
+        +FP::min() * R(2.0),
+        -FP::min(),
+        -FP::min() * (R(1.0) + eps),
+        -FP::min() * R(2.0),
 #endif
-      +FP::max(), +FP::max()*(R(1.0)-eps), +FP::max()*(R(1.0)-R(2.0)*eps),
-      -FP::max(), -FP::max()*(R(1.0)-eps), -FP::max()*(R(1.0)-R(2.0)*eps),
-      +R(0.5)*FP::max(), +R(0.5)*FP::max()*(R(1.0)+eps),
-      -R(0.5)*FP::max(), -R(0.5)*FP::max()*(R(1.0)+eps),
+        +FP::max(),
+        +FP::max() * (R(1.0) - eps),
+        +FP::max() * (R(1.0) - R(2.0) * eps),
+        -FP::max(),
+        -FP::max() * (R(1.0) - eps),
+        -FP::max() * (R(1.0) - R(2.0) * eps),
+        +R(0.5) * FP::max(),
+        +R(0.5) * FP::max() * (R(1.0) + eps),
+        -R(0.5) * FP::max(),
+        -R(0.5) * FP::max() * (R(1.0) + eps),
 #ifdef VML_HAVE_INF
-      +R(1.0/0.0),              // +FP::infinity()
-      -R(1.0/0.0),              // -FP::infinity()
+        +R(1.0 / 0.0), // +FP::infinity()
+        -R(1.0 / 0.0), // -FP::infinity()
 #endif
 #ifdef VML_HAVE_NAN
-      R(0.0/0.0),               // FP::quiet_NaN()
+        R(0.0 / 0.0), // FP::quiet_NaN()
 #endif
-      +int_min, +int_max, +uint_min, +uint_max,
-      -int_min, -int_max, -uint_min, -uint_max,
-      +int_min+R(0.1), +int_max+R(0.1), +uint_min+R(0.1), +uint_max+R(0.1),
-      -int_min+R(0.1), -int_max+R(0.1), -uint_min+R(0.1), -uint_max+R(0.1),
-      +int_min-R(0.1), +int_max-R(0.1), +uint_min-R(0.1), +uint_max-R(0.1),
-      -int_min-R(0.1), -int_max-R(0.1), -uint_min-R(0.1), -uint_max-R(0.1),
-      +int_min+R(1.0), +int_max+R(1.0), +uint_min+R(1.0), +uint_max+R(1.0),
-      -int_min+R(1.0), -int_max+R(1.0), -uint_min+R(1.0), -uint_max+R(1.0),
-      +int_min-R(1.0), +int_max-R(1.0), +uint_min-R(1.0), +uint_max-R(1.0),
-      -int_min-R(1.0), -int_max-R(1.0), -uint_min-R(1.0), -uint_max-R(1.0),
-      +mantissa_max, +mantissa_max-R(1.0), +mantissa_max+R(1.0),
-      -mantissa_max, -mantissa_max-R(1.0), -mantissa_max+R(1.0),
-      +real_max, +real_max-R(1.0), +real_max+R(1.0),
-      -real_max, -real_max-R(1.0), -real_max+R(1.0),
-      -R(443.9999425),
+        +int_min,
+        +int_max,
+        +uint_min,
+        +uint_max,
+        -int_min,
+        -int_max,
+        -uint_min,
+        -uint_max,
+        +int_min + R(0.1),
+        +int_max + R(0.1),
+        +uint_min + R(0.1),
+        +uint_max + R(0.1),
+        -int_min + R(0.1),
+        -int_max + R(0.1),
+        -uint_min + R(0.1),
+        -uint_max + R(0.1),
+        +int_min - R(0.1),
+        +int_max - R(0.1),
+        +uint_min - R(0.1),
+        +uint_max - R(0.1),
+        -int_min - R(0.1),
+        -int_max - R(0.1),
+        -uint_min - R(0.1),
+        -uint_max - R(0.1),
+        +int_min + R(1.0),
+        +int_max + R(1.0),
+        +uint_min + R(1.0),
+        +uint_max + R(1.0),
+        -int_min + R(1.0),
+        -int_max + R(1.0),
+        -uint_min + R(1.0),
+        -uint_max + R(1.0),
+        +int_min - R(1.0),
+        +int_max - R(1.0),
+        +uint_min - R(1.0),
+        +uint_max - R(1.0),
+        -int_min - R(1.0),
+        -int_max - R(1.0),
+        -uint_min - R(1.0),
+        -uint_max - R(1.0),
+        +mantissa_max,
+        +mantissa_max - R(1.0),
+        +mantissa_max + R(1.0),
+        -mantissa_max,
+        -mantissa_max - R(1.0),
+        -mantissa_max + R(1.0),
+        +real_max,
+        +real_max - R(1.0),
+        +real_max + R(1.0),
+        -real_max,
+        -real_max - R(1.0),
+        -real_max + R(1.0),
+        -R(443.9999425),
     };
     const int nvalues = sizeof values / sizeof *values;
-    
-    for (int i=0; i<nvalues+imax; ++i) {
+
+    for (int i = 0; i < nvalues + imax; ++i) {
       const realvec_t x =
-        i<nvalues ? RV(values[i]) : random(R(-1.0e+10), R(+1.0e+10));
+          i < nvalues ? RV(values[i]) : random(R(-1.0e+10), R(+1.0e+10));
       const intvec_t n1 = random(int_t(-100), int_t(+100));
-      //const intvec_t n2 = random(int_t(-1000000000), int_t(+1000000000));
+      // const intvec_t n2 = random(int_t(-1000000000), int_t(+1000000000));
       const intvec_t n2 =
-        random(std::numeric_limits<int_t>::min() / 2, // avoid overflow
-               std::numeric_limits<int_t>::max() / 2);
+          random(std::numeric_limits<int_t>::min() / 2, // avoid overflow
+                 std::numeric_limits<int_t>::max() / 2);
       const realvec_t fn1 = vecmathlib::convert_float(n1);
       const realvec_t fn2 = vecmathlib::convert_float(n2);
       const realvec_t fn1h = vecmathlib::convert_float(n1) * RV(0.25);
       const realvec_t fn2h = vecmathlib::convert_float(n2) * RV(0.25);
-      check_real<IV>("convert_float",
-                     FP::convert_float, vecmathlib::convert_float, n1, R(0.0));
-      check_real<IV>("convert_float",
-                     FP::convert_float, vecmathlib::convert_float, n2, R(0.0));
+      check_real<IV>("convert_float", FP::convert_float,
+                     vecmathlib::convert_float, n1, R(0.0));
+      check_real<IV>("convert_float", FP::convert_float,
+                     vecmathlib::convert_float, n2, R(0.0));
       // Note: RV(int_max) > int_max due to rounding
       if (all(x >= RV(int_min) && x < RV(int_max))) {
-        check_int<RV>("convert_int",
-                      FP::convert_int, vecmathlib::convert_int, x);
+        check_int<RV>("convert_int", FP::convert_int, vecmathlib::convert_int,
+                      x);
       }
       // TODO: These should all have accuracy R(0.0) instead!
       check_real<RV>("ceil", vml_std::ceil, vecmathlib::ceil, x, accuracy());
@@ -1387,218 +1409,213 @@ struct vecmathlib_test {
       check_real<RV>("ceil", vml_std::ceil, vecmathlib::ceil, fn1h, accuracy());
       check_real<RV>("ceil", vml_std::ceil, vecmathlib::ceil, fn2h, accuracy());
       check_real<RV>("floor", vml_std::floor, vecmathlib::floor, x, accuracy());
-      check_real<RV>("floor", vml_std::floor, vecmathlib::floor, fn1, accuracy());
-      check_real<RV>("floor", vml_std::floor, vecmathlib::floor, fn2, accuracy());
-      check_real<RV>("floor", vml_std::floor, vecmathlib::floor, fn1h, accuracy());
-      check_real<RV>("floor", vml_std::floor, vecmathlib::floor, fn2h, accuracy());
-      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, x, accuracy());
-      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, fn1, accuracy());
-      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, fn2, accuracy());
-      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, fn1h, accuracy());
-      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, fn2h, accuracy());
+      check_real<RV>("floor", vml_std::floor, vecmathlib::floor, fn1,
+                     accuracy());
+      check_real<RV>("floor", vml_std::floor, vecmathlib::floor, fn2,
+                     accuracy());
+      check_real<RV>("floor", vml_std::floor, vecmathlib::floor, fn1h,
+                     accuracy());
+      check_real<RV>("floor", vml_std::floor, vecmathlib::floor, fn2h,
+                     accuracy());
+      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, x,
+      // accuracy());
+      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, fn1,
+      // accuracy());
+      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, fn2,
+      // accuracy());
+      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, fn1h,
+      // accuracy());
+      // check_int<RV>("lrint", vml_std::lrint, vecmathlib::rint, fn2h,
+      // accuracy());
       check_real<RV>("rint", vml_std::rint, vecmathlib::rint, x, accuracy());
       check_real<RV>("rint", vml_std::rint, vecmathlib::rint, fn1, accuracy());
       check_real<RV>("rint", vml_std::rint, vecmathlib::rint, fn2, accuracy());
       check_real<RV>("rint", vml_std::rint, vecmathlib::rint, fn1h, accuracy());
       check_real<RV>("rint", vml_std::rint, vecmathlib::rint, fn2h, accuracy());
       check_real<RV>("round", vml_std::round, vecmathlib::round, x, accuracy());
-      check_real<RV>("round", vml_std::round, vecmathlib::round, fn1, accuracy());
-      check_real<RV>("round", vml_std::round, vecmathlib::round, fn2, accuracy());
-      check_real<RV>("round", vml_std::round, vecmathlib::round, fn1h, accuracy());
-      check_real<RV>("round", vml_std::round, vecmathlib::round, fn2h, accuracy());
+      check_real<RV>("round", vml_std::round, vecmathlib::round, fn1,
+                     accuracy());
+      check_real<RV>("round", vml_std::round, vecmathlib::round, fn2,
+                     accuracy());
+      check_real<RV>("round", vml_std::round, vecmathlib::round, fn1h,
+                     accuracy());
+      check_real<RV>("round", vml_std::round, vecmathlib::round, fn2h,
+                     accuracy());
       check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, x, accuracy());
-      check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, fn1, accuracy());
-      check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, fn2, accuracy());
-      check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, fn1h, accuracy());
-      check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, fn2h, accuracy());
+      check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, fn1,
+                     accuracy());
+      check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, fn2,
+                     accuracy());
+      check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, fn1h,
+                     accuracy());
+      check_real<RV>("trunc", vml_std::trunc, vecmathlib::trunc, fn2h,
+                     accuracy());
     }
   }
-  
-  
-  
-  static void test_asin()
-  {
+
+  static void test_asin() {
     cout << "   testing asin acos atan atan2...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(-1.0), R(+1.0));
       check_real<RV>("asin", vml_std::asin, vecmathlib::asin, x, accuracy(4));
       check_real<RV>("acos", vml_std::acos, vecmathlib::acos, x, accuracy(4));
     }
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(-100.0), R(+100.0));
       const realvec_t y = random(R(-100.0), R(+100.0));
       check_real<RV>("atan", vml_std::atan, vecmathlib::atan, x, accuracy(5));
-      check_real<RV,RV>("atan2",
-                        vml_std::atan2, vecmathlib::atan2, x, y, accuracy(6));
+      check_real<RV, RV>("atan2", vml_std::atan2, vecmathlib::atan2, x, y,
+                         accuracy(6));
     }
   }
-  
-  static void test_asinh()
-  {
+
+  static void test_asinh() {
     cout << "   testing asinh acosh atanh...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(-1000.0), R(+1000.0));
-      check_real<RV>("asinh",
-                     vml_std::asinh, vecmathlib::asinh, x, accuracy(4));
+      check_real<RV>("asinh", vml_std::asinh, vecmathlib::asinh, x,
+                     accuracy(4));
     }
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(1.0), R(1000.0));
-      check_real<RV>("acosh",
-                     vml_std::acosh, vecmathlib::acosh, x, accuracy(4));
+      check_real<RV>("acosh", vml_std::acosh, vecmathlib::acosh, x,
+                     accuracy(4));
     }
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(-1.0), R(+1.0));
-      check_real<RV>("atanh",
-                     vml_std::atanh, vecmathlib::atanh, x, accuracy(5));
+      check_real<RV>("atanh", vml_std::atanh, vecmathlib::atanh, x,
+                     accuracy(5));
     }
   }
-  
+
   static real_t local_exp10(real_t x) { return pow(R(10.0), x); }
-  static void test_exp()
-  {
+  static void test_exp() {
     cout << "   testing exp exp10 exp2 expm1...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(-100.0), R(+100.0));
       check_real<RV>("exp", vml_std::exp, vecmathlib::exp, x, accuracy(3));
       check_real<RV>("exp10", local_exp10, vecmathlib::exp10, x, accuracy(3));
       check_real<RV>("exp2", vml_std::exp2, vecmathlib::exp2, x, accuracy(3));
-      check_real<RV>("expm1",
-                     vml_std::expm1, vecmathlib::expm1, x, accuracy(3));
+      check_real<RV>("expm1", vml_std::expm1, vecmathlib::expm1, x,
+                     accuracy(3));
     }
   }
-  
-  static void test_log()
-  {
+
+  static void test_log() {
     cout << "   testing log log10 log1p log2...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(1.0e-10), R(1.0e+10));
       check_real<RV>("log", vml_std::log, vecmathlib::log, x, accuracy(3));
-      check_real<RV>("log10",
-                     vml_std::log10, vecmathlib::log10, x, accuracy(3));
-      check_real<RV>("log1p",
-                     vml_std::log1p, vecmathlib::log1p, x, accuracy(2));
+      check_real<RV>("log10", vml_std::log10, vecmathlib::log10, x,
+                     accuracy(3));
+      check_real<RV>("log1p", vml_std::log1p, vecmathlib::log1p, x,
+                     accuracy(2));
       check_real<RV>("log2", vml_std::log2, vecmathlib::log2, x, accuracy(3));
     }
   }
-  
-  static void test_pow()
-  {
+
+  static void test_pow() {
     cout << "   testing pow...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(0.001), R(1000.0));
       const realvec_t y = random(R(-10.0), R(+10.0));
       const realvec_t ya = fabs(y);
       const intvec_t n = random(I(-10), I(+10));
       const realvec_t fn = vecmathlib::convert_float(n);
-      check_real<RV,RV>("pow(0,y)",
-                        vml_std::pow, vecmathlib::pow, RV(0.0), ya,
-                        accuracy(16));
-      check_real<RV,RV>("pow(x,0)",
-                        vml_std::pow, vecmathlib::pow, x, RV(0.0),
-                        accuracy(16));
+      check_real<RV, RV>("pow(0,y)", vml_std::pow, vecmathlib::pow, RV(0.0), ya,
+                         accuracy(16));
+      check_real<RV, RV>("pow(x,0)", vml_std::pow, vecmathlib::pow, x, RV(0.0),
+                         accuracy(16));
       // just to check
       check_real<RV>("log(x)", vml_std::log, vecmathlib::log, x, accuracy(3));
-      check_real<RV,RV>("pow(x,y)",
-                        vml_std::pow, vecmathlib::pow, x, y, accuracy(16));
-      check_real<RV,RV>("pow(-x,n)",
-                        vml_std::pow, vecmathlib::pow, -x, fn, accuracy(16));
+      check_real<RV, RV>("pow(x,y)", vml_std::pow, vecmathlib::pow, x, y,
+                         accuracy(16));
+      check_real<RV, RV>("pow(-x,n)", vml_std::pow, vecmathlib::pow, -x, fn,
+                         accuracy(16));
     }
   }
-  
-  static real_t local_rcp(real_t x) { return R(1.0)/x; }
-  static void test_rcp()
-  {
+
+  static real_t local_rcp(real_t x) { return R(1.0) / x; }
+  static void test_rcp() {
     cout << "   testing / fmod rcp remainder...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(-100.0), R(+100.0));
       const realvec_t y = random(R(-100.0), R(+100.0));
       const intvec_t n = random(I(-100), I(+100));
       const intvec_t m = random(I(-100), I(+100));
       const realvec_t fn = vecmathlib::convert_float(n);
-      const realvec_t fm = vecmathlib::convert_float
-        (m + vecmathlib::convert_int(m == intvec_t(I(0))));
-      check_real<RV,RV>("/", local_div, local_div, x, y, accuracy());
+      const realvec_t fm = vecmathlib::convert_float(
+          m + vecmathlib::convert_int(m == intvec_t(I(0))));
+      check_real<RV, RV>("/", local_div, local_div, x, y, accuracy());
       check_real<RV>("rcp", local_rcp, vecmathlib::rcp, x, accuracy());
-      check_real<RV,RV>("fmod(x,y)",
-                        vml_std::fmod, vecmathlib::fmod, x, y,
-                        2.0*accuracy(), y);
-      check_real<RV,RV>("fmod(x,m)",
-                        vml_std::fmod, vecmathlib::fmod, x, fm,
-                        2.0*accuracy(), fm);
-      check_real<RV,RV>("fmod(n,y)",
-                        vml_std::fmod, vecmathlib::fmod, fn, y,
-                        2.0*accuracy(), y);
-      check_real<RV,RV>("remainder(x,y)",
-                        vml_std::remainder, vecmathlib::remainder,
-                        x, y, R(2.0)*accuracy(), y);
-      check_real<RV,RV>("remainder(x,m)",
-                        vml_std::remainder, vecmathlib::remainder,
-                        x, fm, R(2.0)*accuracy(), fm);
-      check_real<RV,RV>("remainder(n,y)",
-                        vml_std::remainder, vecmathlib::remainder,
-                        fn, y, R(2.0)*accuracy(), y);
+      check_real<RV, RV>("fmod(x,y)", vml_std::fmod, vecmathlib::fmod, x, y,
+                         2.0 * accuracy(), y);
+      check_real<RV, RV>("fmod(x,m)", vml_std::fmod, vecmathlib::fmod, x, fm,
+                         2.0 * accuracy(), fm);
+      check_real<RV, RV>("fmod(n,y)", vml_std::fmod, vecmathlib::fmod, fn, y,
+                         2.0 * accuracy(), y);
+      check_real<RV, RV>("remainder(x,y)", vml_std::remainder,
+                         vecmathlib::remainder, x, y, R(2.0) * accuracy(), y);
+      check_real<RV, RV>("remainder(x,m)", vml_std::remainder,
+                         vecmathlib::remainder, x, fm, R(2.0) * accuracy(), fm);
+      check_real<RV, RV>("remainder(n,y)", vml_std::remainder,
+                         vecmathlib::remainder, fn, y, R(2.0) * accuracy(), y);
     }
   }
-  
-  static void test_sin()
-  {
+
+  static void test_sin() {
     cout << "   testing cos sin tan...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(-10.0), R(+10.0));
       check_real<RV>("sin", vml_std::sin, vecmathlib::sin, x, accuracy(4));
       check_real<RV>("cos", vml_std::cos, vecmathlib::cos, x, accuracy(4));
     }
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x0 = random(R(-1.55), R(+1.55));
       const intvec_t n = random(I(-10), I(+10));
       const realvec_t x = x0 + vecmathlib::convert_float(n) * RV(M_PI);
       // tan loses accuracy near pi/2
       // (by definition, not by implementation?)
-      check_real<RV>("tan",
-                     vml_std::tan, vecmathlib::tan, x, R(20.0)*accuracy(5));
+      check_real<RV>("tan", vml_std::tan, vecmathlib::tan, x,
+                     R(20.0) * accuracy(5));
     }
   }
-  
-  static void test_sinh()
-  {
+
+  static void test_sinh() {
     cout << "   testing cosh sinh tanh...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(-10.0), R(+10.0));
       check_real<RV>("sinh", vml_std::sinh, vecmathlib::sinh, x, accuracy(4));
       check_real<RV>("cosh", vml_std::cosh, vecmathlib::cosh, x, accuracy(4));
       check_real<RV>("tanh", vml_std::tanh, vecmathlib::tanh, x, accuracy(5));
     }
   }
-  
-  static real_t local_rsqrt(real_t x) { return R(1.0)/sqrt(x); }
-  static void test_sqrt()
-  {
+
+  static real_t local_rsqrt(real_t x) { return R(1.0) / sqrt(x); }
+  static void test_sqrt() {
     cout << "   testing cbrt hypot rsqrt sqrt...\n" << flush;
-    for (int i=0; i<imax; ++i) {
+    for (int i = 0; i < imax; ++i) {
       const realvec_t x = random(R(1.0e-3), R(1.0e+3));
       const realvec_t y = random(-R(1.0e+3), R(1.0e+3));
       const realvec_t z = random(-R(1.0e+3), R(1.0e+3));
       check_real<RV>("cbrt", vml_std::cbrt, vecmathlib::cbrt, x, accuracy());
-      check_real<RV,RV>("hypot",
-                        vml_std::hypot, vecmathlib::hypot, y, z, accuracy());
+      check_real<RV, RV>("hypot", vml_std::hypot, vecmathlib::hypot, y, z,
+                         accuracy());
       check_real<RV>("rsqrt", local_rsqrt, vecmathlib::rsqrt, x, accuracy());
       check_real<RV>("sqrt", vml_std::sqrt, vecmathlib::sqrt, x, accuracy());
     }
   }
-  
-  
-  
-  static void test()
-  {
+
+  static void test() {
     cout << "\n"
          << "Testing math functions for type " << realvec_t::name() << ":\n";
-    
+
     test_bool();
     test_int();
     test_real();
-    
+
     test_mem();
-    
+
     // Test "basic" functions first
     test_abs();
     test_fabs();
@@ -1615,90 +1632,86 @@ struct vecmathlib_test {
   }
 };
 
-
-
-int main(int argc, char** argv)
-{
+int main(int argc, char **argv) {
   using namespace vecmathlib;
 
   cout << "Testing math functions:\n"
-       << "[" VECMATHLIB_CONFIGURATION "]\n"
-       << flush;
-  
-  vecmathlib_test<realpseudovec<float,1> >::test();
+       << "[" VECMATHLIB_CONFIGURATION "]\n" << flush;
+
+  vecmathlib_test<realpseudovec<float, 1>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<float,1> >::test();
+  vecmathlib_test<realbuiltinvec<float, 1>>::test();
 #endif
-  vecmathlib_test<realtestvec<float,1> >::test();
+  vecmathlib_test<realtestvec<float, 1>>::test();
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_1
-  vecmathlib_test<realvec<float,1> >::test();
+  vecmathlib_test<realvec<float, 1>>::test();
 #endif
-  vecmathlib_test<realpseudovec<float,2> >::test();
+  vecmathlib_test<realpseudovec<float, 2>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<float,2> >::test();
+  vecmathlib_test<realbuiltinvec<float, 2>>::test();
 #endif
-  vecmathlib_test<realtestvec<float,2> >::test();
+  vecmathlib_test<realtestvec<float, 2>>::test();
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_2
-  vecmathlib_test<realvec<float,2> >::test();
+  vecmathlib_test<realvec<float, 2>>::test();
 #endif
-  vecmathlib_test<realpseudovec<float,4> >::test();
+  vecmathlib_test<realpseudovec<float, 4>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<float,4> >::test();
+  vecmathlib_test<realbuiltinvec<float, 4>>::test();
 #endif
-  vecmathlib_test<realtestvec<float,4> >::test();
+  vecmathlib_test<realtestvec<float, 4>>::test();
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_4
-  vecmathlib_test<realvec<float,4> >::test();
+  vecmathlib_test<realvec<float, 4>>::test();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_8
-  vecmathlib_test<realpseudovec<float,8> >::test();
+  vecmathlib_test<realpseudovec<float, 8>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<float,8> >::test();
+  vecmathlib_test<realbuiltinvec<float, 8>>::test();
 #endif
-  vecmathlib_test<realtestvec<float,8> >::test();
-  vecmathlib_test<realvec<float,8> >::test();
+  vecmathlib_test<realtestvec<float, 8>>::test();
+  vecmathlib_test<realvec<float, 8>>::test();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_FLOAT_16
-  vecmathlib_test<realpseudovec<float,16> >::test();
+  vecmathlib_test<realpseudovec<float, 16>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<float,16> >::test();
+  vecmathlib_test<realbuiltinvec<float, 16>>::test();
 #endif
-  vecmathlib_test<realtestvec<float,16> >::test();
-  vecmathlib_test<realvec<float,16> >::test();
+  vecmathlib_test<realtestvec<float, 16>>::test();
+  vecmathlib_test<realvec<float, 16>>::test();
 #endif
-  
-  vecmathlib_test<realpseudovec<double,1> >::test();
+
+  vecmathlib_test<realpseudovec<double, 1>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<double,1> >::test();
+  vecmathlib_test<realbuiltinvec<double, 1>>::test();
 #endif
-  vecmathlib_test<realtestvec<double,1> >::test();
+  vecmathlib_test<realtestvec<double, 1>>::test();
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1
-  vecmathlib_test<realvec<double,1> >::test();
+  vecmathlib_test<realvec<double, 1>>::test();
 #endif
-  vecmathlib_test<realpseudovec<double,2> >::test();
+  vecmathlib_test<realpseudovec<double, 2>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<double,2> >::test();
+  vecmathlib_test<realbuiltinvec<double, 2>>::test();
 #endif
-  vecmathlib_test<realtestvec<double,2> >::test();
+  vecmathlib_test<realtestvec<double, 2>>::test();
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_2
-  vecmathlib_test<realvec<double,2> >::test();
+  vecmathlib_test<realvec<double, 2>>::test();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_4
-  vecmathlib_test<realpseudovec<double,4> >::test();
+  vecmathlib_test<realpseudovec<double, 4>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<double,4> >::test();
+  vecmathlib_test<realbuiltinvec<double, 4>>::test();
 #endif
-  vecmathlib_test<realtestvec<double,4> >::test();
-  vecmathlib_test<realvec<double,4> >::test();
+  vecmathlib_test<realtestvec<double, 4>>::test();
+  vecmathlib_test<realvec<double, 4>>::test();
 #endif
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_8
-  vecmathlib_test<realpseudovec<double,8> >::test();
+  vecmathlib_test<realpseudovec<double, 8>>::test();
 #ifdef __clang__
-  vecmathlib_test<realbuiltinvec<double,8> >::test();
+  vecmathlib_test<realbuiltinvec<double, 8>>::test();
 #endif
-  vecmathlib_test<realtestvec<double,8> >::test();
-  vecmathlib_test<realvec<double,8> >::test();
+  vecmathlib_test<realtestvec<double, 8>>::test();
+  vecmathlib_test<realvec<double, 8>>::test();
 #endif
-  
+
   cout << "\n";
   if (num_errors == 0) {
     cout << "SUCCESS";
@@ -1706,6 +1719,6 @@ int main(int argc, char** argv)
     cout << "FAILURE";
   }
   cout << ": " << num_errors << " errors found\n" << flush;
-  
+
   return num_errors == 0 ? 0 : 1;
 }