1 files changed, 113 insertions, 124 deletions
diff --git a/mathfuncs_exp.h b/mathfuncs_exp.h
index d357a21..e35fb1b 100644
--- a/mathfuncs_exp.h
+++ b/mathfuncs_exp.h
@@ -7,156 +7,145 @@
 
 #include <cmath>
 
+namespace vecmathlib {
 
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_exp2(realvec_t x) {
+  // TODO: Check SLEEF 2.80 algorithm
+  // (in particular the improved-precision truncation)
+
+  // Rescale
+  realvec_t x0 = x;
+
+// realvec_t round_x = rint(x);
+// intvec_t iround_x = convert_int(round_x);
+// r = ldexp(r, iround_x);
 
-namespace vecmathlib {
-  
-  template<typename realvec_t>
-  realvec_t mathfuncs<realvec_t>::vml_exp2(realvec_t x)
-  {
-    // TODO: Check SLEEF 2.80 algorithm
-    // (in particular the improved-precision truncation)
-    
-    // Rescale
-    realvec_t x0 = x;
-    
-    // realvec_t round_x = rint(x);
-    // intvec_t iround_x = convert_int(round_x);
-    // r = ldexp(r, iround_x);
-    
 #if 0
     // Straightforward implementation
     realvec_t round_x = rint(x);
     x -= round_x;
 #elif 1
-    // Round by adding, then subtracting again a large number
-    // Add a large number to move the mantissa bits to the right
-    int_t large = (U(1) << FP::mantissa_bits) + FP::exponent_offset;
-    realvec_t tmp = x + RV(R(large));
-    tmp.barrier();
-    
-    realvec_t round_x = tmp - RV(R(large));
-    x -= round_x;
+  // Round by adding, then subtracting again a large number
+  // Add a large number to move the mantissa bits to the right
+  int_t large = (U(1) << FP::mantissa_bits) + FP::exponent_offset;
+  realvec_t tmp = x + RV(R(large));
+  tmp.barrier();
+
+  realvec_t round_x = tmp - RV(R(large));
+  x -= round_x;
 #else
-    // Straightforward implementation, using round instead of rint,
-    // since round is faster for QPX
-    realvec_t round_x = round(x);
-    x -= round_x;
+  // Straightforward implementation, using round instead of rint,
+  // since round is faster for QPX
+  realvec_t round_x = round(x);
+  x -= round_x;
 #endif
-    VML_ASSERT(all(x >= RV(-0.5) && x <= RV(0.5)));
-    
-    // Polynomial expansion
-    realvec_t r;
-    switch (sizeof(real_t)) {
-    case 4:
+  VML_ASSERT(all(x >= RV(-0.5) && x <= RV(0.5)));
+
+  // Polynomial expansion
+  realvec_t r;
+  switch (sizeof(real_t)) {
+  case 4:
 #ifdef VML_HAVE_FP_CONTRACT
-      // float, error=4.55549108005200277750378992345e-9
-      r = RV(0.000154653240842602623787395880898);
-      r = mad(r, x, RV(0.00133952915439234389712105060319));
-      r = mad(r, x, RV(0.0096180399118156827664944870552));
-      r = mad(r, x, RV(0.055503406540531310853149866446));
-      r = mad(r, x, RV(0.240226511015459465468737123346));
-      r = mad(r, x, RV(0.69314720007380208630542805293));
-      r = mad(r, x, RV(0.99999999997182023878745628977));
+    // float, error=4.55549108005200277750378992345e-9
+    r = RV(0.000154653240842602623787395880898);
+    r = mad(r, x, RV(0.00133952915439234389712105060319));
+    r = mad(r, x, RV(0.0096180399118156827664944870552));
+    r = mad(r, x, RV(0.055503406540531310853149866446));
+    r = mad(r, x, RV(0.240226511015459465468737123346));
+    r = mad(r, x, RV(0.69314720007380208630542805293));
+    r = mad(r, x, RV(0.99999999997182023878745628977));
 #else
-      // float, error=1.62772721960621336664735896836e-7
-      r = RV(0.00133952915439234389712105060319);
-      r = mad(r, x, RV(0.009670773148229417605024318985));
-      r = mad(r, x, RV(0.055503406540531310853149866446));
-      r = mad(r, x, RV(0.240222115700585316818177639177));
-      r = mad(r, x, RV(0.69314720007380208630542805293));
-      r = mad(r, x, RV(1.00000005230745711373079206024));
+    // float, error=1.62772721960621336664735896836e-7
+    r = RV(0.00133952915439234389712105060319);
+    r = mad(r, x, RV(0.009670773148229417605024318985));
+    r = mad(r, x, RV(0.055503406540531310853149866446));
+    r = mad(r, x, RV(0.240222115700585316818177639177));
+    r = mad(r, x, RV(0.69314720007380208630542805293));
+    r = mad(r, x, RV(1.00000005230745711373079206024));
 #endif
-      break;
-    case 8:
+    break;
+  case 8:
 #ifdef VML_HAVE_FP_CONTRACT
-      // double, error=9.32016781355638010975628074746e-18
-      r = RV(4.45623165388261696886670014471e-10);
-      r = mad(r, x, RV(7.0733589360775271430968224806e-9));
-      r = mad(r, x, RV(1.01780540270960163558119510246e-7));
-      r = mad(r, x, RV(1.3215437348041505269462510712e-6));
-      r = mad(r, x, RV(0.000015252733849766201174247690629));
-      r = mad(r, x, RV(0.000154035304541242555115696403795));
-      r = mad(r, x, RV(0.00133335581463968601407096905671));
-      r = mad(r, x, RV(0.0096181291075949686712855561931));
-      r = mad(r, x, RV(0.055504108664821672870565883052));
-      r = mad(r, x, RV(0.240226506959101382690753994082));
-      r = mad(r, x, RV(0.69314718055994530864272481773));
-      r = mad(r, x, RV(0.9999999999999999978508676375));
+    // double, error=9.32016781355638010975628074746e-18
+    r = RV(4.45623165388261696886670014471e-10);
+    r = mad(r, x, RV(7.0733589360775271430968224806e-9));
+    r = mad(r, x, RV(1.01780540270960163558119510246e-7));
+    r = mad(r, x, RV(1.3215437348041505269462510712e-6));
+    r = mad(r, x, RV(0.000015252733849766201174247690629));
+    r = mad(r, x, RV(0.000154035304541242555115696403795));
+    r = mad(r, x, RV(0.00133335581463968601407096905671));
+    r = mad(r, x, RV(0.0096181291075949686712855561931));
+    r = mad(r, x, RV(0.055504108664821672870565883052));
+    r = mad(r, x, RV(0.240226506959101382690753994082));
+    r = mad(r, x, RV(0.69314718055994530864272481773));
+    r = mad(r, x, RV(0.9999999999999999978508676375));
 #else
-      // double, error=3.74939899823302048807873981077e-14
-      r = RV(1.02072375599725694063203809188e-7);
-      r = mad(r, x, RV(1.32573274434801314145133004073e-6));
-      r = mad(r, x, RV(0.0000152526647170731944840736190013));
-      r = mad(r, x, RV(0.000154034441925859828261898614555));
-      r = mad(r, x, RV(0.00133335582175770747495287552557));
-      r = mad(r, x, RV(0.0096181291794939392517233403183));
-      r = mad(r, x, RV(0.055504108664525029438908798685));
-      r = mad(r, x, RV(0.240226506957026959772247598695));
-      r = mad(r, x, RV(0.6931471805599487321347668143));
-      r = mad(r, x, RV(1.00000000000000942892870993489));
+    // double, error=3.74939899823302048807873981077e-14
+    r = RV(1.02072375599725694063203809188e-7);
+    r = mad(r, x, RV(1.32573274434801314145133004073e-6));
+    r = mad(r, x, RV(0.0000152526647170731944840736190013));
+    r = mad(r, x, RV(0.000154034441925859828261898614555));
+    r = mad(r, x, RV(0.00133335582175770747495287552557));
+    r = mad(r, x, RV(0.0096181291794939392517233403183));
+    r = mad(r, x, RV(0.055504108664525029438908798685));
+    r = mad(r, x, RV(0.240226506957026959772247598695));
+    r = mad(r, x, RV(0.6931471805599487321347668143));
+    r = mad(r, x, RV(1.00000000000000942892870993489));
 #endif
-      break;
-    default:
-      __builtin_unreachable();
-    }
-    
-    // Undo rescaling
+    break;
+  default:
+    __builtin_unreachable();
+  }
+
+// Undo rescaling
 #if 0
     // Straightforward implementation
     r = ldexp(r, convert_int(round_x));
 #elif 1
-    // Use direct integer manipulation
-    // Extract integer as lowest mantissa bits (highest bits still
-    // contain offset, exponent, and sign)
-    intvec_t itmp = as_int(tmp);
-    // Construct scale factor by setting exponent (this shifts out the
-    // highest bits)
-    realvec_t scale = as_float(itmp << I(FP::mantissa_bits));
-    r *= scale;
+  // Use direct integer manipulation
+  // Extract integer as lowest mantissa bits (highest bits still
+  // contain offset, exponent, and sign)
+  intvec_t itmp = as_int(tmp);
+  // Construct scale factor by setting exponent (this shifts out the
+  // highest bits)
+  realvec_t scale = as_float(itmp << I(FP::mantissa_bits));
+  r *= scale;
 #else
-    // Use floating point operations instead of integer operations,
-    // since these are faster for QPX
-    real_t exponent_factor = R(I(1) << I(FP::mantissa_bits));
-    real_t exponent_offset = R(I(FP::exponent_offset) << I(FP::mantissa_bits));
-    realvec_t exponent = mad(round_x, RV(exponent_factor), RV(exponent_offset));
-    realvec_t scale = as_float(convert_int(exponent));
-    r *= scale;
+  // Use floating point operations instead of integer operations,
+  // since these are faster for QPX
+  real_t exponent_factor = R(I(1) << I(FP::mantissa_bits));
+  real_t exponent_offset = R(I(FP::exponent_offset) << I(FP::mantissa_bits));
+  realvec_t exponent = mad(round_x, RV(exponent_factor), RV(exponent_offset));
+  realvec_t scale = as_float(convert_int(exponent));
+  r *= scale;
 #endif
-    
-    r = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), r);
-    
-    return r;
-  }
-  
-  
-  
-  template<typename realvec_t>
-  inline
-  realvec_t mathfuncs<realvec_t>::vml_exp(realvec_t x)
-  {
-    return exp2(RV(M_LOG2E) * x);
-  }
 
-  template<typename realvec_t>
-  inline
-  realvec_t mathfuncs<realvec_t>::vml_exp10(realvec_t x)
-  {
-    return exp2(RV(M_LOG2E * M_LN10) * x);
-  }
+  r = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), r);
+
+  return r;
+}
 
-  template<typename realvec_t>
-  inline
-  realvec_t mathfuncs<realvec_t>::vml_expm1(realvec_t x)
-  {
-    // TODO: improve this
-    return exp(x) - RV(1.0);
+template <typename realvec_t>
+inline realvec_t mathfuncs<realvec_t>::vml_exp(realvec_t x) {
+  return exp2(RV(M_LOG2E) * x);
+}
+
+template <typename realvec_t>
+inline realvec_t mathfuncs<realvec_t>::vml_exp10(realvec_t x) {
+  return exp2(RV(M_LOG2E * M_LN10) * x);
+}
+
+template <typename realvec_t>
+inline realvec_t mathfuncs<realvec_t>::vml_expm1(realvec_t x) {
+  // TODO: improve this
+  return exp(x) - RV(1.0);
 #if 0
     r = exp(x) - RV(1.0);
     return ifthen(r == RV(0.0), x, r);
 #endif
-  }
-  
+}
+
 }; // namespace vecmathlib
 
-#endif  // #ifndef MATHFUNCS_EXP_H
+#endif // #ifndef MATHFUNCS_EXP_H