Some ideas to improve exp on QPX

author: Erik Schnetter <schnetter@gmail.com> 2013-11-25 15:02:18 -0500
committer: Erik Schnetter <schnetter@gmail.com> 2013-11-25 15:02:18 -0500
commit: 967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b (patch)
tree: c67b039c0f84be30c37e960aaecdf1ab8d8f484d
parent: 6ab7b34b64a7750780141be9bde302fcf9e0ee38 (diff)
download: vecmathlib-967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b.zip vecmathlib-967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b.tar.gz
1 file changed, 28 insertions, 2 deletions
diff --git a/mathfuncs_exp.h b/mathfuncs_exp.h
index a615db1..c7a1498 100644
--- a/mathfuncs_exp.h
+++ b/mathfuncs_exp.h
@@ -21,6 +21,11 @@ namespace vecmathlib {
     // intvec_t iround_x = convert_int(round_x);
     // r = ldexp(r, iround_x);
     
+#if 0
+    // Straightforward implementation
+    realvec_t round_x = rint(x);
+    x -= round_x;
+#elif 1
     // Round by adding, then subtracting again a large number
     // Add a large number to move the mantissa bits to the right
     int_t large = (U(1) << FP::mantissa_bits) + FP::exponent_offset;
@@ -29,6 +34,12 @@ namespace vecmathlib {
     
     realvec_t round_x = tmp - RV(R(large));
     x -= round_x;
+#else
+    // Straightforward implementation, using round instead of rint,
+    // since round is faster for QPX
+    realvec_t round_x = round(x);
+    x -= round_x;
+#endif
     VML_ASSERT(all(x >= RV(-0.5) && x <= RV(0.5)));
     
     // Polynomial expansion
@@ -88,15 +99,29 @@ namespace vecmathlib {
     }
     
     // Undo rescaling
+#if 0
+    // Straightforward implementation
+    r = ldexp(r, convert_int(round_x));
+#elif 1
+    // Use direct integer manipulation
     // Extract integer as lowest mantissa bits (highest bits still
     // contain offset, exponent, and sign)
     intvec_t itmp = as_int(tmp);
     // Construct scale factor by setting exponent (this shifts out the
     // highest bits)
     realvec_t scale = as_float(itmp << I(FP::mantissa_bits));
-    scale = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), scale);
-    
     r *= scale;
+#else
+    // Use floating point operations instead of integer operations,
+    // since these are faster for QPX
+    real_t exponent_factor = R(I(1) << I(FP::mantissa_bits));
+    real_t exponent_offset = R(I(FP::exponent_offset) << I(FP::mantissa_bits));
+    realvec_t exponent = mad(round_x, RV(exponent_factor), RV(exponent_offset));
+    realvec_t scale = as_float(convert_int(exponent));
+    r *= scale;
+#endif
+    
+    r = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), r);
     
     return r;
   }
@@ -121,6 +146,7 @@ namespace vecmathlib {
   inline
   realvec_t mathfuncs<realvec_t>::vml_expm1(realvec_t x)
   {
+    // TODO: improve this
     return exp(x) - RV(1.0);
 #if 0
     r = exp(x) - RV(1.0);
author	Erik Schnetter <schnetter@gmail.com>	2013-11-25 15:02:18 -0500
committer	Erik Schnetter <schnetter@gmail.com>	2013-11-25 15:02:18 -0500
commit	967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b (patch)
tree	c67b039c0f84be30c37e960aaecdf1ab8d8f484d
parent	6ab7b34b64a7750780141be9bde302fcf9e0ee38 (diff)
download	vecmathlib-967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b.zip vecmathlib-967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b.tar.gz