diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-11-25 15:02:18 -0500 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-11-25 15:02:18 -0500 |
commit | 967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b (patch) | |
tree | c67b039c0f84be30c37e960aaecdf1ab8d8f484d | |
parent | 6ab7b34b64a7750780141be9bde302fcf9e0ee38 (diff) | |
download | vecmathlib-967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b.zip vecmathlib-967755a9fec3e08a15d0391f7b3e3fb8f0fadd9b.tar.gz |
Some ideas to improve exp on QPX
-rw-r--r-- | mathfuncs_exp.h | 30 |
1 files changed, 28 insertions, 2 deletions
diff --git a/mathfuncs_exp.h b/mathfuncs_exp.h index a615db1..c7a1498 100644 --- a/mathfuncs_exp.h +++ b/mathfuncs_exp.h @@ -21,6 +21,11 @@ namespace vecmathlib { // intvec_t iround_x = convert_int(round_x); // r = ldexp(r, iround_x); +#if 0 + // Straightforward implementation + realvec_t round_x = rint(x); + x -= round_x; +#elif 1 // Round by adding, then subtracting again a large number // Add a large number to move the mantissa bits to the right int_t large = (U(1) << FP::mantissa_bits) + FP::exponent_offset; @@ -29,6 +34,12 @@ namespace vecmathlib { realvec_t round_x = tmp - RV(R(large)); x -= round_x; +#else + // Straightforward implementation, using round instead of rint, + // since round is faster for QPX + realvec_t round_x = round(x); + x -= round_x; +#endif VML_ASSERT(all(x >= RV(-0.5) && x <= RV(0.5))); // Polynomial expansion @@ -88,15 +99,29 @@ namespace vecmathlib { } // Undo rescaling +#if 0 + // Straightforward implementation + r = ldexp(r, convert_int(round_x)); +#elif 1 + // Use direct integer manipulation // Extract integer as lowest mantissa bits (highest bits still // contain offset, exponent, and sign) intvec_t itmp = as_int(tmp); // Construct scale factor by setting exponent (this shifts out the // highest bits) realvec_t scale = as_float(itmp << I(FP::mantissa_bits)); - scale = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), scale); - r *= scale; +#else + // Use floating point operations instead of integer operations, + // since these are faster for QPX + real_t exponent_factor = R(I(1) << I(FP::mantissa_bits)); + real_t exponent_offset = R(I(FP::exponent_offset) << I(FP::mantissa_bits)); + realvec_t exponent = mad(round_x, RV(exponent_factor), RV(exponent_offset)); + realvec_t scale = as_float(convert_int(exponent)); + r *= scale; +#endif + + r = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), r); return r; } @@ -121,6 +146,7 @@ namespace vecmathlib { inline realvec_t mathfuncs<realvec_t>::vml_expm1(realvec_t x) { + // TODO: improve this return exp(x) - RV(1.0); #if 0 r = exp(x) - RV(1.0); |