summaryrefslogtreecommitdiffstats
path: root/mathfuncs_exp.h
diff options
context:
space:
mode:
Diffstat (limited to 'mathfuncs_exp.h')
-rw-r--r--mathfuncs_exp.h237
1 files changed, 113 insertions, 124 deletions
diff --git a/mathfuncs_exp.h b/mathfuncs_exp.h
index d357a21..e35fb1b 100644
--- a/mathfuncs_exp.h
+++ b/mathfuncs_exp.h
@@ -7,156 +7,145 @@
#include <cmath>
+namespace vecmathlib {
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_exp2(realvec_t x) {
+ // TODO: Check SLEEF 2.80 algorithm
+ // (in particular the improved-precision truncation)
+
+ // Rescale
+ realvec_t x0 = x;
+
+// realvec_t round_x = rint(x);
+// intvec_t iround_x = convert_int(round_x);
+// r = ldexp(r, iround_x);
-namespace vecmathlib {
-
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_exp2(realvec_t x)
- {
- // TODO: Check SLEEF 2.80 algorithm
- // (in particular the improved-precision truncation)
-
- // Rescale
- realvec_t x0 = x;
-
- // realvec_t round_x = rint(x);
- // intvec_t iround_x = convert_int(round_x);
- // r = ldexp(r, iround_x);
-
#if 0
// Straightforward implementation
realvec_t round_x = rint(x);
x -= round_x;
#elif 1
- // Round by adding, then subtracting again a large number
- // Add a large number to move the mantissa bits to the right
- int_t large = (U(1) << FP::mantissa_bits) + FP::exponent_offset;
- realvec_t tmp = x + RV(R(large));
- tmp.barrier();
-
- realvec_t round_x = tmp - RV(R(large));
- x -= round_x;
+ // Round by adding, then subtracting again a large number
+ // Add a large number to move the mantissa bits to the right
+ int_t large = (U(1) << FP::mantissa_bits) + FP::exponent_offset;
+ realvec_t tmp = x + RV(R(large));
+ tmp.barrier();
+
+ realvec_t round_x = tmp - RV(R(large));
+ x -= round_x;
#else
- // Straightforward implementation, using round instead of rint,
- // since round is faster for QPX
- realvec_t round_x = round(x);
- x -= round_x;
+ // Straightforward implementation, using round instead of rint,
+ // since round is faster for QPX
+ realvec_t round_x = round(x);
+ x -= round_x;
#endif
- VML_ASSERT(all(x >= RV(-0.5) && x <= RV(0.5)));
-
- // Polynomial expansion
- realvec_t r;
- switch (sizeof(real_t)) {
- case 4:
+ VML_ASSERT(all(x >= RV(-0.5) && x <= RV(0.5)));
+
+ // Polynomial expansion
+ realvec_t r;
+ switch (sizeof(real_t)) {
+ case 4:
#ifdef VML_HAVE_FP_CONTRACT
- // float, error=4.55549108005200277750378992345e-9
- r = RV(0.000154653240842602623787395880898);
- r = mad(r, x, RV(0.00133952915439234389712105060319));
- r = mad(r, x, RV(0.0096180399118156827664944870552));
- r = mad(r, x, RV(0.055503406540531310853149866446));
- r = mad(r, x, RV(0.240226511015459465468737123346));
- r = mad(r, x, RV(0.69314720007380208630542805293));
- r = mad(r, x, RV(0.99999999997182023878745628977));
+ // float, error=4.55549108005200277750378992345e-9
+ r = RV(0.000154653240842602623787395880898);
+ r = mad(r, x, RV(0.00133952915439234389712105060319));
+ r = mad(r, x, RV(0.0096180399118156827664944870552));
+ r = mad(r, x, RV(0.055503406540531310853149866446));
+ r = mad(r, x, RV(0.240226511015459465468737123346));
+ r = mad(r, x, RV(0.69314720007380208630542805293));
+ r = mad(r, x, RV(0.99999999997182023878745628977));
#else
- // float, error=1.62772721960621336664735896836e-7
- r = RV(0.00133952915439234389712105060319);
- r = mad(r, x, RV(0.009670773148229417605024318985));
- r = mad(r, x, RV(0.055503406540531310853149866446));
- r = mad(r, x, RV(0.240222115700585316818177639177));
- r = mad(r, x, RV(0.69314720007380208630542805293));
- r = mad(r, x, RV(1.00000005230745711373079206024));
+ // float, error=1.62772721960621336664735896836e-7
+ r = RV(0.00133952915439234389712105060319);
+ r = mad(r, x, RV(0.009670773148229417605024318985));
+ r = mad(r, x, RV(0.055503406540531310853149866446));
+ r = mad(r, x, RV(0.240222115700585316818177639177));
+ r = mad(r, x, RV(0.69314720007380208630542805293));
+ r = mad(r, x, RV(1.00000005230745711373079206024));
#endif
- break;
- case 8:
+ break;
+ case 8:
#ifdef VML_HAVE_FP_CONTRACT
- // double, error=9.32016781355638010975628074746e-18
- r = RV(4.45623165388261696886670014471e-10);
- r = mad(r, x, RV(7.0733589360775271430968224806e-9));
- r = mad(r, x, RV(1.01780540270960163558119510246e-7));
- r = mad(r, x, RV(1.3215437348041505269462510712e-6));
- r = mad(r, x, RV(0.000015252733849766201174247690629));
- r = mad(r, x, RV(0.000154035304541242555115696403795));
- r = mad(r, x, RV(0.00133335581463968601407096905671));
- r = mad(r, x, RV(0.0096181291075949686712855561931));
- r = mad(r, x, RV(0.055504108664821672870565883052));
- r = mad(r, x, RV(0.240226506959101382690753994082));
- r = mad(r, x, RV(0.69314718055994530864272481773));
- r = mad(r, x, RV(0.9999999999999999978508676375));
+ // double, error=9.32016781355638010975628074746e-18
+ r = RV(4.45623165388261696886670014471e-10);
+ r = mad(r, x, RV(7.0733589360775271430968224806e-9));
+ r = mad(r, x, RV(1.01780540270960163558119510246e-7));
+ r = mad(r, x, RV(1.3215437348041505269462510712e-6));
+ r = mad(r, x, RV(0.000015252733849766201174247690629));
+ r = mad(r, x, RV(0.000154035304541242555115696403795));
+ r = mad(r, x, RV(0.00133335581463968601407096905671));
+ r = mad(r, x, RV(0.0096181291075949686712855561931));
+ r = mad(r, x, RV(0.055504108664821672870565883052));
+ r = mad(r, x, RV(0.240226506959101382690753994082));
+ r = mad(r, x, RV(0.69314718055994530864272481773));
+ r = mad(r, x, RV(0.9999999999999999978508676375));
#else
- // double, error=3.74939899823302048807873981077e-14
- r = RV(1.02072375599725694063203809188e-7);
- r = mad(r, x, RV(1.32573274434801314145133004073e-6));
- r = mad(r, x, RV(0.0000152526647170731944840736190013));
- r = mad(r, x, RV(0.000154034441925859828261898614555));
- r = mad(r, x, RV(0.00133335582175770747495287552557));
- r = mad(r, x, RV(0.0096181291794939392517233403183));
- r = mad(r, x, RV(0.055504108664525029438908798685));
- r = mad(r, x, RV(0.240226506957026959772247598695));
- r = mad(r, x, RV(0.6931471805599487321347668143));
- r = mad(r, x, RV(1.00000000000000942892870993489));
+ // double, error=3.74939899823302048807873981077e-14
+ r = RV(1.02072375599725694063203809188e-7);
+ r = mad(r, x, RV(1.32573274434801314145133004073e-6));
+ r = mad(r, x, RV(0.0000152526647170731944840736190013));
+ r = mad(r, x, RV(0.000154034441925859828261898614555));
+ r = mad(r, x, RV(0.00133335582175770747495287552557));
+ r = mad(r, x, RV(0.0096181291794939392517233403183));
+ r = mad(r, x, RV(0.055504108664525029438908798685));
+ r = mad(r, x, RV(0.240226506957026959772247598695));
+ r = mad(r, x, RV(0.6931471805599487321347668143));
+ r = mad(r, x, RV(1.00000000000000942892870993489));
#endif
- break;
- default:
- __builtin_unreachable();
- }
-
- // Undo rescaling
+ break;
+ default:
+ __builtin_unreachable();
+ }
+
+// Undo rescaling
#if 0
// Straightforward implementation
r = ldexp(r, convert_int(round_x));
#elif 1
- // Use direct integer manipulation
- // Extract integer as lowest mantissa bits (highest bits still
- // contain offset, exponent, and sign)
- intvec_t itmp = as_int(tmp);
- // Construct scale factor by setting exponent (this shifts out the
- // highest bits)
- realvec_t scale = as_float(itmp << I(FP::mantissa_bits));
- r *= scale;
+ // Use direct integer manipulation
+ // Extract integer as lowest mantissa bits (highest bits still
+ // contain offset, exponent, and sign)
+ intvec_t itmp = as_int(tmp);
+ // Construct scale factor by setting exponent (this shifts out the
+ // highest bits)
+ realvec_t scale = as_float(itmp << I(FP::mantissa_bits));
+ r *= scale;
#else
- // Use floating point operations instead of integer operations,
- // since these are faster for QPX
- real_t exponent_factor = R(I(1) << I(FP::mantissa_bits));
- real_t exponent_offset = R(I(FP::exponent_offset) << I(FP::mantissa_bits));
- realvec_t exponent = mad(round_x, RV(exponent_factor), RV(exponent_offset));
- realvec_t scale = as_float(convert_int(exponent));
- r *= scale;
+ // Use floating point operations instead of integer operations,
+ // since these are faster for QPX
+ real_t exponent_factor = R(I(1) << I(FP::mantissa_bits));
+ real_t exponent_offset = R(I(FP::exponent_offset) << I(FP::mantissa_bits));
+ realvec_t exponent = mad(round_x, RV(exponent_factor), RV(exponent_offset));
+ realvec_t scale = as_float(convert_int(exponent));
+ r *= scale;
#endif
-
- r = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), r);
-
- return r;
- }
-
-
-
- template<typename realvec_t>
- inline
- realvec_t mathfuncs<realvec_t>::vml_exp(realvec_t x)
- {
- return exp2(RV(M_LOG2E) * x);
- }
- template<typename realvec_t>
- inline
- realvec_t mathfuncs<realvec_t>::vml_exp10(realvec_t x)
- {
- return exp2(RV(M_LOG2E * M_LN10) * x);
- }
+ r = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), r);
+
+ return r;
+}
- template<typename realvec_t>
- inline
- realvec_t mathfuncs<realvec_t>::vml_expm1(realvec_t x)
- {
- // TODO: improve this
- return exp(x) - RV(1.0);
+template <typename realvec_t>
+inline realvec_t mathfuncs<realvec_t>::vml_exp(realvec_t x) {
+ return exp2(RV(M_LOG2E) * x);
+}
+
+template <typename realvec_t>
+inline realvec_t mathfuncs<realvec_t>::vml_exp10(realvec_t x) {
+ return exp2(RV(M_LOG2E * M_LN10) * x);
+}
+
+template <typename realvec_t>
+inline realvec_t mathfuncs<realvec_t>::vml_expm1(realvec_t x) {
+ // TODO: improve this
+ return exp(x) - RV(1.0);
#if 0
r = exp(x) - RV(1.0);
return ifthen(r == RV(0.0), x, r);
#endif
- }
-
+}
+
}; // namespace vecmathlib
-#endif // #ifndef MATHFUNCS_EXP_H
+#endif // #ifndef MATHFUNCS_EXP_H
OpenPOWER on IntegriCloud