diff options
Diffstat (limited to 'mathfuncs_sqrt.h')
-rw-r--r-- | mathfuncs_sqrt.h | 124 |
1 files changed, 56 insertions, 68 deletions
diff --git a/mathfuncs_sqrt.h b/mathfuncs_sqrt.h index dea5fd6..7a362f9 100644 --- a/mathfuncs_sqrt.h +++ b/mathfuncs_sqrt.h @@ -7,13 +7,10 @@ #include <cmath> - - namespace vecmathlib { - - template<typename realvec_t> - realvec_t mathfuncs<realvec_t>::vml_sqrt(realvec_t x) - { + +template <typename realvec_t> +realvec_t mathfuncs<realvec_t>::vml_sqrt(realvec_t x) { #if 0 // Handle special case: zero boolvec_t is_zero = x <= RV(0.0); @@ -49,29 +46,23 @@ namespace vecmathlib { // Handle special case: zero r = ifthen(is_zero, RV(0.0), r); #endif - - realvec_t r = x * rsqrt(x); - // Handle special case: zero - r = ifthen(x == RV(0.0), RV(0.0), r); - - return r; - } - - - - // TODO: Use "Halley's method with cubic convergence": - // <http://press.mcs.anl.gov/gswjanuary12/files/2012/01/Optimizing-Single-Node-Performance-on-BlueGene.pdf> - template<typename realvec_t> - realvec_t mathfuncs<realvec_t>::vml_cbrt(realvec_t x) - { - return pow(x, RV(1.0/3.0)); - } - - - - template<typename realvec_t> - realvec_t mathfuncs<realvec_t>::vml_rsqrt(realvec_t x) - { + + realvec_t r = x * rsqrt(x); + // Handle special case: zero + r = ifthen(x == RV(0.0), RV(0.0), r); + + return r; +} + +// TODO: Use "Halley's method with cubic convergence": +// <http://press.mcs.anl.gov/gswjanuary12/files/2012/01/Optimizing-Single-Node-Performance-on-BlueGene.pdf> +template <typename realvec_t> +realvec_t mathfuncs<realvec_t>::vml_cbrt(realvec_t x) { + return pow(x, RV(1.0 / 3.0)); +} + +template <typename realvec_t> +realvec_t mathfuncs<realvec_t>::vml_rsqrt(realvec_t x) { #if 0 // See <http://en.wikipedia.org/wiki/Fast_inverse_square_root> realvec_t x_2 = RV(0.5) * x; @@ -85,46 +76,43 @@ namespace vecmathlib { r += r * (RV(0.5) - (x_2 * r * r)); return r; #else - // Initial guess - // VML_ASSERT(all(x > RV(0.0))); - intvec_t ilogb_x = ilogb(x); - realvec_t s = + // Initial guess + // VML_ASSERT(all(x > RV(0.0))); + intvec_t ilogb_x = ilogb(x); + realvec_t s = ifthen(convert_bool(ilogb_x & IV(I(1))), RV(R(0.583)), RV(R(0.824))); - realvec_t r = ldexp(s, -(ilogb_x >> I(1))); - - realvec_t x_2 = RV(0.5) * x; - - // Iterate - // nmax iterations give an accuracy of 2^nmax binary digits. 5 - // iterations suffice for double precision with its 53 digits. - int const nmax = sizeof(real_t)==4 ? 4 : 5; - for (int n=0; n<nmax; ++n) { - // Step - VML_ASSERT(all(r > RV(0.0))); - // Newton method: - // Solve f(r) = 0 for f(r) = x - 1/r^2 - // r <- r - f(r) / f'(r) - // r <- (3 r - r^3 x) / 2 - // r <- r (3/2 - r^2 x/2) - - // Note: don't rewrite this expression, this may introduce - // cancellation errors (says who?) - // r *= RV(1.5) - x_2 * r*r; - r += r * (RV(0.5) - x_2 * r*r); - } - - return r; -#endif - } - - - - template<typename realvec_t> - realvec_t mathfuncs<realvec_t>::vml_hypot(realvec_t x, realvec_t y) - { - return sqrt(x*x + y*y); + realvec_t r = ldexp(s, -(ilogb_x >> I(1))); + + realvec_t x_2 = RV(0.5) * x; + + // Iterate + // nmax iterations give an accuracy of 2^nmax binary digits. 5 + // iterations suffice for double precision with its 53 digits. + int const nmax = sizeof(real_t) == 4 ? 4 : 5; + for (int n = 0; n < nmax; ++n) { + // Step + VML_ASSERT(all(r > RV(0.0))); + // Newton method: + // Solve f(r) = 0 for f(r) = x - 1/r^2 + // r <- r - f(r) / f'(r) + // r <- (3 r - r^3 x) / 2 + // r <- r (3/2 - r^2 x/2) + + // Note: don't rewrite this expression, this may introduce + // cancellation errors (says who?) + // r *= RV(1.5) - x_2 * r*r; + r += r * (RV(0.5) - x_2 * r * r); } - + + return r; +#endif +} + +template <typename realvec_t> +realvec_t mathfuncs<realvec_t>::vml_hypot(realvec_t x, realvec_t y) { + return sqrt(x * x + y * y); +} + }; // namespace vecmathlib -#endif // #ifndef MATHFUNCS_SQRT_H +#endif // #ifndef MATHFUNCS_SQRT_H |