summaryrefslogtreecommitdiffstats
path: root/mathfuncs_sqrt.h
diff options
context:
space:
mode:
Diffstat (limited to 'mathfuncs_sqrt.h')
-rw-r--r--mathfuncs_sqrt.h124
1 files changed, 56 insertions, 68 deletions
diff --git a/mathfuncs_sqrt.h b/mathfuncs_sqrt.h
index dea5fd6..7a362f9 100644
--- a/mathfuncs_sqrt.h
+++ b/mathfuncs_sqrt.h
@@ -7,13 +7,10 @@
#include <cmath>
-
-
namespace vecmathlib {
-
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_sqrt(realvec_t x)
- {
+
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_sqrt(realvec_t x) {
#if 0
// Handle special case: zero
boolvec_t is_zero = x <= RV(0.0);
@@ -49,29 +46,23 @@ namespace vecmathlib {
// Handle special case: zero
r = ifthen(is_zero, RV(0.0), r);
#endif
-
- realvec_t r = x * rsqrt(x);
- // Handle special case: zero
- r = ifthen(x == RV(0.0), RV(0.0), r);
-
- return r;
- }
-
-
-
- // TODO: Use "Halley's method with cubic convergence":
- // <http://press.mcs.anl.gov/gswjanuary12/files/2012/01/Optimizing-Single-Node-Performance-on-BlueGene.pdf>
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_cbrt(realvec_t x)
- {
- return pow(x, RV(1.0/3.0));
- }
-
-
-
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_rsqrt(realvec_t x)
- {
+
+ realvec_t r = x * rsqrt(x);
+ // Handle special case: zero
+ r = ifthen(x == RV(0.0), RV(0.0), r);
+
+ return r;
+}
+
+// TODO: Use "Halley's method with cubic convergence":
+// <http://press.mcs.anl.gov/gswjanuary12/files/2012/01/Optimizing-Single-Node-Performance-on-BlueGene.pdf>
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_cbrt(realvec_t x) {
+ return pow(x, RV(1.0 / 3.0));
+}
+
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_rsqrt(realvec_t x) {
#if 0
// See <http://en.wikipedia.org/wiki/Fast_inverse_square_root>
realvec_t x_2 = RV(0.5) * x;
@@ -85,46 +76,43 @@ namespace vecmathlib {
r += r * (RV(0.5) - (x_2 * r * r));
return r;
#else
- // Initial guess
- // VML_ASSERT(all(x > RV(0.0)));
- intvec_t ilogb_x = ilogb(x);
- realvec_t s =
+ // Initial guess
+ // VML_ASSERT(all(x > RV(0.0)));
+ intvec_t ilogb_x = ilogb(x);
+ realvec_t s =
ifthen(convert_bool(ilogb_x & IV(I(1))), RV(R(0.583)), RV(R(0.824)));
- realvec_t r = ldexp(s, -(ilogb_x >> I(1)));
-
- realvec_t x_2 = RV(0.5) * x;
-
- // Iterate
- // nmax iterations give an accuracy of 2^nmax binary digits. 5
- // iterations suffice for double precision with its 53 digits.
- int const nmax = sizeof(real_t)==4 ? 4 : 5;
- for (int n=0; n<nmax; ++n) {
- // Step
- VML_ASSERT(all(r > RV(0.0)));
- // Newton method:
- // Solve f(r) = 0 for f(r) = x - 1/r^2
- // r <- r - f(r) / f'(r)
- // r <- (3 r - r^3 x) / 2
- // r <- r (3/2 - r^2 x/2)
-
- // Note: don't rewrite this expression, this may introduce
- // cancellation errors (says who?)
- // r *= RV(1.5) - x_2 * r*r;
- r += r * (RV(0.5) - x_2 * r*r);
- }
-
- return r;
-#endif
- }
-
-
-
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_hypot(realvec_t x, realvec_t y)
- {
- return sqrt(x*x + y*y);
+ realvec_t r = ldexp(s, -(ilogb_x >> I(1)));
+
+ realvec_t x_2 = RV(0.5) * x;
+
+ // Iterate
+ // nmax iterations give an accuracy of 2^nmax binary digits. 5
+ // iterations suffice for double precision with its 53 digits.
+ int const nmax = sizeof(real_t) == 4 ? 4 : 5;
+ for (int n = 0; n < nmax; ++n) {
+ // Step
+ VML_ASSERT(all(r > RV(0.0)));
+ // Newton method:
+ // Solve f(r) = 0 for f(r) = x - 1/r^2
+ // r <- r - f(r) / f'(r)
+ // r <- (3 r - r^3 x) / 2
+ // r <- r (3/2 - r^2 x/2)
+
+ // Note: don't rewrite this expression, this may introduce
+ // cancellation errors (says who?)
+ // r *= RV(1.5) - x_2 * r*r;
+ r += r * (RV(0.5) - x_2 * r * r);
}
-
+
+ return r;
+#endif
+}
+
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_hypot(realvec_t x, realvec_t y) {
+ return sqrt(x * x + y * y);
+}
+
}; // namespace vecmathlib
-#endif // #ifndef MATHFUNCS_SQRT_H
+#endif // #ifndef MATHFUNCS_SQRT_H
OpenPOWER on IntegriCloud