summaryrefslogtreecommitdiffstats
path: root/mathfuncs_rcp.h
diff options
context:
space:
mode:
Diffstat (limited to 'mathfuncs_rcp.h')
-rw-r--r--mathfuncs_rcp.h117
1 files changed, 55 insertions, 62 deletions
diff --git a/mathfuncs_rcp.h b/mathfuncs_rcp.h
index 6e12b27..f703454 100644
--- a/mathfuncs_rcp.h
+++ b/mathfuncs_rcp.h
@@ -7,10 +7,8 @@
#include <cmath>
-
-
namespace vecmathlib {
-
+
#if 0
// This routine works, but may be slower than the one below
template<typename realvec_t>
@@ -50,66 +48,61 @@ namespace vecmathlib {
return r;
}
#endif
-
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x)
- {
- // Handle negative values
- realvec_t x0 = x;
- x = fabs(x);
-
- // <https://en.wikipedia.org/wiki/Division_algorithm> [2013-06-28]
-
- // Initial guess
+
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x) {
+ // Handle negative values
+ realvec_t x0 = x;
+ x = fabs(x);
+
+ // <https://en.wikipedia.org/wiki/Division_algorithm> [2013-06-28]
+
+ // Initial guess
+ VML_ASSERT(all(x > RV(0.0)));
+ intvec_t x_exp;
+ x = frexp(x, &x_exp);
+ VML_ASSERT(all(x >= RV(0.5) && x < RV(1.0)));
+ realvec_t r = RV(R(48.0) / R(17.0)) - RV(R(32.0) / R(17.0)) * x;
+
+ // Iterate
+ int const nmax = sizeof(real_t) == 4 ? 3 : 4;
+ for (int n = 0; n < nmax; ++n) {
+ // Step
VML_ASSERT(all(x > RV(0.0)));
- intvec_t x_exp;
- x = frexp(x, &x_exp);
- VML_ASSERT(all(x >= RV(0.5) && x < RV(1.0)));
- realvec_t r = RV(R(48.0)/R(17.0)) - RV(R(32.0)/R(17.0)) * x;
-
- // Iterate
- int const nmax = sizeof(real_t)==4 ? 3 : 4;
- for (int n=0; n<nmax; ++n) {
- // Step
- VML_ASSERT(all(x > RV(0.0)));
- // Newton method:
- // Solve f(r) = 0 for f(r) = x - 1/r
- // r <- r - f(r) / f'(r)
- // r <- 2 r - r^2 x
- // r <- r + r (1 - r x)
-
- // Note: don't rewrite this expression, this may introduce
- // cancellation errors
- r += r * (RV(1.0) - x*r);
-
- // NEON: r = r * (RV(2.0) - x*r);
- }
- r = ldexp(r, -x_exp);
-
- // Handle negative values
- r = copysign(r, x0);
-
- return r;
- }
-
-
-
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_remainder(realvec_t x, realvec_t y)
- {
- return x - rint(x / y) * y;
- // realvec_t r = x / y;
- // return y * (r - rint(r));
- }
-
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_fmod(realvec_t x, realvec_t y)
- {
- return x - y * trunc(x / y);
- // realvec_t r = x / y;
- // return y * (r - trunc(r));
+ // Newton method:
+ // Solve f(r) = 0 for f(r) = x - 1/r
+ // r <- r - f(r) / f'(r)
+ // r <- 2 r - r^2 x
+ // r <- r + r (1 - r x)
+
+ // Note: don't rewrite this expression, this may introduce
+ // cancellation errors
+ r += r * (RV(1.0) - x * r);
+
+ // NEON: r = r * (RV(2.0) - x*r);
}
-
+ r = ldexp(r, -x_exp);
+
+ // Handle negative values
+ r = copysign(r, x0);
+
+ return r;
+}
+
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_remainder(realvec_t x, realvec_t y) {
+ return x - rint(x / y) * y;
+ // realvec_t r = x / y;
+ // return y * (r - rint(r));
+}
+
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_fmod(realvec_t x, realvec_t y) {
+ return x - y * trunc(x / y);
+ // realvec_t r = x / y;
+ // return y * (r - trunc(r));
+}
+
}; // namespace vecmathlib
-#endif // #ifndef MATHFUNCS_RCP_H
+#endif // #ifndef MATHFUNCS_RCP_H
OpenPOWER on IntegriCloud