diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-06-28 14:05:48 -0400 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-06-28 14:05:48 -0400 |
commit | 4ab94961f2e5198c5d98a2297cad06530cd4418d (patch) | |
tree | 5863226d76c41fd93ca6edbd341d292817fd57ff /mathfuncs_rcp.h | |
parent | ba43da889e5d662eedb4c2957162b4d9254557f9 (diff) | |
download | vecmathlib-4ab94961f2e5198c5d98a2297cad06530cd4418d.zip vecmathlib-4ab94961f2e5198c5d98a2297cad06530cd4418d.tar.gz |
Rewrite rcp (reciprocal) function
Diffstat (limited to 'mathfuncs_rcp.h')
-rw-r--r-- | mathfuncs_rcp.h | 46 |
1 files changed, 45 insertions, 1 deletions
diff --git a/mathfuncs_rcp.h b/mathfuncs_rcp.h index d5e412f..e540f76 100644 --- a/mathfuncs_rcp.h +++ b/mathfuncs_rcp.h @@ -11,6 +11,8 @@ namespace vecmathlib { +#if 0 + // This routine works, but may be slower than the one below template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x) { @@ -40,7 +42,49 @@ namespace vecmathlib { r += r * (RV(1.0) - x*r); // NEON: r = r * (RV(2.0) - x*r); - } + } + + // Handle negative values + r = copysign(r, x0); + + return r; + } +#endif + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x) + { + // Handle negative values + realvec_t x0 = x; + x = fabs(x); + + // <https://en.wikipedia.org/wiki/Division_algorithm> [2013-06-28] + + // Initial guess + VML_ASSERT(all(x > RV(0.0))); + intvec_t x_exp; + x = frexp(x, x_exp); + VML_ASSERT(all(x >= RV(0.5) && x < RV(1.0))); + realvec_t r = RV(R(48.0)/R(17.0)) - RV(R(32.0)/R(17.0)) * x; + + // Iterate + int const nmax = sizeof(real_t)==4 ? 3 : 4; + for (int n=0; n<nmax; ++n) { + // Step + VML_ASSERT(all(x > RV(0.0))); + // Newton method: + // Solve f(r) = 0 for f(r) = x - 1/r + // r <- r - f(r) / f'(r) + // r <- 2 r - r^2 x + // r <- r + r (1 - r x) + + // Note: don't rewrite this expression, this may introduce + // cancellation errors + r += r * (RV(1.0) - x*r); + + // NEON: r = r * (RV(2.0) - x*r); + } + r = ldexp(r, -x_exp); // Handle negative values r = copysign(r, x0); |