summaryrefslogtreecommitdiffstats
path: root/mathfuncs_rcp.h
diff options
context:
space:
mode:
authorErik Schnetter <schnetter@gmail.com>2013-06-28 14:05:48 -0400
committerErik Schnetter <schnetter@gmail.com>2013-06-28 14:05:48 -0400
commit4ab94961f2e5198c5d98a2297cad06530cd4418d (patch)
tree5863226d76c41fd93ca6edbd341d292817fd57ff /mathfuncs_rcp.h
parentba43da889e5d662eedb4c2957162b4d9254557f9 (diff)
downloadvecmathlib-4ab94961f2e5198c5d98a2297cad06530cd4418d.zip
vecmathlib-4ab94961f2e5198c5d98a2297cad06530cd4418d.tar.gz
Rewrite rcp (reciprocal) function
Diffstat (limited to 'mathfuncs_rcp.h')
-rw-r--r--mathfuncs_rcp.h46
1 files changed, 45 insertions, 1 deletions
diff --git a/mathfuncs_rcp.h b/mathfuncs_rcp.h
index d5e412f..e540f76 100644
--- a/mathfuncs_rcp.h
+++ b/mathfuncs_rcp.h
@@ -11,6 +11,8 @@
namespace vecmathlib {
+#if 0
+ // This routine works, but may be slower than the one below
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x)
{
@@ -40,7 +42,49 @@ namespace vecmathlib {
r += r * (RV(1.0) - x*r);
// NEON: r = r * (RV(2.0) - x*r);
- }
+ }
+
+ // Handle negative values
+ r = copysign(r, x0);
+
+ return r;
+ }
+#endif
+
+ template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x)
+ {
+ // Handle negative values
+ realvec_t x0 = x;
+ x = fabs(x);
+
+ // <https://en.wikipedia.org/wiki/Division_algorithm> [2013-06-28]
+
+ // Initial guess
+ VML_ASSERT(all(x > RV(0.0)));
+ intvec_t x_exp;
+ x = frexp(x, x_exp);
+ VML_ASSERT(all(x >= RV(0.5) && x < RV(1.0)));
+ realvec_t r = RV(R(48.0)/R(17.0)) - RV(R(32.0)/R(17.0)) * x;
+
+ // Iterate
+ int const nmax = sizeof(real_t)==4 ? 3 : 4;
+ for (int n=0; n<nmax; ++n) {
+ // Step
+ VML_ASSERT(all(x > RV(0.0)));
+ // Newton method:
+ // Solve f(r) = 0 for f(r) = x - 1/r
+ // r <- r - f(r) / f'(r)
+ // r <- 2 r - r^2 x
+ // r <- r + r (1 - r x)
+
+ // Note: don't rewrite this expression, this may introduce
+ // cancellation errors
+ r += r * (RV(1.0) - x*r);
+
+ // NEON: r = r * (RV(2.0) - x*r);
+ }
+ r = ldexp(r, -x_exp);
// Handle negative values
r = copysign(r, x0);
OpenPOWER on IntegriCloud