Rewrite rcp (reciprocal) function

author: Erik Schnetter <schnetter@gmail.com> 2013-06-28 14:05:48 -0400
committer: Erik Schnetter <schnetter@gmail.com> 2013-06-28 14:05:48 -0400
commit: 4ab94961f2e5198c5d98a2297cad06530cd4418d (patch)
tree: 5863226d76c41fd93ca6edbd341d292817fd57ff /mathfuncs_rcp.h
parent: ba43da889e5d662eedb4c2957162b4d9254557f9 (diff)
download: vecmathlib-4ab94961f2e5198c5d98a2297cad06530cd4418d.zip
vecmathlib-4ab94961f2e5198c5d98a2297cad06530cd4418d.tar.gz
1 files changed, 45 insertions, 1 deletions
diff --git a/mathfuncs_rcp.h b/mathfuncs_rcp.h
index d5e412f..e540f76 100644
--- a/mathfuncs_rcp.h
+++ b/mathfuncs_rcp.h
@@ -11,6 +11,8 @@
 
 namespace vecmathlib {
   
+#if 0
+  // This routine works, but may be slower than the one below
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x)
   {
@@ -40,7 +42,49 @@ namespace vecmathlib {
       r += r * (RV(1.0) - x*r);
       
       // NEON: r = r * (RV(2.0) - x*r);
-   }
+    }
+    
+    // Handle negative values
+    r = copysign(r, x0);
+    
+    return r;
+  }
+#endif
+  
+  template<typename realvec_t>
+  realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x)
+  {
+    // Handle negative values
+    realvec_t x0 = x;
+    x = fabs(x);
+    
+    // <https://en.wikipedia.org/wiki/Division_algorithm> [2013-06-28]
+    
+    // Initial guess
+    VML_ASSERT(all(x > RV(0.0)));
+    intvec_t x_exp;
+    x = frexp(x, x_exp);
+    VML_ASSERT(all(x >= RV(0.5) && x < RV(1.0)));
+    realvec_t r = RV(R(48.0)/R(17.0)) - RV(R(32.0)/R(17.0)) * x;
+    
+    // Iterate
+    int const nmax = sizeof(real_t)==4 ? 3 : 4;
+    for (int n=0; n<nmax; ++n) {
+      // Step
+      VML_ASSERT(all(x > RV(0.0)));
+      // Newton method:
+      // Solve   f(r) = 0   for   f(r) = x - 1/r
+      //    r <- r - f(r) / f'(r)
+      //    r <- 2 r - r^2 x
+      //    r <- r + r (1 - r x)
+      
+      // Note: don't rewrite this expression, this may introduce
+      // cancellation errors
+      r += r * (RV(1.0) - x*r);
+      
+      // NEON: r = r * (RV(2.0) - x*r);
+    }
+    r = ldexp(r, -x_exp);
     
     // Handle negative values
     r = copysign(r, x0);
author	Erik Schnetter <schnetter@gmail.com>	2013-06-28 14:05:48 -0400
committer	Erik Schnetter <schnetter@gmail.com>	2013-06-28 14:05:48 -0400
commit	4ab94961f2e5198c5d98a2297cad06530cd4418d (patch)
tree	5863226d76c41fd93ca6edbd341d292817fd57ff /mathfuncs_rcp.h
parent	ba43da889e5d662eedb4c2957162b4d9254557f9 (diff)
download	vecmathlib-4ab94961f2e5198c5d98a2297cad06530cd4418d.zip vecmathlib-4ab94961f2e5198c5d98a2297cad06530cd4418d.tar.gz