Use a better algorithm for reducing the error in __kernel_cos[f]().

This supersedes the fix for the old algorithm in rev.1.8 of k_cosf.c. I want this change mainly because it is an optimization. It helps make software cos[f](x) and sin[f](x) faster than the i387 hardware versions for small x. It is also a simplification, and reduces the maximum relative error for cosf() and sinf() on machines like amd64 from about 0.87 ulps to about 0.80 ulps. It was validated for cosf() and sinf() by exhaustive testing. Exhaustive testing is not possible for cos() and sin(), but ucbtest reports a similar reduction for the worst case found by non-exhaustive testing. ucbtest's non-exhaustive testing seems to be good enough to find problems in algorithms but not maximum relative errors when there are spikes. E.g., short runs of it find only 3 ulp error where the i387 hardware cos() has an error of about 2**40 ulps near pi/2.
author: bde <bde@FreeBSD.org> 2005-10-26 12:36:18 +0000
committer: bde <bde@FreeBSD.org> 2005-10-26 12:36:18 +0000
commit: 96c89ee304b32108340680ce85ac644a0a7d9b85 (patch)
tree: 8ce10742e4a2dd610eb83b20221e8fd316940c2f /lib
parent: 189f76a3bd90569ae539a785b257610d3f0437ea (diff)
download: FreeBSD-src-96c89ee304b32108340680ce85ac644a0a7d9b85.zip
FreeBSD-src-96c89ee304b32108340680ce85ac644a0a7d9b85.tar.gz
2 files changed, 22 insertions, 51 deletions
diff --git a/lib/msun/src/k_cos.c b/lib/msun/src/k_cos.c
index 39a2974..dfd9867 100644
--- a/lib/msun/src/k_cos.c
+++ b/lib/msun/src/k_cos.c
@@ -36,18 +36,22 @@ static char rcsid[] = "$FreeBSD$";
  * 
  * 	               4     6     8     10    12     14 
  *	4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
- *	       cos(x) = 1 - x*x/2 + r
+ *	       cos(x) ~ 1 - x*x/2 + r
  *	   since cos(x+y) ~ cos(x) - sin(x)*y 
  *			  ~ cos(x) - x*y,
  *	   a correction term is necessary in cos(x) and hence
  *		cos(x+y) = 1 - (x*x/2 - (r - x*y))
- *	   For better accuracy when x > 0.3, let qx = |x|/4 with
- *	   the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
- *	   Then
- *		cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
- *	   Note that 1-qx and (x*x/2-qx) is EXACT here, and the
- *	   magnitude of the latter is at least a quarter of x*x/2,
- *	   thus, reducing the rounding error in the subtraction.
+ *	   For better accuracy, rearrange to
+ *		cos(x+y) ~ w + (tmp + (r-x*y))
+ *	   where w = 1 - x*x/2 and tmp is a tiny correction term
+ *	   (1 - x*x/2 == w + tmp exactly in infinite precision).
+ *	   The exactness of w + tmp in infinite precision depends on w
+ *	   and tmp having the same precision as x.  If they have extra
+ *	   precision due to compiler bugs, then the extra precision is
+ *	   only good provided it is retained in all terms of the final
+ *	   expression for cos().  Retention happens in all cases tested
+ *	   under FreeBSD, so don't pessimize things by forcibly clipping
+ *	   any extra precision in w.
  */
 
 #include "math.h"
@@ -65,22 +69,11 @@ C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
 double
 __kernel_cos(double x, double y)
 {
-	double a,hz,z,r,qx;
-	int32_t ix;
-	GET_HIGH_WORD(ix,x);
-	ix &= 0x7fffffff;			/* ix = |x|'s high word*/
+	double hz,z,r,w;
+
 	z  = x*x;
 	r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
-	if(ix < 0x3FD33333) 			/* if |x| < 0.3 */ 
-	    return one - (0.5*z - (z*r - x*y));
-	else {
-	    if(ix > 0x3fe90000) {		/* x > 0.78125 */
-		qx = 0.28125;
-	    } else {
-	        INSERT_WORDS(qx,ix-0x00200000,0);	/* x/4 */
-	    }
-	    hz = 0.5*z-qx;
-	    a  = one-qx;
-	    return a - (hz - (z*r-x*y));
-	}
+	hz = (float)0.5*z;
+	w  = one-hz;
+	return w + (((one-w)-hz) + (z*r-x*y));
 }
diff --git a/lib/msun/src/k_cosf.c b/lib/msun/src/k_cosf.c
index e1012ec..5ce7608 100644
--- a/lib/msun/src/k_cosf.c
+++ b/lib/msun/src/k_cosf.c
@@ -32,33 +32,11 @@ C6  = -1.1359647598e-11; /* 0xad47d74e */
 float
 __kernel_cosf(float x, float y)
 {
-	float a,hz,z,r,qx;
-	int32_t ix;
-	GET_FLOAT_WORD(ix,x);
-	ix &= 0x7fffffff;			/* ix = |x|'s high word*/
+	float hz,z,r,w;
+
 	z  = x*x;
 	r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
-	if(ix < 0x3e99999a) 			/* if |x| < 0.3 */
-	    return one - ((float)0.5*z - (z*r - x*y));
-	else {
-	    /*
-	     * qx is an approximation to x*x/2 such that 1-qx and x*x/2-qx
-	     * are both exact.  Its implementation is optimized for
-	     * efficiency in preference to accuracy.  We use x*x/2 ~ x/4 for
-	     * x near 0.5 and mask off just enough low bits (3) for both of
-	     * the above differences to be exact.  We use a constant for
-	     * x > 0.78125 to keep using the same algorithm as k_cos.c,
-	     * although this gives only a small improvement in accuracy, at
-	     * least here.  Using x*x/2 to approximate itself (masking off
-	     * 3 low bits again) would give better accuracy.
-	     */
-	    if(ix > 0x3f480000) {		/* x > 0.78125 */
-		qx = (float)0.28125;
-	    } else {
-	        SET_FLOAT_WORD(qx,(ix-0x01000000)&0xfffffff8);
-	    }
-	    hz = (float)0.5*z-qx;
-	    a  = one-qx;
-	    return a - (hz - (z*r-x*y));
-	}
+	hz = (float)0.5*z;
+	w  = one-hz;
+	return w + (((one-w)-hz) + (z*r-x*y));
 }
author	bde <bde@FreeBSD.org>	2005-10-26 12:36:18 +0000
committer	bde <bde@FreeBSD.org>	2005-10-26 12:36:18 +0000
commit	96c89ee304b32108340680ce85ac644a0a7d9b85 (patch)
tree	8ce10742e4a2dd610eb83b20221e8fd316940c2f /lib
parent	189f76a3bd90569ae539a785b257610d3f0437ea (diff)
download	FreeBSD-src-96c89ee304b32108340680ce85ac644a0a7d9b85.zip FreeBSD-src-96c89ee304b32108340680ce85ac644a0a7d9b85.tar.gz