Use double precision to simplify and optimize arg reduction for small

and medium size args too: instead of conditionally subtracting a float 17+24, 17+17+24 or 17+17+17+24 bit approximation to pi/2, always subtract a double 33+53 bit one. The float version is now closer to the double version than to old versions of itself -- it uses the same 33+53 bit approximation as the simplest cases in the double version, and where the float version had to switch to the slow general case at |x| == 2^7*pi/2, it now switches at |x| == 2^19*pi/2 the same as the double version. This speeds up arg reduction by a factor of 2 for |x| between 3*pi/4 and 2^7*pi/4, and by a factor of 7 for |x| between 2^7*pi/4 and 2^19*pi/4.
author: bde <bde@FreeBSD.org> 2005-10-29 16:34:50 +0000
committer: bde <bde@FreeBSD.org> 2005-10-29 16:34:50 +0000
commit: bbfb40721e74d5bf0d3cef337da864daa1cf217b (patch)
tree: 99aac75834cfbb3654f449676ad3878a0ff01bb4 /lib/msun/src
parent: 203994509017931f946a1c3b544d89a994819f99 (diff)
download: FreeBSD-src-bbfb40721e74d5bf0d3cef337da864daa1cf217b.zip
FreeBSD-src-bbfb40721e74d5bf0d3cef337da864daa1cf217b.tar.gz
1 files changed, 22 insertions, 97 deletions
diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c
index 2fa4a01..eaedc48 100644
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@@ -1,5 +1,6 @@
 /* e_rem_pio2f.c -- float version of e_rem_pio2.c
  * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ * Conversion to not use float except for the arg by Bruce Evans.
  */
 
 /*
@@ -26,9 +27,6 @@ static char rcsid[] = "$FreeBSD$";
 #include "math.h"
 #include "math_private.h"
 
-/* Clip any extra precision in the float variable v. */
-#define	cliptofloat(v)	(*(volatile float *)&(v))
-
 /*
  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
  */
@@ -47,123 +45,51 @@ static const int32_t two_over_pi[] = {
 };
 
 /*
- * This array is like the one in e_rem_pio2.c, but the numbers are
- * single precision and the last few bits (8 here) are ignored by
- * masking them off in the float word instead of by omitting the low
- * word.
- * 
- * Masking off 8 bits is not enough, but we defer further masking to
- * runtime so that the mask is easy to change.  We now mask off 21
- * bits, which is the smallest number that makes the "quick check no
- * cancellation" detect all cancellations for cases that it is used.
- * It doesn't detect all non-cancellations, especiallly for small
- * multiples of pi/2, but then the non-quick code selects the best
- * approximation of pi/2 to use.  The result is that arg reduction is
- * always done with between 8 or 9 and 17 bits of extra precision in
- * the medium-multiple case.  With only 8 bits masked of we had
- * negative extra precision in some cases starting near +-13*pi/2.
- */
-static const int32_t npio2_hw[] = {
-0x3fc90f00, 0x40490f00, 0x4096cb00, 0x40c90f00, 0x40fb5300, 0x4116cb00,
-0x412fed00, 0x41490f00, 0x41623100, 0x417b5300, 0x418a3a00, 0x4196cb00,
-0x41a35c00, 0x41afed00, 0x41bc7e00, 0x41c90f00, 0x41d5a000, 0x41e23100,
-0x41eec200, 0x41fb5300, 0x4203f200, 0x420a3a00, 0x42108300, 0x4216cb00,
-0x421d1400, 0x42235c00, 0x4229a500, 0x422fed00, 0x42363600, 0x423c7e00,
-0x4242c700, 0x42490f00
-};
-
-/*
- * invpio2:  24 bits of 2/pi
- * pio2_1:   first  17 bit of pi/2
+ * invpio2:  53 bits of 2/pi
+ * pio2_1:   first  33 bit of pi/2
  * pio2_1t:  pi/2 - pio2_1
- * pio2_2:   second 17 bit of pi/2
- * pio2_2t:  pi/2 - (pio2_1+pio2_2)
- * pio2_3:   third  17 bit of pi/2
- * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
  */
 
-static const float
-zero =  0.0000000000e+00, /* 0x00000000 */
-half =  5.0000000000e-01, /* 0x3f000000 */
-invpio2 =  6.3661980629e-01, /* 0x3f22f984 */
-pio2_1  =  1.5707855225e+00, /* 0x3fc90f80 */
-pio2_1t =  1.0804334124e-05, /* 0x37354443 */
-pio2_2  =  1.0804273188e-05, /* 0x37354400 */
-pio2_2t =  6.0770999344e-11, /* 0x2e85a308 */
-pio2_3  =  6.0770943833e-11, /* 0x2e85a300 */
-pio2_3t =  6.1232342629e-17; /* 0x248d3132 */
-
 static const double
-two24 =  1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */
+zero =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
+half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+two24 =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
+invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
+pio2_1t =  6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */
 
 	int32_t __ieee754_rem_pio2f(float x, float *y)
 {
-	double dz;
-	float z,w,t,r,fn;
+	double z,w,t,r,fn;
 	double tx[3];
-	int32_t e0,i,j,nx,n,ix,hx;
+	int32_t e0,i,nx,n,ix,hx;
 
 	GET_FLOAT_WORD(hx,x);
 	ix = hx&0x7fffffff;
 	if(ix<=0x3f490fd8)   /* |x| ~<= pi/4 , no need for reduction */
 	    {y[0] = x; y[1] = 0; return 0;}
+	/* 33+53 bit pi is good enough for special and medium size cases */
 	if(ix<0x4016cbe4) {  /* |x| < 3pi/4, special case with n=+-1 */
-	    if(hx>0) {
+	    if(hx>0) { 
 		z = x - pio2_1;
-		if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */
-		    y[0] = z - pio2_1t;
-		    y[1] = (z-cliptofloat(y[0]))-pio2_1t;
-		} else {		/* near pi/2, use 17+17+24 bit pi */
-		    z -= pio2_2;
-		    y[0] = z - pio2_2t;
-		    y[1] = (z-cliptofloat(y[0]))-pio2_2t;
-		}
+		y[0] = z - pio2_1t;
+		y[1] = (z-y[0])-pio2_1t;
 		return 1;
 	    } else {	/* negative x */
 		z = x + pio2_1;
-		if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */
-		    y[0] = z + pio2_1t;
-		    y[1] = (z-cliptofloat(y[0]))+pio2_1t;
-		} else {		/* near pi/2, use 17+17+24 bit pi */
-		    z += pio2_2;
-		    y[0] = z + pio2_2t;
-		    y[1] = (z-cliptofloat(y[0]))+pio2_2t;
-		}
+		y[0] = z + pio2_1t;
+		y[1] = (z-y[0])+pio2_1t;
 		return -1;
 	    }
 	}
-	if(ix<=0x43490f80) { /* |x| ~<= 2^7*(pi/2), medium size */
+	if(ix<=0x49490f80) { /* |x| ~<= 2^19*(pi/2), medium size */
 	    t  = fabsf(x);
 	    n  = (int32_t) (t*invpio2+half);
-	    fn = (float)n;
+	    fn = (double)n;
 	    r  = t-fn*pio2_1;
-	    w  = fn*pio2_1t;	/* 1st round good to 40 bit */
-	    if(n<32&&(ix&0xffe00000)!=(npio2_hw[n-1]&0xffe00000)) {
-		y[0] = r-w;	/* quick check no cancellation */
-	    } else {
-	        u_int32_t high;
-	        j  = ix>>23;
-	        y[0] = r-w;
-		GET_FLOAT_WORD(high,y[0]);
-	        i = j-((high>>23)&0xff);
-	        if(i>8) {  /* 2nd iteration needed, good to 57 */
-		    t  = r;
-		    w  = fn*pio2_2;
-		    r  = t-w;
-		    w  = fn*pio2_2t-((t-r)-w);
-		    y[0] = r-w;
-		    GET_FLOAT_WORD(high,y[0]);
-		    i = j-((high>>23)&0xff);
-		    if(i>25)  {	/* 3rd iteration need, 74 bits acc */
-		    	t  = r;	/* will cover all possible cases */
-		    	w  = fn*pio2_3;
-		    	r  = t-w;
-		    	w  = fn*pio2_3t-((t-r)-w);
-		    	y[0] = r-w;
-		    }
-		}
-	    }
-	    y[1] = (r-cliptofloat(y[0]))-w;
+	    w  = fn*pio2_1t;
+	    y[0] = r-w;
+	    y[1] = (r-y[0])-w;
 	    if(hx<0) 	{y[0] = -y[0]; y[1] = -y[1]; return -n;}
 	    else	 return n;
 	}
@@ -173,7 +99,6 @@ two24 =  1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */
 	if(ix>=0x7f800000) {		/* x is inf or NaN */
 	    y[0]=y[1]=x-x; return 0;
 	}
-#define	z	dz
     /* set z = scalbn(|x|,ilogb(x)-23) */
 	z = x;
 	GET_HIGH_WORD(hx,z);
author	bde <bde@FreeBSD.org>	2005-10-29 16:34:50 +0000
committer	bde <bde@FreeBSD.org>	2005-10-29 16:34:50 +0000
commit	bbfb40721e74d5bf0d3cef337da864daa1cf217b (patch)
tree	99aac75834cfbb3654f449676ad3878a0ff01bb4 /lib/msun/src
parent	203994509017931f946a1c3b544d89a994819f99 (diff)
download	FreeBSD-src-bbfb40721e74d5bf0d3cef337da864daa1cf217b.zip FreeBSD-src-bbfb40721e74d5bf0d3cef337da864daa1cf217b.tar.gz