diff options
author | bde <bde@FreeBSD.org> | 2005-10-29 16:34:50 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 2005-10-29 16:34:50 +0000 |
commit | bbfb40721e74d5bf0d3cef337da864daa1cf217b (patch) | |
tree | 99aac75834cfbb3654f449676ad3878a0ff01bb4 /lib/msun/src | |
parent | 203994509017931f946a1c3b544d89a994819f99 (diff) | |
download | FreeBSD-src-bbfb40721e74d5bf0d3cef337da864daa1cf217b.zip FreeBSD-src-bbfb40721e74d5bf0d3cef337da864daa1cf217b.tar.gz |
Use double precision to simplify and optimize arg reduction for small
and medium size args too: instead of conditionally subtracting a float
17+24, 17+17+24 or 17+17+17+24 bit approximation to pi/2, always
subtract a double 33+53 bit one. The float version is now closer to
the double version than to old versions of itself -- it uses the same
33+53 bit approximation as the simplest cases in the double version,
and where the float version had to switch to the slow general case at
|x| == 2^7*pi/2, it now switches at |x| == 2^19*pi/2 the same as the
double version.
This speeds up arg reduction by a factor of 2 for |x| between 3*pi/4 and
2^7*pi/4, and by a factor of 7 for |x| between 2^7*pi/4 and 2^19*pi/4.
Diffstat (limited to 'lib/msun/src')
-rw-r--r-- | lib/msun/src/e_rem_pio2f.c | 119 |
1 files changed, 22 insertions, 97 deletions
diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c index 2fa4a01..eaedc48 100644 --- a/lib/msun/src/e_rem_pio2f.c +++ b/lib/msun/src/e_rem_pio2f.c @@ -1,5 +1,6 @@ /* e_rem_pio2f.c -- float version of e_rem_pio2.c * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Conversion to not use float except for the arg by Bruce Evans. */ /* @@ -26,9 +27,6 @@ static char rcsid[] = "$FreeBSD$"; #include "math.h" #include "math_private.h" -/* Clip any extra precision in the float variable v. */ -#define cliptofloat(v) (*(volatile float *)&(v)) - /* * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi */ @@ -47,123 +45,51 @@ static const int32_t two_over_pi[] = { }; /* - * This array is like the one in e_rem_pio2.c, but the numbers are - * single precision and the last few bits (8 here) are ignored by - * masking them off in the float word instead of by omitting the low - * word. - * - * Masking off 8 bits is not enough, but we defer further masking to - * runtime so that the mask is easy to change. We now mask off 21 - * bits, which is the smallest number that makes the "quick check no - * cancellation" detect all cancellations for cases that it is used. - * It doesn't detect all non-cancellations, especiallly for small - * multiples of pi/2, but then the non-quick code selects the best - * approximation of pi/2 to use. The result is that arg reduction is - * always done with between 8 or 9 and 17 bits of extra precision in - * the medium-multiple case. With only 8 bits masked of we had - * negative extra precision in some cases starting near +-13*pi/2. - */ -static const int32_t npio2_hw[] = { -0x3fc90f00, 0x40490f00, 0x4096cb00, 0x40c90f00, 0x40fb5300, 0x4116cb00, -0x412fed00, 0x41490f00, 0x41623100, 0x417b5300, 0x418a3a00, 0x4196cb00, -0x41a35c00, 0x41afed00, 0x41bc7e00, 0x41c90f00, 0x41d5a000, 0x41e23100, -0x41eec200, 0x41fb5300, 0x4203f200, 0x420a3a00, 0x42108300, 0x4216cb00, -0x421d1400, 0x42235c00, 0x4229a500, 0x422fed00, 0x42363600, 0x423c7e00, -0x4242c700, 0x42490f00 -}; - -/* - * invpio2: 24 bits of 2/pi - * pio2_1: first 17 bit of pi/2 + * invpio2: 53 bits of 2/pi + * pio2_1: first 33 bit of pi/2 * pio2_1t: pi/2 - pio2_1 - * pio2_2: second 17 bit of pi/2 - * pio2_2t: pi/2 - (pio2_1+pio2_2) - * pio2_3: third 17 bit of pi/2 - * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) */ -static const float -zero = 0.0000000000e+00, /* 0x00000000 */ -half = 5.0000000000e-01, /* 0x3f000000 */ -invpio2 = 6.3661980629e-01, /* 0x3f22f984 */ -pio2_1 = 1.5707855225e+00, /* 0x3fc90f80 */ -pio2_1t = 1.0804334124e-05, /* 0x37354443 */ -pio2_2 = 1.0804273188e-05, /* 0x37354400 */ -pio2_2t = 6.0770999344e-11, /* 0x2e85a308 */ -pio2_3 = 6.0770943833e-11, /* 0x2e85a300 */ -pio2_3t = 6.1232342629e-17; /* 0x248d3132 */ - static const double -two24 = 1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */ +zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ +half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ +two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ +invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ +pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ +pio2_1t = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ int32_t __ieee754_rem_pio2f(float x, float *y) { - double dz; - float z,w,t,r,fn; + double z,w,t,r,fn; double tx[3]; - int32_t e0,i,j,nx,n,ix,hx; + int32_t e0,i,nx,n,ix,hx; GET_FLOAT_WORD(hx,x); ix = hx&0x7fffffff; if(ix<=0x3f490fd8) /* |x| ~<= pi/4 , no need for reduction */ {y[0] = x; y[1] = 0; return 0;} + /* 33+53 bit pi is good enough for special and medium size cases */ if(ix<0x4016cbe4) { /* |x| < 3pi/4, special case with n=+-1 */ - if(hx>0) { + if(hx>0) { z = x - pio2_1; - if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */ - y[0] = z - pio2_1t; - y[1] = (z-cliptofloat(y[0]))-pio2_1t; - } else { /* near pi/2, use 17+17+24 bit pi */ - z -= pio2_2; - y[0] = z - pio2_2t; - y[1] = (z-cliptofloat(y[0]))-pio2_2t; - } + y[0] = z - pio2_1t; + y[1] = (z-y[0])-pio2_1t; return 1; } else { /* negative x */ z = x + pio2_1; - if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */ - y[0] = z + pio2_1t; - y[1] = (z-cliptofloat(y[0]))+pio2_1t; - } else { /* near pi/2, use 17+17+24 bit pi */ - z += pio2_2; - y[0] = z + pio2_2t; - y[1] = (z-cliptofloat(y[0]))+pio2_2t; - } + y[0] = z + pio2_1t; + y[1] = (z-y[0])+pio2_1t; return -1; } } - if(ix<=0x43490f80) { /* |x| ~<= 2^7*(pi/2), medium size */ + if(ix<=0x49490f80) { /* |x| ~<= 2^19*(pi/2), medium size */ t = fabsf(x); n = (int32_t) (t*invpio2+half); - fn = (float)n; + fn = (double)n; r = t-fn*pio2_1; - w = fn*pio2_1t; /* 1st round good to 40 bit */ - if(n<32&&(ix&0xffe00000)!=(npio2_hw[n-1]&0xffe00000)) { - y[0] = r-w; /* quick check no cancellation */ - } else { - u_int32_t high; - j = ix>>23; - y[0] = r-w; - GET_FLOAT_WORD(high,y[0]); - i = j-((high>>23)&0xff); - if(i>8) { /* 2nd iteration needed, good to 57 */ - t = r; - w = fn*pio2_2; - r = t-w; - w = fn*pio2_2t-((t-r)-w); - y[0] = r-w; - GET_FLOAT_WORD(high,y[0]); - i = j-((high>>23)&0xff); - if(i>25) { /* 3rd iteration need, 74 bits acc */ - t = r; /* will cover all possible cases */ - w = fn*pio2_3; - r = t-w; - w = fn*pio2_3t-((t-r)-w); - y[0] = r-w; - } - } - } - y[1] = (r-cliptofloat(y[0]))-w; + w = fn*pio2_1t; + y[0] = r-w; + y[1] = (r-y[0])-w; if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} else return n; } @@ -173,7 +99,6 @@ two24 = 1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */ if(ix>=0x7f800000) { /* x is inf or NaN */ y[0]=y[1]=x-x; return 0; } -#define z dz /* set z = scalbn(|x|,ilogb(x)-23) */ z = x; GET_HIGH_WORD(hx,z); |