summaryrefslogtreecommitdiffstats
path: root/lib/msun/src
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>2005-10-29 16:34:50 +0000
committerbde <bde@FreeBSD.org>2005-10-29 16:34:50 +0000
commitbbfb40721e74d5bf0d3cef337da864daa1cf217b (patch)
tree99aac75834cfbb3654f449676ad3878a0ff01bb4 /lib/msun/src
parent203994509017931f946a1c3b544d89a994819f99 (diff)
downloadFreeBSD-src-bbfb40721e74d5bf0d3cef337da864daa1cf217b.zip
FreeBSD-src-bbfb40721e74d5bf0d3cef337da864daa1cf217b.tar.gz
Use double precision to simplify and optimize arg reduction for small
and medium size args too: instead of conditionally subtracting a float 17+24, 17+17+24 or 17+17+17+24 bit approximation to pi/2, always subtract a double 33+53 bit one. The float version is now closer to the double version than to old versions of itself -- it uses the same 33+53 bit approximation as the simplest cases in the double version, and where the float version had to switch to the slow general case at |x| == 2^7*pi/2, it now switches at |x| == 2^19*pi/2 the same as the double version. This speeds up arg reduction by a factor of 2 for |x| between 3*pi/4 and 2^7*pi/4, and by a factor of 7 for |x| between 2^7*pi/4 and 2^19*pi/4.
Diffstat (limited to 'lib/msun/src')
-rw-r--r--lib/msun/src/e_rem_pio2f.c119
1 files changed, 22 insertions, 97 deletions
diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c
index 2fa4a01..eaedc48 100644
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@@ -1,5 +1,6 @@
/* e_rem_pio2f.c -- float version of e_rem_pio2.c
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ * Conversion to not use float except for the arg by Bruce Evans.
*/
/*
@@ -26,9 +27,6 @@ static char rcsid[] = "$FreeBSD$";
#include "math.h"
#include "math_private.h"
-/* Clip any extra precision in the float variable v. */
-#define cliptofloat(v) (*(volatile float *)&(v))
-
/*
* Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
*/
@@ -47,123 +45,51 @@ static const int32_t two_over_pi[] = {
};
/*
- * This array is like the one in e_rem_pio2.c, but the numbers are
- * single precision and the last few bits (8 here) are ignored by
- * masking them off in the float word instead of by omitting the low
- * word.
- *
- * Masking off 8 bits is not enough, but we defer further masking to
- * runtime so that the mask is easy to change. We now mask off 21
- * bits, which is the smallest number that makes the "quick check no
- * cancellation" detect all cancellations for cases that it is used.
- * It doesn't detect all non-cancellations, especiallly for small
- * multiples of pi/2, but then the non-quick code selects the best
- * approximation of pi/2 to use. The result is that arg reduction is
- * always done with between 8 or 9 and 17 bits of extra precision in
- * the medium-multiple case. With only 8 bits masked of we had
- * negative extra precision in some cases starting near +-13*pi/2.
- */
-static const int32_t npio2_hw[] = {
-0x3fc90f00, 0x40490f00, 0x4096cb00, 0x40c90f00, 0x40fb5300, 0x4116cb00,
-0x412fed00, 0x41490f00, 0x41623100, 0x417b5300, 0x418a3a00, 0x4196cb00,
-0x41a35c00, 0x41afed00, 0x41bc7e00, 0x41c90f00, 0x41d5a000, 0x41e23100,
-0x41eec200, 0x41fb5300, 0x4203f200, 0x420a3a00, 0x42108300, 0x4216cb00,
-0x421d1400, 0x42235c00, 0x4229a500, 0x422fed00, 0x42363600, 0x423c7e00,
-0x4242c700, 0x42490f00
-};
-
-/*
- * invpio2: 24 bits of 2/pi
- * pio2_1: first 17 bit of pi/2
+ * invpio2: 53 bits of 2/pi
+ * pio2_1: first 33 bit of pi/2
* pio2_1t: pi/2 - pio2_1
- * pio2_2: second 17 bit of pi/2
- * pio2_2t: pi/2 - (pio2_1+pio2_2)
- * pio2_3: third 17 bit of pi/2
- * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
*/
-static const float
-zero = 0.0000000000e+00, /* 0x00000000 */
-half = 5.0000000000e-01, /* 0x3f000000 */
-invpio2 = 6.3661980629e-01, /* 0x3f22f984 */
-pio2_1 = 1.5707855225e+00, /* 0x3fc90f80 */
-pio2_1t = 1.0804334124e-05, /* 0x37354443 */
-pio2_2 = 1.0804273188e-05, /* 0x37354400 */
-pio2_2t = 6.0770999344e-11, /* 0x2e85a308 */
-pio2_3 = 6.0770943833e-11, /* 0x2e85a300 */
-pio2_3t = 6.1232342629e-17; /* 0x248d3132 */
-
static const double
-two24 = 1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */
+zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
+half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
+invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
+pio2_1t = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */
int32_t __ieee754_rem_pio2f(float x, float *y)
{
- double dz;
- float z,w,t,r,fn;
+ double z,w,t,r,fn;
double tx[3];
- int32_t e0,i,j,nx,n,ix,hx;
+ int32_t e0,i,nx,n,ix,hx;
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
if(ix<=0x3f490fd8) /* |x| ~<= pi/4 , no need for reduction */
{y[0] = x; y[1] = 0; return 0;}
+ /* 33+53 bit pi is good enough for special and medium size cases */
if(ix<0x4016cbe4) { /* |x| < 3pi/4, special case with n=+-1 */
- if(hx>0) {
+ if(hx>0) {
z = x - pio2_1;
- if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */
- y[0] = z - pio2_1t;
- y[1] = (z-cliptofloat(y[0]))-pio2_1t;
- } else { /* near pi/2, use 17+17+24 bit pi */
- z -= pio2_2;
- y[0] = z - pio2_2t;
- y[1] = (z-cliptofloat(y[0]))-pio2_2t;
- }
+ y[0] = z - pio2_1t;
+ y[1] = (z-y[0])-pio2_1t;
return 1;
} else { /* negative x */
z = x + pio2_1;
- if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */
- y[0] = z + pio2_1t;
- y[1] = (z-cliptofloat(y[0]))+pio2_1t;
- } else { /* near pi/2, use 17+17+24 bit pi */
- z += pio2_2;
- y[0] = z + pio2_2t;
- y[1] = (z-cliptofloat(y[0]))+pio2_2t;
- }
+ y[0] = z + pio2_1t;
+ y[1] = (z-y[0])+pio2_1t;
return -1;
}
}
- if(ix<=0x43490f80) { /* |x| ~<= 2^7*(pi/2), medium size */
+ if(ix<=0x49490f80) { /* |x| ~<= 2^19*(pi/2), medium size */
t = fabsf(x);
n = (int32_t) (t*invpio2+half);
- fn = (float)n;
+ fn = (double)n;
r = t-fn*pio2_1;
- w = fn*pio2_1t; /* 1st round good to 40 bit */
- if(n<32&&(ix&0xffe00000)!=(npio2_hw[n-1]&0xffe00000)) {
- y[0] = r-w; /* quick check no cancellation */
- } else {
- u_int32_t high;
- j = ix>>23;
- y[0] = r-w;
- GET_FLOAT_WORD(high,y[0]);
- i = j-((high>>23)&0xff);
- if(i>8) { /* 2nd iteration needed, good to 57 */
- t = r;
- w = fn*pio2_2;
- r = t-w;
- w = fn*pio2_2t-((t-r)-w);
- y[0] = r-w;
- GET_FLOAT_WORD(high,y[0]);
- i = j-((high>>23)&0xff);
- if(i>25) { /* 3rd iteration need, 74 bits acc */
- t = r; /* will cover all possible cases */
- w = fn*pio2_3;
- r = t-w;
- w = fn*pio2_3t-((t-r)-w);
- y[0] = r-w;
- }
- }
- }
- y[1] = (r-cliptofloat(y[0]))-w;
+ w = fn*pio2_1t;
+ y[0] = r-w;
+ y[1] = (r-y[0])-w;
if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
else return n;
}
@@ -173,7 +99,6 @@ two24 = 1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */
if(ix>=0x7f800000) { /* x is inf or NaN */
y[0]=y[1]=x-x; return 0;
}
-#define z dz
/* set z = scalbn(|x|,ilogb(x)-23) */
z = x;
GET_HIGH_WORD(hx,z);
OpenPOWER on IntegriCloud