diff options
Diffstat (limited to 'lib/msun/src')
-rw-r--r-- | lib/msun/src/s_cbrt.c | 37 | ||||
-rw-r--r-- | lib/msun/src/s_cbrtf.c | 25 |
2 files changed, 42 insertions, 20 deletions
diff --git a/lib/msun/src/s_cbrt.c b/lib/msun/src/s_cbrt.c index 6d90078..e9ed330 100644 --- a/lib/msun/src/s_cbrt.c +++ b/lib/msun/src/s_cbrt.c @@ -37,7 +37,12 @@ double cbrt(double x) { int32_t hx; + union { + double value; + uint64_t bits; + } u; double r,s,t=0.0,w; + uint64_t bits; u_int32_t sign; u_int32_t high,low; @@ -47,7 +52,7 @@ cbrt(double x) if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */ GET_LOW_WORD(low,x); if((hx|low)==0) - return(x); /* cbrt(0) is itself */ + return(x); /* cbrt(0) is itself */ /* * Rough cbrt to 5 bits: @@ -73,7 +78,7 @@ cbrt(double x) SET_HIGH_WORD(t,sign|(hx/3+B1)); /* - * New cbrt to 26 bits; may be implemented in single precision: + * New cbrt to 25 bits: * cbrt(x) = t*cbrt(x/t**3) ~= t*R(x/t**3) * where R(r) = (14*r**2 + 35*r + 5)/(5*r**2 + 35*r + 14) is the * (2,2) Pade approximation to cbrt(r) at r = 1. We replace @@ -91,16 +96,26 @@ cbrt(double x) s=C+r*t; t*=G+F/(s+E+D/s); - /* chop t to 20 bits and make it larger in magnitude than cbrt(x) */ - GET_HIGH_WORD(high,t); - INSERT_WORDS(t,high+0x00000001,0); + /* + * Round t away from zero to 25 bits (sloppily except for ensuring that + * the result is larger in magnitude than cbrt(x) but not much more than + * 2 25-bit ulps larger). With rounding towards zero, the error bound + * would be ~5/6 instead of ~4/6. With a maximum error of 1 25-bit ulps + * in the rounded t, the infinite-precision error in the Newton + * approximation barely affects third digit in the the final error + * 0.667; the error in the rounded t can be up to about 12 25-bit ulps + * before the final error is larger than 0.667 ulps. + */ + u.value=t; + u.bits=(u.bits+0x20000000)&0xfffffffff0000000ULL; + t=u.value; - /* one step Newton iteration to 53 bits with error less than 0.667 ulps */ - s=t*t; /* t*t is exact */ - r=x/s; - w=t+t; - r=(r-t)/(w+r); /* r-t is exact */ - t=t+t*r; + /* one step Newton iteration to 53 bits with error < 0.667 ulps */ + s=t*t; /* t*t is exact */ + r=x/s; /* error <= 0.5 ulps; |r| < |t| */ + w=t+t; /* t+t is exact */ + r=(r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ + t=t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ return(t); } diff --git a/lib/msun/src/s_cbrtf.c b/lib/msun/src/s_cbrtf.c index d6f6d73..89f3507 100644 --- a/lib/msun/src/s_cbrtf.c +++ b/lib/msun/src/s_cbrtf.c @@ -48,7 +48,7 @@ cbrtf(float x) hx ^=sign; if(hx>=0x7f800000) return(x+x); /* cbrt(NaN,INF) is itself */ if(hx==0) - return(x); /* cbrt(0) is itself */ + return(x); /* cbrt(0) is itself */ /* rough cbrt to 5 bits */ if(hx<0x00800000) { /* subnormal number */ @@ -64,16 +64,23 @@ cbrtf(float x) s=C+r*t; t*=G+F/(s+E+D/s); - /* chop t to 12 bits and make it larger in magnitude than cbrt(x) */ + /* + * Round t away from zero to 12 bits (sloppily except for ensuring that + * the result is larger in magnitude than cbrt(x) but not much more than + * 1 12-bit ulp larger). With rounding towards zero, the error bound + * would be ~5/6 instead of ~4/6, and with t 2 12-bit ulps larger the + * infinite-precision error in the Newton approximation would affect + * the second digit instead of the third digit of 4/6 = 0.666..., etc. + */ GET_FLOAT_WORD(high,t); - SET_FLOAT_WORD(t,(high&0xfffff000)+0x00001000); + SET_FLOAT_WORD(t,(high+0x1002)&0xfffff000); - /* one step Newton iteration to 24 bits with error less than 0.667 ulps */ - s=t*t; /* t*t is exact */ - r=x/s; - w=t+t; - r=(r-t)/(w+r); /* r-t is exact */ - t=t+t*r; + /* one step Newton iteration to 24 bits with error < 0.667 ulps */ + s=t*t; /* t*t is exact */ + r=x/s; /* error <= 0.5 ulps; |r| < |t| */ + w=t+t; /* t+t is exact */ + r=(r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ + t=t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ return(t); } |