diff options
Diffstat (limited to 'lib/msun/src')
36 files changed, 1575 insertions, 373 deletions
diff --git a/lib/msun/src/e_cosh.c b/lib/msun/src/e_cosh.c index 11e6590..a363695 100644 --- a/lib/msun/src/e_cosh.c +++ b/lib/msun/src/e_cosh.c @@ -45,7 +45,6 @@ __ieee754_cosh(double x) { double t,w; int32_t ix; - u_int32_t lx; /* High word of |x|. */ GET_HIGH_WORD(ix,x); @@ -72,13 +71,8 @@ __ieee754_cosh(double x) if (ix < 0x40862E42) return half*__ieee754_exp(fabs(x)); /* |x| in [log(maxdouble), overflowthresold] */ - GET_LOW_WORD(lx,x); - if (ix<0x408633CE || - ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) { - w = __ieee754_exp(half*fabs(x)); - t = half*w; - return t*w; - } + if (ix<=0x408633CE) + return __ldexp_exp(fabs(x), -1); /* |x| > overflowthresold, cosh(x) overflow */ return huge*huge; diff --git a/lib/msun/src/e_coshf.c b/lib/msun/src/e_coshf.c index 4a1d499..95a0d6e 100644 --- a/lib/msun/src/e_coshf.c +++ b/lib/msun/src/e_coshf.c @@ -51,11 +51,8 @@ __ieee754_coshf(float x) if (ix < 0x42b17217) return half*__ieee754_expf(fabsf(x)); /* |x| in [log(maxfloat), overflowthresold] */ - if (ix<=0x42b2d4fc) { - w = __ieee754_expf(half*fabsf(x)); - t = half*w; - return t*w; - } + if (ix<=0x42b2d4fc) + return __ldexp_expf(fabsf(x), -1); /* |x| > overflowthresold, cosh(x) overflow */ return huge*huge; diff --git a/lib/msun/src/e_exp.c b/lib/msun/src/e_exp.c index 5b9a10c..b47aef5 100644 --- a/lib/msun/src/e_exp.c +++ b/lib/msun/src/e_exp.c @@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$"); * to produce the hexadecimal values shown. */ +#include <float.h> + #include "math.h" #include "math_private.h" @@ -133,7 +135,7 @@ __ieee754_exp(double x) /* default IEEE double exp */ hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */ lo = t*ln2LO[0]; } - x = hi - lo; + STRICT_ASSIGN(double, x, hi - lo); } else if(hx < 0x3e300000) { /* when |x|<2**-28 */ if(huge+x>one) return one+x;/* trigger inexact */ diff --git a/lib/msun/src/e_expf.c b/lib/msun/src/e_expf.c index 502e421..a479076 100644 --- a/lib/msun/src/e_expf.c +++ b/lib/msun/src/e_expf.c @@ -16,6 +16,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <float.h> + #include "math.h" #include "math_private.h" @@ -40,7 +42,7 @@ P2 = -2.7667332906e-3; /* -0xb55215.0p-32 */ static volatile float twom100 = 7.8886090522e-31; /* 2**-100=0x0d800000 */ float -__ieee754_expf(float x) /* default IEEE double exp */ +__ieee754_expf(float x) { float y,hi=0.0,lo=0.0,c,t,twopk; int32_t k=0,xsb; @@ -70,7 +72,7 @@ __ieee754_expf(float x) /* default IEEE double exp */ hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */ lo = t*ln2LO[0]; } - x = hi - lo; + STRICT_ASSIGN(float, x, hi - lo); } else if(hx < 0x39000000) { /* when |x|<2**-14 */ if(huge+x>one) return one+x;/* trigger inexact */ diff --git a/lib/msun/src/e_hypot.c b/lib/msun/src/e_hypot.c index fb498c1..2398e98 100644 --- a/lib/msun/src/e_hypot.c +++ b/lib/msun/src/e_hypot.c @@ -54,7 +54,7 @@ __FBSDID("$FreeBSD$"); double __ieee754_hypot(double x, double y) { - double a=x,b=y,t1,t2,y1,y2,w; + double a,b,t1,t2,y1,y2,w; int32_t j,k,ha,hb; GET_HIGH_WORD(ha,x); diff --git a/lib/msun/src/e_hypotf.c b/lib/msun/src/e_hypotf.c index c82c6e7..6d083e4 100644 --- a/lib/msun/src/e_hypotf.c +++ b/lib/msun/src/e_hypotf.c @@ -22,7 +22,7 @@ __FBSDID("$FreeBSD$"); float __ieee754_hypotf(float x, float y) { - float a=x,b=y,t1,t2,y1,y2,w; + float a,b,t1,t2,y1,y2,w; int32_t j,k,ha,hb; GET_FLOAT_WORD(ha,x); diff --git a/lib/msun/src/e_hypotl.c b/lib/msun/src/e_hypotl.c index 0c899bf..7b5ab89 100644 --- a/lib/msun/src/e_hypotl.c +++ b/lib/msun/src/e_hypotl.c @@ -21,13 +21,6 @@ __FBSDID("$FreeBSD$"); #include "math.h" #include "math_private.h" -#define GET_LDBL_EXPSIGN(i, v) do { \ - union IEEEl2bits uv; \ - \ - uv.e = v; \ - i = uv.xbits.expsign; \ -} while (0) - #define GET_LDBL_MAN(h, l, v) do { \ union IEEEl2bits uv; \ \ @@ -36,14 +29,6 @@ __FBSDID("$FreeBSD$"); l = uv.bits.manl; \ } while (0) -#define SET_LDBL_EXPSIGN(v, i) do { \ - union IEEEl2bits uv; \ - \ - uv.e = v; \ - uv.xbits.expsign = i; \ - v = uv.e; \ -} while (0) - #undef GET_HIGH_WORD #define GET_HIGH_WORD(i, v) GET_LDBL_EXPSIGN(i, v) #undef SET_HIGH_WORD diff --git a/lib/msun/src/e_lgamma_r.c b/lib/msun/src/e_lgamma_r.c index a587b8f..1cff592 100644 --- a/lib/msun/src/e_lgamma_r.c +++ b/lib/msun/src/e_lgamma_r.c @@ -269,7 +269,6 @@ __ieee754_lgamma_r(double x, int *signgamp) } else if(ix<0x40200000) { /* x < 8.0 */ i = (int)x; - t = zero; y = x-(double)i; p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))); diff --git a/lib/msun/src/e_lgammaf_r.c b/lib/msun/src/e_lgammaf_r.c index 47c6ed0..e2d90ef 100644 --- a/lib/msun/src/e_lgammaf_r.c +++ b/lib/msun/src/e_lgammaf_r.c @@ -202,7 +202,6 @@ __ieee754_lgammaf_r(float x, int *signgamp) } else if(ix<0x41000000) { /* x < 8.0 */ i = (int)x; - t = zero; y = x-(float)i; p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))); diff --git a/lib/msun/src/e_log10.c b/lib/msun/src/e_log10.c index 135f0dc..104d257 100644 --- a/lib/msun/src/e_log10.c +++ b/lib/msun/src/e_log10.c @@ -15,7 +15,11 @@ __FBSDID("$FreeBSD$"); /* - * Return the base 10 logarithm of x. See k_log.c for details on the algorithm. + * Return the base 10 logarithm of x. See e_log.c and k_log.h for most + * comments. + * + * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2) + * in not-quite-routine extra precision. */ #include "math.h" @@ -34,31 +38,50 @@ static const double zero = 0.0; double __ieee754_log10(double x) { - double f,hi,lo,y,z; + double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2; int32_t i,k,hx; u_int32_t lx; EXTRACT_WORDS(hx,lx,x); - k=0; - if (hx < 0x00100000) { /* x < 2**-1022 */ - if (((hx&0x7fffffff)|lx)==0) - return -two54/zero; /* log(+-0)=-inf */ - if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ - k -= 54; x *= two54; /* subnormal number, scale up x */ + k=0; + if (hx < 0x00100000) { /* x < 2**-1022 */ + if (((hx&0x7fffffff)|lx)==0) + return -two54/zero; /* log(+-0)=-inf */ + if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ + k -= 54; x *= two54; /* subnormal number, scale up x */ GET_HIGH_WORD(hx,x); - } + } if (hx >= 0x7ff00000) return x+x; + if (hx == 0x3ff00000 && lx == 0) + return zero; /* log(1) = +0 */ k += (hx>>20)-1023; hx &= 0x000fffff; i = (hx+0x95f64)&0x100000; SET_HIGH_WORD(x,hx|(i^0x3ff00000)); /* normalize x or x/2 */ k += (i>>20); y = (double)k; - f = __kernel_log(x); - hi = x = x - 1; + f = x - 1.0; + hfsq = 0.5*f*f; + r = k_log1p(f); + + /* See e_log2.c for most details. */ + hi = f - hfsq; SET_LOW_WORD(hi,0); - lo = x - hi; - z = y*log10_2lo + (x+f)*ivln10lo + (lo+f)*ivln10hi + hi*ivln10hi; - return z+y*log10_2hi; + lo = (f - hi) - hfsq + r; + val_hi = hi*ivln10hi; + y2 = y*log10_2hi; + val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi; + + /* + * Extra precision in for adding y*log10_2hi is not strictly needed + * since there is no very large cancellation near x = sqrt(2) or + * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs + * with some parallelism and it reduces the error for many args. + */ + w = y2 + val_hi; + val_lo += (y2 - w) + val_hi; + val_hi = w; + + return val_lo + val_hi; } diff --git a/lib/msun/src/e_log10f.c b/lib/msun/src/e_log10f.c index 940b831..c876594 100644 --- a/lib/msun/src/e_log10f.c +++ b/lib/msun/src/e_log10f.c @@ -13,7 +13,7 @@ __FBSDID("$FreeBSD$"); /* - * Return the base 10 logarithm of x. See k_log.c for details on the algorithm. + * Float version of e_log10.c. See the latter for most comments. */ #include "math.h" @@ -32,31 +32,40 @@ static const float zero = 0.0; float __ieee754_log10f(float x) { - float f,hi,lo,y,z; + float f,hfsq,hi,lo,r,y; int32_t i,k,hx; GET_FLOAT_WORD(hx,x); - k=0; - if (hx < 0x00800000) { /* x < 2**-126 */ - if ((hx&0x7fffffff)==0) - return -two25/zero; /* log(+-0)=-inf */ - if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ - k -= 25; x *= two25; /* subnormal number, scale up x */ + k=0; + if (hx < 0x00800000) { /* x < 2**-126 */ + if ((hx&0x7fffffff)==0) + return -two25/zero; /* log(+-0)=-inf */ + if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ + k -= 25; x *= two25; /* subnormal number, scale up x */ GET_FLOAT_WORD(hx,x); - } + } if (hx >= 0x7f800000) return x+x; + if (hx == 0x3f800000) + return zero; /* log(1) = +0 */ k += (hx>>23)-127; hx &= 0x007fffff; i = (hx+(0x4afb0d))&0x800000; SET_FLOAT_WORD(x,hx|(i^0x3f800000)); /* normalize x or x/2 */ k += (i>>23); y = (float)k; - f = __kernel_logf(x); - x = x - (float)1.0; - GET_FLOAT_WORD(hx,x); + f = x - (float)1.0; + hfsq = (float)0.5*f*f; + r = k_log1pf(f); + + /* See e_log2f.c and e_log2.c for details. */ + if (sizeof(float_t) > sizeof(float)) + return (r - hfsq + f) * ((float_t)ivln10lo + ivln10hi) + + y * ((float_t)log10_2lo + log10_2hi); + hi = f - hfsq; + GET_FLOAT_WORD(hx,hi); SET_FLOAT_WORD(hi,hx&0xfffff000); - lo = x - hi; - z = y*log10_2lo + (x+f)*ivln10lo + (lo+f)*ivln10hi + hi*ivln10hi; - return z+y*log10_2hi; + lo = (f - hi) - hfsq + r; + return y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + hi*ivln10hi + + y*log10_2hi; } diff --git a/lib/msun/src/e_log2.c b/lib/msun/src/e_log2.c index 6cf3dbc..1fc44a5 100644 --- a/lib/msun/src/e_log2.c +++ b/lib/msun/src/e_log2.c @@ -15,7 +15,13 @@ __FBSDID("$FreeBSD$"); /* - * Return the base 2 logarithm of x. See k_log.c for details on the algorithm. + * Return the base 2 logarithm of x. See e_log.c and k_log.h for most + * comments. + * + * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, + * then does the combining and scaling steps + * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k + * in not-quite-routine extra precision. */ #include "math.h" @@ -32,29 +38,73 @@ static const double zero = 0.0; double __ieee754_log2(double x) { - double f,hi,lo; + double f,hfsq,hi,lo,r,val_hi,val_lo,w,y; int32_t i,k,hx; u_int32_t lx; EXTRACT_WORDS(hx,lx,x); - k=0; - if (hx < 0x00100000) { /* x < 2**-1022 */ - if (((hx&0x7fffffff)|lx)==0) - return -two54/zero; /* log(+-0)=-inf */ - if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ - k -= 54; x *= two54; /* subnormal number, scale up x */ + k=0; + if (hx < 0x00100000) { /* x < 2**-1022 */ + if (((hx&0x7fffffff)|lx)==0) + return -two54/zero; /* log(+-0)=-inf */ + if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ + k -= 54; x *= two54; /* subnormal number, scale up x */ GET_HIGH_WORD(hx,x); - } + } if (hx >= 0x7ff00000) return x+x; + if (hx == 0x3ff00000 && lx == 0) + return zero; /* log(1) = +0 */ k += (hx>>20)-1023; hx &= 0x000fffff; i = (hx+0x95f64)&0x100000; SET_HIGH_WORD(x,hx|(i^0x3ff00000)); /* normalize x or x/2 */ k += (i>>20); - f = __kernel_log(x); - hi = x = x - 1; + y = (double)k; + f = x - 1.0; + hfsq = 0.5*f*f; + r = k_log1p(f); + + /* + * f-hfsq must (for args near 1) be evaluated in extra precision + * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2). + * This is fairly efficient since f-hfsq only depends on f, so can + * be evaluated in parallel with R. Not combining hfsq with R also + * keeps R small (though not as small as a true `lo' term would be), + * so that extra precision is not needed for terms involving R. + * + * Compiler bugs involving extra precision used to break Dekker's + * theorem for spitting f-hfsq as hi+lo, unless double_t was used + * or the multi-precision calculations were avoided when double_t + * has extra precision. These problems are now automatically + * avoided as a side effect of the optimization of combining the + * Dekker splitting step with the clear-low-bits step. + * + * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra + * precision to avoid a very large cancellation when x is very near + * these values. Unlike the above cancellations, this problem is + * specific to base 2. It is strange that adding +-1 is so much + * harder than adding +-ln2 or +-log10_2. + * + * This uses Dekker's theorem to normalize y+val_hi, so the + * compiler bugs are back in some configurations, sigh. And I + * don't want to used double_t to avoid them, since that gives a + * pessimization and the support for avoiding the pessimization + * is not yet available. + * + * The multi-precision calculations for the multiplications are + * routine. + */ + hi = f - hfsq; SET_LOW_WORD(hi,0); - lo = x - hi; - return (x+f)*ivln2lo + (lo+f)*ivln2hi + hi*ivln2hi + k; + lo = (f - hi) - hfsq + r; + val_hi = hi*ivln2hi; + val_lo = (lo+hi)*ivln2lo + lo*ivln2hi; + + /* spadd(val_hi, val_lo, y), except for not using double_t: */ + w = y + val_hi; + val_lo += (y - w) + val_hi; + val_hi = w; + + return val_lo + val_hi; } diff --git a/lib/msun/src/e_log2f.c b/lib/msun/src/e_log2f.c index bb308d3..7166346 100644 --- a/lib/msun/src/e_log2f.c +++ b/lib/msun/src/e_log2f.c @@ -13,7 +13,7 @@ __FBSDID("$FreeBSD$"); /* - * Return the base 2 logarithm of x. See k_log.c for details on the algorithm. + * Float version of e_log2.c. See the latter for most comments. */ #include "math.h" @@ -30,29 +30,52 @@ static const float zero = 0.0; float __ieee754_log2f(float x) { - float f,hi,lo; + float f,hfsq,hi,lo,r,y; int32_t i,k,hx; GET_FLOAT_WORD(hx,x); - k=0; - if (hx < 0x00800000) { /* x < 2**-126 */ - if ((hx&0x7fffffff)==0) - return -two25/zero; /* log(+-0)=-inf */ - if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ - k -= 25; x *= two25; /* subnormal number, scale up x */ + k=0; + if (hx < 0x00800000) { /* x < 2**-126 */ + if ((hx&0x7fffffff)==0) + return -two25/zero; /* log(+-0)=-inf */ + if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ + k -= 25; x *= two25; /* subnormal number, scale up x */ GET_FLOAT_WORD(hx,x); - } + } if (hx >= 0x7f800000) return x+x; + if (hx == 0x3f800000) + return zero; /* log(1) = +0 */ k += (hx>>23)-127; hx &= 0x007fffff; i = (hx+(0x4afb0d))&0x800000; SET_FLOAT_WORD(x,hx|(i^0x3f800000)); /* normalize x or x/2 */ k += (i>>23); - f = __kernel_logf(x); - x = x - (float)1.0; - GET_FLOAT_WORD(hx,x); + y = (float)k; + f = x - (float)1.0; + hfsq = (float)0.5*f*f; + r = k_log1pf(f); + + /* + * We no longer need to avoid falling into the multi-precision + * calculations due to compiler bugs breaking Dekker's theorem. + * Keep avoiding this as an optimization. See e_log2.c for more + * details (some details are here only because the optimization + * is not yet available in double precision). + * + * Another compiler bug turned up. With gcc on i386, + * (ivln2lo + ivln2hi) would be evaluated in float precision + * despite runtime evaluations using double precision. So we + * must cast one of its terms to float_t. This makes the whole + * expression have type float_t, so return is forced to waste + * time clobbering its extra precision. + */ + if (sizeof(float_t) > sizeof(float)) + return (r - hfsq + f) * ((float_t)ivln2lo + ivln2hi) + y; + + hi = f - hfsq; + GET_FLOAT_WORD(hx,hi); SET_FLOAT_WORD(hi,hx&0xfffff000); - lo = x - hi; - return (x+f)*ivln2lo + (lo+f)*ivln2hi + hi*ivln2hi + k; + lo = (f - hi) - hfsq + r; + return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + y; } diff --git a/lib/msun/src/e_pow.c b/lib/msun/src/e_pow.c index 4aa00d5..7607a4a 100644 --- a/lib/msun/src/e_pow.c +++ b/lib/msun/src/e_pow.c @@ -109,6 +109,9 @@ __ieee754_pow(double x, double y) /* y==zero: x**0 = 1 */ if((iy|ly)==0) return one; + /* x==1: 1**y = 1, even if y is NaN */ + if (hx==0x3ff00000 && lx == 0) return one; + /* y!=zero: result is NaN if either arg is NaN */ if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) || iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0))) @@ -138,7 +141,7 @@ __ieee754_pow(double x, double y) if(ly==0) { if (iy==0x7ff00000) { /* y is +-inf */ if(((ix-0x3ff00000)|lx)==0) - return y - y; /* inf**+-1 is NaN */ + return one; /* (-1)**+-inf is NaN */ else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */ return (hy>=0)? y: zero; else /* (|x|<1)**-,+inf = inf,0 */ diff --git a/lib/msun/src/e_powf.c b/lib/msun/src/e_powf.c index 466ed72..5c46478 100644 --- a/lib/msun/src/e_powf.c +++ b/lib/msun/src/e_powf.c @@ -67,6 +67,9 @@ __ieee754_powf(float x, float y) /* y==zero: x**0 = 1 */ if(iy==0) return one; + /* x==1: 1**y = 1, even if y is NaN */ + if (hx==0x3f800000) return one; + /* y!=zero: result is NaN if either arg is NaN */ if(ix > 0x7f800000 || iy > 0x7f800000) @@ -90,7 +93,7 @@ __ieee754_powf(float x, float y) /* special value of y */ if (iy==0x7f800000) { /* y is +-inf */ if (ix==0x3f800000) - return y - y; /* inf**+-1 is NaN */ + return one; /* (-1)**+-inf is NaN */ else if (ix > 0x3f800000)/* (|x|>1)**+-inf = inf,0 */ return (hy>=0)? y: zero; else /* (|x|<1)**-,+inf = inf,0 */ diff --git a/lib/msun/src/e_sinh.c b/lib/msun/src/e_sinh.c index afb8e43..17442d0 100644 --- a/lib/msun/src/e_sinh.c +++ b/lib/msun/src/e_sinh.c @@ -40,9 +40,8 @@ static const double one = 1.0, shuge = 1.0e307; double __ieee754_sinh(double x) { - double t,w,h; + double t,h; int32_t ix,jx; - u_int32_t lx; /* High word of |x|. */ GET_HIGH_WORD(jx,x); @@ -66,12 +65,8 @@ __ieee754_sinh(double x) if (ix < 0x40862E42) return h*__ieee754_exp(fabs(x)); /* |x| in [log(maxdouble), overflowthresold] */ - GET_LOW_WORD(lx,x); - if (ix<0x408633CE || ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) { - w = __ieee754_exp(0.5*fabs(x)); - t = h*w; - return t*w; - } + if (ix<=0x408633CE) + return h*2.0*__ldexp_exp(fabs(x), -1); /* |x| > overflowthresold, sinh(x) overflow */ return x*shuge; diff --git a/lib/msun/src/e_sinhf.c b/lib/msun/src/e_sinhf.c index 0f96b2b..1be2dc3 100644 --- a/lib/msun/src/e_sinhf.c +++ b/lib/msun/src/e_sinhf.c @@ -24,7 +24,7 @@ static const float one = 1.0, shuge = 1.0e37; float __ieee754_sinhf(float x) { - float t,w,h; + float t,h; int32_t ix,jx; GET_FLOAT_WORD(jx,x); @@ -48,11 +48,8 @@ __ieee754_sinhf(float x) if (ix < 0x42b17217) return h*__ieee754_expf(fabsf(x)); /* |x| in [logf(maxfloat), overflowthresold] */ - if (ix<=0x42b2d4fc) { - w = __ieee754_expf((float)0.5*fabsf(x)); - t = h*w; - return t*w; - } + if (ix<=0x42b2d4fc) + return h*2.0F*__ldexp_expf(fabsf(x), -1); /* |x| > overflowthresold, sinh(x) overflow */ return x*shuge; diff --git a/lib/msun/src/k_exp.c b/lib/msun/src/k_exp.c new file mode 100644 index 0000000..f592f69 --- /dev/null +++ b/lib/msun/src/k_exp.c @@ -0,0 +1,108 @@ +/*- + * Copyright (c) 2011 David Schultz <das@FreeBSD.ORG> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> + +#include "math.h" +#include "math_private.h" + +static const uint32_t k = 1799; /* constant for reduction */ +static const double kln2 = 1246.97177782734161156; /* k * ln2 */ + +/* + * Compute exp(x), scaled to avoid spurious overflow. An exponent is + * returned separately in 'expt'. + * + * Input: ln(DBL_MAX) <= x < ln(2 * DBL_MAX / DBL_MIN_DENORM) ~= 1454.91 + * Output: 2**1023 <= y < 2**1024 + */ +static double +__frexp_exp(double x, int *expt) +{ + double exp_x; + uint32_t hx; + + /* + * We use exp(x) = exp(x - kln2) * 2**k, carefully chosen to + * minimize |exp(kln2) - 2**k|. We also scale the exponent of + * exp_x to MAX_EXP so that the result can be multiplied by + * a tiny number without losing accuracy due to denormalization. + */ + exp_x = exp(x - kln2); + GET_HIGH_WORD(hx, exp_x); + *expt = (hx >> 20) - (0x3ff + 1023) + k; + SET_HIGH_WORD(exp_x, (hx & 0xfffff) | ((0x3ff + 1023) << 20)); + return (exp_x); +} + +/* + * __ldexp_exp(x, expt) and __ldexp_cexp(x, expt) compute exp(x) * 2**expt. + * They are intended for large arguments (real part >= ln(DBL_MAX)) + * where care is needed to avoid overflow. + * + * The present implementation is narrowly tailored for our hyperbolic and + * exponential functions. We assume expt is small (0 or -1), and the caller + * has filtered out very large x, for which overflow would be inevitable. + */ + +double +__ldexp_exp(double x, int expt) +{ + double exp_x, scale; + int ex_expt; + + exp_x = __frexp_exp(x, &ex_expt); + expt += ex_expt; + INSERT_WORDS(scale, (0x3ff + expt) << 20, 0); + return (exp_x * scale); +} + +double complex +__ldexp_cexp(double complex z, int expt) +{ + double x, y, exp_x, scale1, scale2; + int ex_expt, half_expt; + + x = creal(z); + y = cimag(z); + exp_x = __frexp_exp(x, &ex_expt); + expt += ex_expt; + + /* + * Arrange so that scale1 * scale2 == 2**expt. We use this to + * compensate for scalbn being horrendously slow. + */ + half_expt = expt / 2; + INSERT_WORDS(scale1, (0x3ff + half_expt) << 20, 0); + half_expt = expt - half_expt; + INSERT_WORDS(scale2, (0x3ff + half_expt) << 20, 0); + + return (cpack(cos(y) * exp_x * scale1 * scale2, + sin(y) * exp_x * scale1 * scale2)); +} diff --git a/lib/msun/src/k_expf.c b/lib/msun/src/k_expf.c new file mode 100644 index 0000000..a860b9f --- /dev/null +++ b/lib/msun/src/k_expf.c @@ -0,0 +1,87 @@ +/*- + * Copyright (c) 2011 David Schultz <das@FreeBSD.ORG> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> + +#include "math.h" +#include "math_private.h" + +static const uint32_t k = 235; /* constant for reduction */ +static const float kln2 = 162.88958740F; /* k * ln2 */ + +/* + * See k_exp.c for details. + * + * Input: ln(FLT_MAX) <= x < ln(2 * FLT_MAX / FLT_MIN_DENORM) ~= 192.7 + * Output: 2**127 <= y < 2**128 + */ +static float +__frexp_expf(float x, int *expt) +{ + double exp_x; + uint32_t hx; + + exp_x = expf(x - kln2); + GET_FLOAT_WORD(hx, exp_x); + *expt = (hx >> 23) - (0x7f + 127) + k; + SET_FLOAT_WORD(exp_x, (hx & 0x7fffff) | ((0x7f + 127) << 23)); + return (exp_x); +} + +float +__ldexp_expf(float x, int expt) +{ + float exp_x, scale; + int ex_expt; + + exp_x = __frexp_expf(x, &ex_expt); + expt += ex_expt; + SET_FLOAT_WORD(scale, (0x7f + expt) << 23); + return (exp_x * scale); +} + +float complex +__ldexp_cexpf(float complex z, int expt) +{ + float x, y, exp_x, scale1, scale2; + int ex_expt, half_expt; + + x = crealf(z); + y = cimagf(z); + exp_x = __frexp_expf(x, &ex_expt); + expt += ex_expt; + + half_expt = expt / 2; + SET_FLOAT_WORD(scale1, (0x7f + half_expt) << 23); + half_expt = expt - half_expt; + SET_FLOAT_WORD(scale2, (0x7f + half_expt) << 23); + + return (cpackf(cosf(y) * exp_x * scale1 * scale2, + sinf(y) * exp_x * scale1 * scale2)); +} diff --git a/lib/msun/src/k_log.h b/lib/msun/src/k_log.h index 206355c..aaff8bd 100644 --- a/lib/msun/src/k_log.h +++ b/lib/msun/src/k_log.h @@ -14,8 +14,9 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -/* __kernel_log(x) - * Return log(x) - (x-1) for x in ~[sqrt(2)/2, sqrt(2)]. +/* + * k_log1p(f): + * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)]. * * The following describes the overall strategy for computing * logarithms in base e. The argument reduction and adding the final @@ -80,37 +81,20 @@ Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ /* - * We always inline __kernel_log(), since doing so produces a + * We always inline k_log1p(), since doing so produces a * substantial performance improvement (~40% on amd64). */ static inline double -__kernel_log(double x) +k_log1p(double f) { - double hfsq,f,s,z,R,w,t1,t2; - int32_t hx,i,j; - u_int32_t lx; - - EXTRACT_WORDS(hx,lx,x); + double hfsq,s,z,R,w,t1,t2; - f = x-1.0; - if((0x000fffff&(2+hx))<3) { /* -2**-20 <= f < 2**-20 */ - if(f==0.0) return 0.0; - return f*f*(0.33333333333333333*f-0.5); - } - s = f/(2.0+f); + s = f/(2.0+f); z = s*s; - hx &= 0x000fffff; - i = hx-0x6147a; w = z*z; - j = 0x6b851-hx; - t1= w*(Lg2+w*(Lg4+w*Lg6)); - t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); - i |= j; + t1= w*(Lg2+w*(Lg4+w*Lg6)); + t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); R = t2+t1; - if (i>0) { - hfsq=0.5*f*f; - return s*(hfsq+R) - hfsq; - } else { - return s*(R-f); - } + hfsq=0.5*f*f; + return s*(hfsq+R); } diff --git a/lib/msun/src/k_logf.h b/lib/msun/src/k_logf.h index d9f0f3d..71c547e 100644 --- a/lib/msun/src/k_logf.h +++ b/lib/msun/src/k_logf.h @@ -13,7 +13,7 @@ __FBSDID("$FreeBSD$"); /* - * float version of __kernel_log(x). See k_log.c for details. + * Float version of k_log.h. See the latter for most comments. */ static const float @@ -24,32 +24,16 @@ Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ static inline float -__kernel_logf(float x) +k_log1pf(float f) { - float hfsq,f,s,z,R,w,t1,t2; - int32_t ix,i,j; + float hfsq,s,z,R,w,t1,t2; - GET_FLOAT_WORD(ix,x); - - f = x-(float)1.0; - if((0x007fffff&(0x8000+ix))<0xc000) { /* -2**-9 <= f < 2**-9 */ - if(f==0.0f) return 0.0f; - return f*f*((float)0.33333333333333333*f-(float)0.5); - } s = f/((float)2.0+f); z = s*s; - ix &= 0x007fffff; - i = ix-(0x6147a<<3); w = z*z; - j = (0x6b851<<3)-ix; t1= w*(Lg2+w*Lg4); t2= z*(Lg1+w*Lg3); - i |= j; R = t2+t1; - if(i>0) { - hfsq=(float)0.5*f*f; - return s*(hfsq+R) - hfsq; - } else { - return s*(R-f); - } + hfsq=(float)0.5*f*f; + return s*(hfsq+R); } diff --git a/lib/msun/src/math.h b/lib/msun/src/math.h index 8ad13ed..69b138e 100644 --- a/lib/msun/src/math.h +++ b/lib/msun/src/math.h @@ -68,14 +68,11 @@ extern const union __nan_un { #define MATH_ERREXCEPT 2 #define math_errhandling MATH_ERREXCEPT -/* XXX We need a <machine/math.h>. */ -#if defined(__ia64__) || defined(__sparc64__) -#define FP_FAST_FMA 1 -#endif +#define FP_FAST_FMAF 1 #ifdef __ia64__ +#define FP_FAST_FMA 1 #define FP_FAST_FMAL 1 #endif -#define FP_FAST_FMAF 1 /* Symbolic constants to classify floating point numbers. */ #define FP_INFINITE 0x01 diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index d79f808..79280e3 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -58,6 +58,10 @@ typedef union u_int32_t msw; u_int32_t lsw; } parts; + struct + { + u_int64_t w; + } xparts; } ieee_double_shape_type; #endif @@ -72,6 +76,10 @@ typedef union u_int32_t lsw; u_int32_t msw; } parts; + struct + { + u_int64_t w; + } xparts; } ieee_double_shape_type; #endif @@ -86,6 +94,14 @@ do { \ (ix1) = ew_u.parts.lsw; \ } while (0) +/* Get a 64-bit int from a double. */ +#define EXTRACT_WORD64(ix,d) \ +do { \ + ieee_double_shape_type ew_u; \ + ew_u.value = (d); \ + (ix) = ew_u.xparts.w; \ +} while (0) + /* Get the more significant 32 bit int from a double. */ #define GET_HIGH_WORD(i,d) \ @@ -114,6 +130,14 @@ do { \ (d) = iw_u.value; \ } while (0) +/* Set a double from a 64-bit int. */ +#define INSERT_WORD64(d,ix) \ +do { \ + ieee_double_shape_type iw_u; \ + iw_u.xparts.w = (ix); \ + (d) = iw_u.value; \ +} while (0) + /* Set the more significant 32 bits of a double from an int. */ #define SET_HIGH_WORD(d,v) \ @@ -164,6 +188,25 @@ do { \ (d) = sf_u.value; \ } while (0) +/* Get expsign as a 16 bit int from a long double. */ + +#define GET_LDBL_EXPSIGN(i,d) \ +do { \ + union IEEEl2bits ge_u; \ + ge_u.e = (d); \ + (i) = ge_u.xbits.expsign; \ +} while (0) + +/* Set expsign of a long double from a 16 bit int. */ + +#define SET_LDBL_EXPSIGN(d,v) \ +do { \ + union IEEEl2bits se_u; \ + se_u.e = (d); \ + se_u.xbits.expsign = (v); \ + (d) = se_u.e; \ +} while (0) + #ifdef FLT_EVAL_METHOD /* * Attempt to get strict C99 semantics for assignment with non-C99 compilers. @@ -354,6 +397,10 @@ int __ieee754_rem_pio2(double,double*); double __kernel_sin(double,double,int); double __kernel_cos(double,double); double __kernel_tan(double,double,int); +double __ldexp_exp(double,int); +#ifdef _COMPLEX_H +double complex __ldexp_cexp(double complex,int); +#endif /* float precision kernel functions */ #ifdef INLINE_REM_PIO2F @@ -372,6 +419,10 @@ float __kernel_cosdf(double); __inline #endif float __kernel_tandf(double,int); +float __ldexp_expf(float,int); +#ifdef _COMPLEX_H +float complex __ldexp_cexpf(float complex,int); +#endif /* long double precision kernel functions */ long double __kernel_sinl(long double, long double, int); diff --git a/lib/msun/src/s_ccosh.c b/lib/msun/src/s_ccosh.c new file mode 100644 index 0000000..9ea962b --- /dev/null +++ b/lib/msun/src/s_ccosh.c @@ -0,0 +1,155 @@ +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Hyperbolic cosine of a complex argument z = x + i y. + * + * cosh(z) = cosh(x+iy) + * = cosh(x) cos(y) + i sinh(x) sin(y). + * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> +#include <math.h> + +#include "math_private.h" + +static const double huge = 0x1p1023; + +double complex +ccosh(double complex z) +{ + double x, y, h; + int32_t hx, hy, ix, iy, lx, ly; + + x = creal(z); + y = cimag(z); + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hy, ly, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (ix < 0x7ff00000 && iy < 0x7ff00000) { + if ((iy | ly) == 0) + return (cpack(cosh(x), x * y)); + if (ix < 0x40360000) /* small x: normal case */ + return (cpack(cosh(x) * cos(y), sinh(x) * sin(y))); + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (ix < 0x40862e42) { + /* x < 710: exp(|x|) won't overflow */ + h = exp(fabs(x)) * 0.5; + return (cpack(h * cos(y), copysign(h, x) * sin(y))); + } else if (ix < 0x4096bbaa) { + /* x < 1455: scale to avoid overflow */ + z = __ldexp_cexp(cpack(fabs(x), y), -1); + return (cpack(creal(z), cimag(z) * copysign(1, x))); + } else { + /* x >= 1455: the result always overflows */ + h = huge * x; + return (cpack(h * h * cos(y), h * sin(y))); + } + } + + /* + * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if ((ix | lx) == 0 && iy >= 0x7ff00000) + return (cpack(y - y, copysign(0, x * (y - y)))); + + /* + * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0. + * + * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0. + * The sign of 0 in the result is unspecified. + */ + if ((iy | ly) == 0 && ix >= 0x7ff00000) { + if (((hx & 0xfffff) | lx) == 0) + return (cpack(x * x, copysign(0, x) * y)); + return (cpack(x * x, copysign(0, (x + x) * y))); + } + + /* + * cosh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * cosh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (ix < 0x7ff00000 && iy >= 0x7ff00000) + return (cpack(y - y, x * (y - y))); + + /* + * cosh(+-Inf + I NaN) = +Inf + I d(NaN). + * + * cosh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y) + */ + if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) { + if (iy >= 0x7ff00000) + return (cpack(x * x, x * (y - y))); + return (cpack((x * x) * cos(y), x * sin(y))); + } + + /* + * cosh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * cosh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * cosh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return (cpack((x * x) * (y - y), (x + x) * (y - y))); +} + +double complex +ccos(double complex z) +{ + + /* ccos(z) = ccosh(I * z) */ + return (ccosh(cpack(-cimag(z), creal(z)))); +} diff --git a/lib/msun/src/s_ccoshf.c b/lib/msun/src/s_ccoshf.c new file mode 100644 index 0000000..1de9ad4 --- /dev/null +++ b/lib/msun/src/s_ccoshf.c @@ -0,0 +1,104 @@ +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Hyperbolic cosine of a complex argument. See s_ccosh.c for details. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> +#include <math.h> + +#include "math_private.h" + +static const float huge = 0x1p127; + +float complex +ccoshf(float complex z) +{ + float x, y, h; + int32_t hx, hy, ix, iy; + + x = crealf(z); + y = cimagf(z); + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + if (ix < 0x7f800000 && iy < 0x7f800000) { + if (iy == 0) + return (cpackf(coshf(x), x * y)); + if (ix < 0x41100000) /* small x: normal case */ + return (cpackf(coshf(x) * cosf(y), sinhf(x) * sinf(y))); + + /* |x| >= 9, so cosh(x) ~= exp(|x|) */ + if (ix < 0x42b17218) { + /* x < 88.7: expf(|x|) won't overflow */ + h = expf(fabsf(x)) * 0.5f; + return (cpackf(h * cosf(y), copysignf(h, x) * sinf(y))); + } else if (ix < 0x4340b1e7) { + /* x < 192.7: scale to avoid overflow */ + z = __ldexp_cexpf(cpackf(fabsf(x), y), -1); + return (cpackf(crealf(z), cimagf(z) * copysignf(1, x))); + } else { + /* x >= 192.7: the result always overflows */ + h = huge * x; + return (cpackf(h * h * cosf(y), h * sinf(y))); + } + } + + if (ix == 0 && iy >= 0x7f800000) + return (cpackf(y - y, copysignf(0, x * (y - y)))); + + if (iy == 0 && ix >= 0x7f800000) { + if ((hx & 0x7fffff) == 0) + return (cpackf(x * x, copysignf(0, x) * y)); + return (cpackf(x * x, copysignf(0, (x + x) * y))); + } + + if (ix < 0x7f800000 && iy >= 0x7f800000) + return (cpackf(y - y, x * (y - y))); + + if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) { + if (iy >= 0x7f800000) + return (cpackf(x * x, x * (y - y))); + return (cpackf((x * x) * cosf(y), x * sinf(y))); + } + + return (cpackf((x * x) * (y - y), (x + x) * (y - y))); +} + +float complex +ccosf(float complex z) +{ + + return (ccoshf(cpackf(-cimagf(z), crealf(z)))); +} diff --git a/lib/msun/src/s_cexp.c b/lib/msun/src/s_cexp.c index ecf0992..abe178f 100644 --- a/lib/msun/src/s_cexp.c +++ b/lib/msun/src/s_cexp.c @@ -34,18 +34,13 @@ __FBSDID("$FreeBSD$"); static const uint32_t exp_ovfl = 0x40862e42, /* high bits of MAX_EXP * ln2 ~= 710 */ -cexp_ovfl = 0x4096b8e4, /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ -k = 1799; /* constant for reduction */ - -static const double -kln2 = 1246.97177782734161156; /* k * ln2 */ +cexp_ovfl = 0x4096b8e4; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ double complex cexp(double complex z) { double x, y, exp_x; uint32_t hx, hy, lx, ly; - int scale; x = creal(z); y = cimag(z); @@ -56,8 +51,12 @@ cexp(double complex z) /* cexp(x + I 0) = exp(x) + I 0 */ if ((hy | ly) == 0) return (cpack(exp(x), y)); + EXTRACT_WORDS(hx, lx, x); + /* cexp(0 + I y) = cos(y) + I sin(y) */ + if (((hx & 0x7fffffff) | lx) == 0) + return (cpack(cos(y), sin(y))); + if (hy >= 0x7ff00000) { - EXTRACT_WORDS(hx, lx, x); if (lx != 0 || (hx & 0x7fffffff) != 0x7ff00000) { /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ return (cpack(y - y, y - y)); @@ -70,21 +69,12 @@ cexp(double complex z) } } - GET_HIGH_WORD(hx, x); if (hx >= exp_ovfl && hx <= cexp_ovfl) { /* * x is between 709.7 and 1454.3, so we must scale to avoid - * overflow in exp(x). We use exp(x) = exp(x - kln2) * 2**k, - * carefully chosen to minimize |exp(kln2) - 2**k|. We also - * scale the exponent of exp(x) to MANT_DIG to avoid loss of - * accuracy due to underflow if sin(y) is tiny. + * overflow in exp(x). */ - exp_x = exp(x - kln2); - GET_HIGH_WORD(hx, exp_x); - SET_HIGH_WORD(exp_x, (hx & 0xfffff) | ((0x3ff + 52) << 20)); - scale = (hx >> 20) - (0x3ff + 52) + k; - return (cpack(scalbn(cos(y) * exp_x, scale), - scalbn(sin(y) * exp_x, scale))); + return (__ldexp_cexp(z, 0)); } else { /* * Cases covered here: diff --git a/lib/msun/src/s_cexpf.c b/lib/msun/src/s_cexpf.c index 4ea3931..0e30d08 100644 --- a/lib/msun/src/s_cexpf.c +++ b/lib/msun/src/s_cexpf.c @@ -34,18 +34,13 @@ __FBSDID("$FreeBSD$"); static const uint32_t exp_ovfl = 0x42b17218, /* MAX_EXP * ln2 ~= 88.722839355 */ -cexp_ovfl = 0x43400074, /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ -k = 235; /* constant for reduction */ - -static const float -kln2 = 162.88958740f; /* k * ln2 */ +cexp_ovfl = 0x43400074; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ float complex cexpf(float complex z) { float x, y, exp_x; uint32_t hx, hy; - int scale; x = crealf(z); y = cimagf(z); @@ -57,6 +52,10 @@ cexpf(float complex z) if (hy == 0) return (cpackf(expf(x), y)); GET_FLOAT_WORD(hx, x); + /* cexp(0 + I y) = cos(y) + I sin(y) */ + if ((hx & 0x7fffffff) == 0) + return (cpackf(cosf(y), sinf(y))); + if (hy >= 0x7f800000) { if ((hx & 0x7fffffff) != 0x7f800000) { /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ @@ -73,17 +72,9 @@ cexpf(float complex z) if (hx >= exp_ovfl && hx <= cexp_ovfl) { /* * x is between 88.7 and 192, so we must scale to avoid - * overflow in expf(x). We use exp(x) = exp(x - kln2) * 2**k, - * carefully chosen to minimize |exp(kln2) - 2**k|. We also - * scale the exponent of exp(x) to MANT_DIG to avoid loss of - * accuracy due to underflow if sin(y) is tiny. + * overflow in expf(x). */ - exp_x = expf(x - kln2); - GET_FLOAT_WORD(hx, exp_x); - SET_FLOAT_WORD(exp_x, (hx & 0x7fffff) | ((0x7f + 23) << 23)); - scale = (hx >> 23) - (0x7f + 23) + k; - return (cpackf(scalbnf(cosf(y) * exp_x, scale), - scalbnf(sinf(y) * exp_x, scale))); + return (__ldexp_cexpf(z, 0)); } else { /* * Cases covered here: diff --git a/lib/msun/src/s_csinh.c b/lib/msun/src/s_csinh.c new file mode 100644 index 0000000..c192f30 --- /dev/null +++ b/lib/msun/src/s_csinh.c @@ -0,0 +1,157 @@ +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Hyperbolic sine of a complex argument z = x + i y. + * + * sinh(z) = sinh(x+iy) + * = sinh(x) cos(y) + i cosh(x) sin(y). + * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> +#include <math.h> + +#include "math_private.h" + +static const double huge = 0x1p1023; + +double complex +csinh(double complex z) +{ + double x, y, h; + int32_t hx, hy, ix, iy, lx, ly; + + x = creal(z); + y = cimag(z); + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hy, ly, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (ix < 0x7ff00000 && iy < 0x7ff00000) { + if ((iy | ly) == 0) + return (cpack(sinh(x), y)); + if (ix < 0x40360000) /* small x: normal case */ + return (cpack(sinh(x) * cos(y), cosh(x) * sin(y))); + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (ix < 0x40862e42) { + /* x < 710: exp(|x|) won't overflow */ + h = exp(fabs(x)) * 0.5; + return (cpack(copysign(h, x) * cos(y), h * sin(y))); + } else if (ix < 0x4096bbaa) { + /* x < 1455: scale to avoid overflow */ + z = __ldexp_cexp(cpack(fabs(x), y), -1); + return (cpack(creal(z) * copysign(1, x), cimag(z))); + } else { + /* x >= 1455: the result always overflows */ + h = huge * x; + return (cpack(h * cos(y), h * h * sin(y))); + } + } + + /* + * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN). + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if ((ix | lx) == 0 && iy >= 0x7ff00000) + return (cpack(copysign(0, x * (y - y)), y - y)); + + /* + * sinh(+-Inf +- I 0) = +-Inf + I +-0. + * + * sinh(NaN +- I 0) = d(NaN) + I +-0. + */ + if ((iy | ly) == 0 && ix >= 0x7ff00000) { + if (((hx & 0xfffff) | lx) == 0) + return (cpack(x, y)); + return (cpack(x, copysign(0, y))); + } + + /* + * sinh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * sinh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (ix < 0x7ff00000 && iy >= 0x7ff00000) + return (cpack(y - y, x * (y - y))); + + /* + * sinh(+-Inf + I NaN) = +-Inf + I d(NaN). + * The sign of Inf in the result is unspecified. Choice = normally + * the same as d(NaN). + * + * sinh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y) + */ + if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) { + if (iy >= 0x7ff00000) + return (cpack(x * x, x * (y - y))); + return (cpack(x * cos(y), INFINITY * sin(y))); + } + + /* + * sinh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * sinh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * sinh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return (cpack((x * x) * (y - y), (x + x) * (y - y))); +} + +double complex +csin(double complex z) +{ + + /* csin(z) = -I * csinh(I * z) */ + z = csinh(cpack(-cimag(z), creal(z))); + return (cpack(cimag(z), -creal(z))); +} diff --git a/lib/msun/src/s_csinhf.c b/lib/msun/src/s_csinhf.c new file mode 100644 index 0000000..c523125 --- /dev/null +++ b/lib/msun/src/s_csinhf.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Hyperbolic sine of a complex argument z. See s_csinh.c for details. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> +#include <math.h> + +#include "math_private.h" + +static const float huge = 0x1p127; + +float complex +csinhf(float complex z) +{ + float x, y, h; + int32_t hx, hy, ix, iy; + + x = crealf(z); + y = cimagf(z); + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + + ix = 0x7fffffff & hx; + iy = 0x7fffffff & hy; + + if (ix < 0x7f800000 && iy < 0x7f800000) { + if (iy == 0) + return (cpackf(sinhf(x), y)); + if (ix < 0x41100000) /* small x: normal case */ + return (cpackf(sinhf(x) * cosf(y), coshf(x) * sinf(y))); + + /* |x| >= 9, so cosh(x) ~= exp(|x|) */ + if (ix < 0x42b17218) { + /* x < 88.7: expf(|x|) won't overflow */ + h = expf(fabsf(x)) * 0.5f; + return (cpackf(copysignf(h, x) * cosf(y), h * sinf(y))); + } else if (ix < 0x4340b1e7) { + /* x < 192.7: scale to avoid overflow */ + z = __ldexp_cexpf(cpackf(fabsf(x), y), -1); + return (cpackf(crealf(z) * copysignf(1, x), cimagf(z))); + } else { + /* x >= 192.7: the result always overflows */ + h = huge * x; + return (cpackf(h * cosf(y), h * h * sinf(y))); + } + } + + if (ix == 0 && iy >= 0x7f800000) + return (cpackf(copysignf(0, x * (y - y)), y - y)); + + if (iy == 0 && ix >= 0x7f800000) { + if ((hx & 0x7fffff) == 0) + return (cpackf(x, y)); + return (cpackf(x, copysignf(0, y))); + } + + if (ix < 0x7f800000 && iy >= 0x7f800000) + return (cpackf(y - y, x * (y - y))); + + if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) { + if (iy >= 0x7f800000) + return (cpackf(x * x, x * (y - y))); + return (cpackf(x * cosf(y), INFINITY * sinf(y))); + } + + return (cpackf((x * x) * (y - y), (x + x) * (y - y))); +} + +float complex +csinf(float complex z) +{ + + z = csinhf(cpackf(-cimagf(z), crealf(z))); + return (cpackf(cimagf(z), -crealf(z))); +} diff --git a/lib/msun/src/s_ctanh.c b/lib/msun/src/s_ctanh.c new file mode 100644 index 0000000..d427e28 --- /dev/null +++ b/lib/msun/src/s_ctanh.c @@ -0,0 +1,144 @@ +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Hyperbolic tangent of a complex argument z = x + i y. + * + * The algorithm is from: + * + * W. Kahan. Branch Cuts for Complex Elementary Functions or Much + * Ado About Nothing's Sign Bit. In The State of the Art in + * Numerical Analysis, pp. 165 ff. Iserles and Powell, eds., 1987. + * + * Method: + * + * Let t = tan(x) + * beta = 1/cos^2(y) + * s = sinh(x) + * rho = cosh(x) + * + * We have: + * + * tanh(z) = sinh(z) / cosh(z) + * + * sinh(x) cos(y) + i cosh(x) sin(y) + * = --------------------------------- + * cosh(x) cos(y) + i sinh(x) sin(y) + * + * cosh(x) sinh(x) / cos^2(y) + i tan(y) + * = ------------------------------------- + * 1 + sinh^2(x) / cos^2(y) + * + * beta rho s + i t + * = ---------------- + * 1 + beta s^2 + * + * Modifications: + * + * I omitted the original algorithm's handling of overflow in tan(x) after + * verifying with nearpi.c that this can't happen in IEEE single or double + * precision. I also handle large x differently. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> +#include <math.h> + +#include "math_private.h" + +double complex +ctanh(double complex z) +{ + double x, y; + double t, beta, s, rho, denom; + uint32_t hx, ix, lx; + + x = creal(z); + y = cimag(z); + + EXTRACT_WORDS(hx, lx, x); + ix = hx & 0x7fffffff; + + /* + * ctanh(NaN + i 0) = NaN + i 0 + * + * ctanh(NaN + i y) = NaN + i NaN for y != 0 + * + * The imaginary part has the sign of x*sin(2*y), but there's no + * special effort to get this right. + * + * ctanh(+-Inf +- i Inf) = +-1 +- 0 + * + * ctanh(+-Inf + i y) = +-1 + 0 sin(2y) for y finite + * + * The imaginary part of the sign is unspecified. This special + * case is only needed to avoid a spurious invalid exception when + * y is infinite. + */ + if (ix >= 0x7ff00000) { + if ((ix & 0xfffff) | lx) /* x is NaN */ + return (cpack(x, (y == 0 ? y : x * y))); + SET_HIGH_WORD(x, hx - 0x40000000); /* x = copysign(1, x) */ + return (cpack(x, copysign(0, isinf(y) ? y : sin(y) * cos(y)))); + } + + /* + * ctanh(x + i NAN) = NaN + i NaN + * ctanh(x +- i Inf) = NaN + i NaN + */ + if (!isfinite(y)) + return (cpack(y - y, y - y)); + + /* + * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the + * approximation sinh^2(huge) ~= exp(2*huge) / 4. + * We use a modified formula to avoid spurious overflow. + */ + if (ix >= 0x40360000) { /* x >= 22 */ + double exp_mx = exp(-fabs(x)); + return (cpack(copysign(1, x), + 4 * sin(y) * cos(y) * exp_mx * exp_mx)); + } + + /* Kahan's algorithm */ + t = tan(y); + beta = 1.0 + t * t; /* = 1 / cos^2(y) */ + s = sinh(x); + rho = sqrt(1 + s * s); /* = cosh(x) */ + denom = 1 + beta * s * s; + return (cpack((beta * rho * s) / denom, t / denom)); +} + +double complex +ctan(double complex z) +{ + + /* ctan(z) = -I * ctanh(I * z) */ + z = ctanh(cpack(-cimag(z), creal(z))); + return (cpack(cimag(z), -creal(z))); +} diff --git a/lib/msun/src/s_ctanhf.c b/lib/msun/src/s_ctanhf.c new file mode 100644 index 0000000..4be28d8 --- /dev/null +++ b/lib/msun/src/s_ctanhf.c @@ -0,0 +1,84 @@ +/*- + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Hyperbolic tangent of a complex argument z. See s_ctanh.c for details. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> +#include <math.h> + +#include "math_private.h" + +float complex +ctanhf(float complex z) +{ + float x, y; + float t, beta, s, rho, denom; + uint32_t hx, ix; + + x = crealf(z); + y = cimagf(z); + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; + + if (ix >= 0x7f800000) { + if (ix & 0x7fffff) + return (cpackf(x, (y == 0 ? y : x * y))); + SET_FLOAT_WORD(x, hx - 0x40000000); + return (cpackf(x, + copysignf(0, isinf(y) ? y : sinf(y) * cosf(y)))); + } + + if (!isfinite(y)) + return (cpackf(y - y, y - y)); + + if (ix >= 0x41300000) { /* x >= 11 */ + float exp_mx = expf(-fabsf(x)); + return (cpackf(copysignf(1, x), + 4 * sinf(y) * cosf(y) * exp_mx * exp_mx)); + } + + t = tanf(y); + beta = 1.0 + t * t; + s = sinhf(x); + rho = sqrtf(1 + s * s); + denom = 1 + beta * s * s; + return (cpackf((beta * rho * s) / denom, t / denom)); +} + +float complex +ctanf(float complex z) +{ + + z = ctanhf(cpackf(-cimagf(z), crealf(z))); + return (cpackf(cimagf(z), -crealf(z))); +} + diff --git a/lib/msun/src/s_expm1.c b/lib/msun/src/s_expm1.c index 3de7bfb..5aa1917 100644 --- a/lib/msun/src/s_expm1.c +++ b/lib/msun/src/s_expm1.c @@ -108,6 +108,8 @@ __FBSDID("$FreeBSD$"); * to produce the hexadecimal values shown. */ +#include <float.h> + #include "math.h" #include "math_private.h" @@ -135,7 +137,6 @@ expm1(double x) GET_HIGH_WORD(hx,x); xsb = hx&0x80000000; /* sign bit of x */ - if(xsb==0) y=x; else y= -x; /* y = |x| */ hx &= 0x7fffffff; /* high word of |x| */ /* filter out huge and non-finite argument */ @@ -169,7 +170,7 @@ expm1(double x) hi = x - t*ln2_hi; /* t*ln2_hi is exact here */ lo = t*ln2_lo; } - x = hi - lo; + STRICT_ASSIGN(double, x, hi - lo); c = (hi-x)-lo; } else if(hx < 0x3c900000) { /* when |x|<2**-54, return x */ diff --git a/lib/msun/src/s_expm1f.c b/lib/msun/src/s_expm1f.c index 483472c..fb37494 100644 --- a/lib/msun/src/s_expm1f.c +++ b/lib/msun/src/s_expm1f.c @@ -16,6 +16,8 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <float.h> + #include "math.h" #include "math_private.h" @@ -44,7 +46,6 @@ expm1f(float x) GET_FLOAT_WORD(hx,x); xsb = hx&0x80000000; /* sign bit of x */ - if(xsb==0) y=x; else y= -x; /* y = |x| */ hx &= 0x7fffffff; /* high word of |x| */ /* filter out huge and non-finite argument */ @@ -75,7 +76,7 @@ expm1f(float x) hi = x - t*ln2_hi; /* t*ln2_hi is exact here */ lo = t*ln2_lo; } - x = hi - lo; + STRICT_ASSIGN(float, x, hi - lo); c = (hi-x)-lo; } else if(hx < 0x33000000) { /* when |x|<2**-25, return x */ diff --git a/lib/msun/src/s_fma.c b/lib/msun/src/s_fma.c index ad1fc4a..dfbd13c 100644 --- a/lib/msun/src/s_fma.c +++ b/lib/msun/src/s_fma.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> + * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,132 @@ __FBSDID("$FreeBSD$"); #include <float.h> #include <math.h> +#include "math_private.h" + +/* + * A struct dd represents a floating-point number with twice the precision + * of a double. We maintain the invariant that "hi" stores the 53 high-order + * bits of the result. + */ +struct dd { + double hi; + double lo; +}; + +/* + * Compute a+b exactly, returning the exact result in a struct dd. We assume + * that both a and b are finite, but make no assumptions about their relative + * magnitudes. + */ +static inline struct dd +dd_add(double a, double b) +{ + struct dd ret; + double s; + + ret.hi = a + b; + s = ret.hi - a; + ret.lo = (a - (ret.hi - s)) + (b - s); + return (ret); +} + +/* + * Compute a+b, with a small tweak: The least significant bit of the + * result is adjusted into a sticky bit summarizing all the bits that + * were lost to rounding. This adjustment negates the effects of double + * rounding when the result is added to another number with a higher + * exponent. For an explanation of round and sticky bits, see any reference + * on FPU design, e.g., + * + * J. Coonen. An Implementation Guide to a Proposed Standard for + * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. + */ +static inline double +add_adjusted(double a, double b) +{ + struct dd sum; + uint64_t hibits, lobits; + + sum = dd_add(a, b); + if (sum.lo != 0) { + EXTRACT_WORD64(hibits, sum.hi); + if ((hibits & 1) == 0) { + /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ + EXTRACT_WORD64(lobits, sum.lo); + hibits += 1 - ((hibits ^ lobits) >> 62); + INSERT_WORD64(sum.hi, hibits); + } + } + return (sum.hi); +} + +/* + * Compute ldexp(a+b, scale) with a single rounding error. It is assumed + * that the result will be subnormal, and care is taken to ensure that + * double rounding does not occur. + */ +static inline double +add_and_denormalize(double a, double b, int scale) +{ + struct dd sum; + uint64_t hibits, lobits; + int bits_lost; + + sum = dd_add(a, b); + + /* + * If we are losing at least two bits of accuracy to denormalization, + * then the first lost bit becomes a round bit, and we adjust the + * lowest bit of sum.hi to make it a sticky bit summarizing all the + * bits in sum.lo. With the sticky bit adjusted, the hardware will + * break any ties in the correct direction. + * + * If we are losing only one bit to denormalization, however, we must + * break the ties manually. + */ + if (sum.lo != 0) { + EXTRACT_WORD64(hibits, sum.hi); + bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1; + if (bits_lost != 1 ^ (int)(hibits & 1)) { + /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ + EXTRACT_WORD64(lobits, sum.lo); + hibits += 1 - (((hibits ^ lobits) >> 62) & 2); + INSERT_WORD64(sum.hi, hibits); + } + } + return (ldexp(sum.hi, scale)); +} + +/* + * Compute a*b exactly, returning the exact result in a struct dd. We assume + * that both a and b are normalized, so no underflow or overflow will occur. + * The current rounding mode must be round-to-nearest. + */ +static inline struct dd +dd_mul(double a, double b) +{ + static const double split = 0x1p27 + 1.0; + struct dd ret; + double ha, hb, la, lb, p, q; + + p = a * split; + ha = a - p; + ha += p; + la = a - ha; + + p = b * split; + hb = b - p; + hb += p; + lb = b - hb; + + p = ha * hb; + q = ha * lb + la * hb; + + ret.hi = p + q; + ret.lo = p - ret.hi + q + la * lb; + return (ret); +} + /* * Fused multiply-add: Compute x * y + z with a single rounding error. * @@ -48,14 +174,11 @@ __FBSDID("$FreeBSD$"); * Hardware instructions should be used on architectures that support it, * since this implementation will likely be several times slower. */ -#if LDBL_MANT_DIG != 113 double fma(double x, double y, double z) { - static const double split = 0x1p27 + 1.0; - double xs, ys, zs; - double c, cc, hx, hy, p, q, tx, ty; - double r, rr, s; + double xs, ys, zs, adj; + struct dd xy, r; int oround; int ex, ey, ez; int spread; @@ -85,41 +208,6 @@ fma(double x, double y, double z) * will overflow, so we handle these cases specially. Rounding * modes other than FE_TONEAREST are painful. */ - if (spread > DBL_MANT_DIG * 2) { - fenv_t env; - feraiseexcept(FE_INEXACT); - switch(oround) { - case FE_TONEAREST: - return (x * y); - case FE_TOWARDZERO: - if (x > 0.0 ^ y < 0.0 ^ z < 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafter(r, 0); - feupdateenv(&env); - return (r); - case FE_DOWNWARD: - if (z > 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafter(r, -INFINITY); - feupdateenv(&env); - return (r); - default: /* FE_UPWARD */ - if (z < 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafter(r, INFINITY); - feupdateenv(&env); - return (r); - } - } if (spread < -DBL_MANT_DIG) { feraiseexcept(FE_INEXACT); if (!isnormal(z)) @@ -144,63 +232,52 @@ fma(double x, double y, double z) return (z); } } + if (spread <= DBL_MANT_DIG * 2) + zs = ldexp(zs, -spread); + else + zs = copysign(DBL_MIN, zs); - /* - * Use Dekker's algorithm to perform the multiplication and - * subsequent addition in twice the machine precision. - * Arrange so that x * y = c + cc, and x * y + z = r + rr. - */ fesetround(FE_TONEAREST); - p = xs * split; - hx = xs - p; - hx += p; - tx = xs - hx; - - p = ys * split; - hy = ys - p; - hy += p; - ty = ys - hy; - - p = hx * hy; - q = hx * ty + tx * hy; - c = p + q; - cc = p - c + q + tx * ty; - - zs = ldexp(zs, -spread); - r = c + zs; - s = r - c; - rr = (c - (r - s)) + (zs - s) + cc; + /* + * Basic approach for round-to-nearest: + * + * (xy.hi, xy.lo) = x * y (exact) + * (r.hi, r.lo) = xy.hi + z (exact) + * adj = xy.lo + r.lo (inexact; low bit is sticky) + * result = r.hi + adj (correctly rounded) + */ + xy = dd_mul(xs, ys); + r = dd_add(xy.hi, zs); spread = ex + ey; - if (spread + ilogb(r) > -1023) { + + if (r.hi == 0.0) { + /* + * When the addends cancel to 0, ensure that the result has + * the correct sign. + */ fesetround(oround); - r = r + rr; - } else { + volatile double vzs = zs; /* XXX gcc CSE bug workaround */ + return (xy.hi + vzs + ldexp(xy.lo, spread)); + } + + if (oround != FE_TONEAREST) { /* - * The result is subnormal, so we round before scaling to - * avoid double rounding. + * There is no need to worry about double rounding in directed + * rounding modes. */ - p = ldexp(copysign(0x1p-1022, r), -spread); - c = r + p; - s = c - r; - cc = (r - (c - s)) + (p - s) + rr; fesetround(oround); - r = (c + cc) - p; + adj = r.lo + xy.lo; + return (ldexp(r.hi + adj, spread)); } - return (ldexp(r, spread)); -} -#else /* LDBL_MANT_DIG == 113 */ -/* - * 113 bits of precision is more than twice the precision of a double, - * so it is enough to represent the intermediate product exactly. - */ -double -fma(double x, double y, double z) -{ - return ((long double)x * y + z); + + adj = add_adjusted(r.lo, xy.lo); + if (spread + ilogb(r.hi) > -1023) + return (ldexp(r.hi + adj, spread)); + else + return (add_and_denormalize(r.hi, adj, spread)); } -#endif /* LDBL_MANT_DIG != 113 */ #if (LDBL_MANT_DIG == 53) __weak_reference(fma, fmal); diff --git a/lib/msun/src/s_fmaf.c b/lib/msun/src/s_fmaf.c index 7c699e5..3695823 100644 --- a/lib/msun/src/s_fmaf.c +++ b/lib/msun/src/s_fmaf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> + * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,23 +27,43 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <fenv.h> + #include "math.h" +#include "math_private.h" /* * Fused multiply-add: Compute x * y + z with a single rounding error. * * A double has more than twice as much precision than a float, so - * direct double-precision arithmetic suffices. - * - * XXX We are relying on the compiler to convert from double to float - * using the current rounding mode and with the appropriate - * side-effects. But on at least one platform (gcc 3.4.2/sparc64), - * this appears to be too much to ask for. The precision - * reduction should be done manually. + * direct double-precision arithmetic suffices, except where double + * rounding occurs. */ float fmaf(float x, float y, float z) { + double xy, result; + uint32_t hr, lr; + + xy = (double)x * y; + result = xy + z; + EXTRACT_WORDS(hr, lr, result); + /* Common case: The double precision result is fine. */ + if ((lr & 0x1fffffff) != 0x10000000 || /* not a halfway case */ + (hr & 0x7ff00000) == 0x7ff00000 || /* NaN */ + result - xy == z || /* exact */ + fegetround() != FE_TONEAREST) /* not round-to-nearest */ + return (result); - return ((double)x * y + z); + /* + * If result is inexact, and exactly halfway between two float values, + * we need to adjust the low-order bit in the direction of the error. + */ + fesetround(FE_TOWARDZERO); + volatile double vxy = xy; /* XXX work around gcc CSE bug */ + double adjusted_result = vxy + z; + fesetround(FE_TONEAREST); + if (result == adjusted_result) + SET_LOW_WORD(adjusted_result, lr + 1); + return (adjusted_result); } diff --git a/lib/msun/src/s_fmal.c b/lib/msun/src/s_fmal.c index 4d5d114..c2a6913 100644 --- a/lib/msun/src/s_fmal.c +++ b/lib/msun/src/s_fmal.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> + * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,128 @@ __FBSDID("$FreeBSD$"); #include <float.h> #include <math.h> +#include "fpmath.h" + +/* + * A struct dd represents a floating-point number with twice the precision + * of a long double. We maintain the invariant that "hi" stores the high-order + * bits of the result. + */ +struct dd { + long double hi; + long double lo; +}; + +/* + * Compute a+b exactly, returning the exact result in a struct dd. We assume + * that both a and b are finite, but make no assumptions about their relative + * magnitudes. + */ +static inline struct dd +dd_add(long double a, long double b) +{ + struct dd ret; + long double s; + + ret.hi = a + b; + s = ret.hi - a; + ret.lo = (a - (ret.hi - s)) + (b - s); + return (ret); +} + +/* + * Compute a+b, with a small tweak: The least significant bit of the + * result is adjusted into a sticky bit summarizing all the bits that + * were lost to rounding. This adjustment negates the effects of double + * rounding when the result is added to another number with a higher + * exponent. For an explanation of round and sticky bits, see any reference + * on FPU design, e.g., + * + * J. Coonen. An Implementation Guide to a Proposed Standard for + * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. + */ +static inline long double +add_adjusted(long double a, long double b) +{ + struct dd sum; + union IEEEl2bits u; + + sum = dd_add(a, b); + if (sum.lo != 0) { + u.e = sum.hi; + if ((u.bits.manl & 1) == 0) + sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); + } + return (sum.hi); +} + +/* + * Compute ldexp(a+b, scale) with a single rounding error. It is assumed + * that the result will be subnormal, and care is taken to ensure that + * double rounding does not occur. + */ +static inline long double +add_and_denormalize(long double a, long double b, int scale) +{ + struct dd sum; + int bits_lost; + union IEEEl2bits u; + + sum = dd_add(a, b); + + /* + * If we are losing at least two bits of accuracy to denormalization, + * then the first lost bit becomes a round bit, and we adjust the + * lowest bit of sum.hi to make it a sticky bit summarizing all the + * bits in sum.lo. With the sticky bit adjusted, the hardware will + * break any ties in the correct direction. + * + * If we are losing only one bit to denormalization, however, we must + * break the ties manually. + */ + if (sum.lo != 0) { + u.e = sum.hi; + bits_lost = -u.bits.exp - scale + 1; + if (bits_lost != 1 ^ (int)(u.bits.manl & 1)) + sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); + } + return (ldexp(sum.hi, scale)); +} + +/* + * Compute a*b exactly, returning the exact result in a struct dd. We assume + * that both a and b are normalized, so no underflow or overflow will occur. + * The current rounding mode must be round-to-nearest. + */ +static inline struct dd +dd_mul(long double a, long double b) +{ +#if LDBL_MANT_DIG == 64 + static const long double split = 0x1p32L + 1.0; +#elif LDBL_MANT_DIG == 113 + static const long double split = 0x1p57L + 1.0; +#endif + struct dd ret; + long double ha, hb, la, lb, p, q; + + p = a * split; + ha = a - p; + ha += p; + la = a - ha; + + p = b * split; + hb = b - p; + hb += p; + lb = b - hb; + + p = ha * hb; + q = ha * lb + la * hb; + + ret.hi = p + q; + ret.lo = p - ret.hi + q + la * lb; + return (ret); +} + /* * Fused multiply-add: Compute x * y + z with a single rounding error. * @@ -43,14 +165,8 @@ __FBSDID("$FreeBSD$"); long double fmal(long double x, long double y, long double z) { -#if LDBL_MANT_DIG == 64 - static const long double split = 0x1p32L + 1.0; -#elif LDBL_MANT_DIG == 113 - static const long double split = 0x1p57L + 1.0; -#endif - long double xs, ys, zs; - long double c, cc, hx, hy, p, q, tx, ty; - long double r, rr, s; + long double xs, ys, zs, adj; + struct dd xy, r; int oround; int ex, ey, ez; int spread; @@ -80,41 +196,6 @@ fmal(long double x, long double y, long double z) * will overflow, so we handle these cases specially. Rounding * modes other than FE_TONEAREST are painful. */ - if (spread > LDBL_MANT_DIG * 2) { - fenv_t env; - feraiseexcept(FE_INEXACT); - switch(oround) { - case FE_TONEAREST: - return (x * y); - case FE_TOWARDZERO: - if (x > 0.0 ^ y < 0.0 ^ z < 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafterl(r, 0); - feupdateenv(&env); - return (r); - case FE_DOWNWARD: - if (z > 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafterl(r, -INFINITY); - feupdateenv(&env); - return (r); - default: /* FE_UPWARD */ - if (z < 0.0) - return (x * y); - feholdexcept(&env); - r = x * y; - if (!fetestexcept(FE_INEXACT)) - r = nextafterl(r, INFINITY); - feupdateenv(&env); - return (r); - } - } if (spread < -LDBL_MANT_DIG) { feraiseexcept(FE_INEXACT); if (!isnormal(z)) @@ -139,49 +220,49 @@ fmal(long double x, long double y, long double z) return (z); } } + if (spread <= LDBL_MANT_DIG * 2) + zs = ldexpl(zs, -spread); + else + zs = copysignl(LDBL_MIN, zs); - /* - * Use Dekker's algorithm to perform the multiplication and - * subsequent addition in twice the machine precision. - * Arrange so that x * y = c + cc, and x * y + z = r + rr. - */ fesetround(FE_TONEAREST); - p = xs * split; - hx = xs - p; - hx += p; - tx = xs - hx; - - p = ys * split; - hy = ys - p; - hy += p; - ty = ys - hy; - - p = hx * hy; - q = hx * ty + tx * hy; - c = p + q; - cc = p - c + q + tx * ty; - - zs = ldexpl(zs, -spread); - r = c + zs; - s = r - c; - rr = (c - (r - s)) + (zs - s) + cc; + /* + * Basic approach for round-to-nearest: + * + * (xy.hi, xy.lo) = x * y (exact) + * (r.hi, r.lo) = xy.hi + z (exact) + * adj = xy.lo + r.lo (inexact; low bit is sticky) + * result = r.hi + adj (correctly rounded) + */ + xy = dd_mul(xs, ys); + r = dd_add(xy.hi, zs); spread = ex + ey; - if (spread + ilogbl(r) > -16383) { + + if (r.hi == 0.0) { + /* + * When the addends cancel to 0, ensure that the result has + * the correct sign. + */ fesetround(oround); - r = r + rr; - } else { + volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ + return (xy.hi + vzs + ldexpl(xy.lo, spread)); + } + + if (oround != FE_TONEAREST) { /* - * The result is subnormal, so we round before scaling to - * avoid double rounding. + * There is no need to worry about double rounding in directed + * rounding modes. */ - p = ldexpl(copysignl(0x1p-16382L, r), -spread); - c = r + p; - s = c - r; - cc = (r - (c - s)) + (p - s) + rr; fesetround(oround); - r = (c + cc) - p; + adj = r.lo + xy.lo; + return (ldexpl(r.hi + adj, spread)); } - return (ldexpl(r, spread)); + + adj = add_adjusted(r.lo, xy.lo); + if (spread + ilogbl(r.hi) > -16383) + return (ldexpl(r.hi + adj, spread)); + else + return (add_and_denormalize(r.hi, adj, spread)); } |