diff options
author | tijl <tijl@FreeBSD.org> | 2014-09-18 15:10:22 +0000 |
---|---|---|
committer | tijl <tijl@FreeBSD.org> | 2014-09-18 15:10:22 +0000 |
commit | 680c2860fd7f159f4529ca45a9ca52b4a7ba264b (patch) | |
tree | 26b81d212dd6cf440026f609ed3bcfcf488c6bdb /lib/msun/src/s_erff.c | |
parent | adb4c6080c0d40a450861f8ad12a1634fc813dc4 (diff) | |
download | FreeBSD-src-680c2860fd7f159f4529ca45a9ca52b4a7ba264b.zip FreeBSD-src-680c2860fd7f159f4529ca45a9ca52b4a7ba264b.tar.gz |
MFC r257770 r257818 r257823 r260066 r260067 r260089 r260145 r268587 r268588
r268589 r268590 r268593 r268597 r269758 r270845 r270847 r270893 r270932
r270947 r271147
Merge libm work by kargl, bde and das from the past few months.
Besides optimisations and small bug fixes this includes new implementations
for C99 functions expl, coshl, sinhl, tanhl, erfl and erfcl.
Approved by: re (kib)
Diffstat (limited to 'lib/msun/src/s_erff.c')
-rw-r--r-- | lib/msun/src/s_erff.c | 127 |
1 files changed, 63 insertions, 64 deletions
diff --git a/lib/msun/src/s_erff.c b/lib/msun/src/s_erff.c index b97ca1d..d6cfbd2 100644 --- a/lib/msun/src/s_erff.c +++ b/lib/msun/src/s_erff.c @@ -19,64 +19,63 @@ __FBSDID("$FreeBSD$"); #include "math.h" #include "math_private.h" +/* XXX Prevent compilers from erroneously constant folding: */ +static const volatile float tiny = 1e-30; + static const float -tiny = 1e-30, -half= 5.0000000000e-01, /* 0x3F000000 */ -one = 1.0000000000e+00, /* 0x3F800000 */ -two = 2.0000000000e+00, /* 0x40000000 */ +half= 0.5, +one = 1, +two = 2, +erx = 8.42697144e-01, /* 0x3f57bb00 */ /* - * Coefficients for approximation to erf on [0,0.84375] + * In the domain [0, 2**-14], only the first term in the power series + * expansion of erf(x) is used. The magnitude of the first neglected + * terms is less than 2**-42. */ -efx = 1.2837916613e-01, /* 0x3e0375d4 */ -efx8= 1.0270333290e+00, /* 0x3f8375d4 */ +efx = 1.28379166e-01, /* 0x3e0375d4 */ +efx8= 1.02703333e+00, /* 0x3f8375d4 */ /* - * Domain [0, 0.84375], range ~[-5.4446e-10,5.5197e-10]: - * |(erf(x) - x)/x - p(x)/q(x)| < 2**-31. + * Domain [0, 0.84375], range ~[-5.4419e-10, 5.5179e-10]: + * |(erf(x) - x)/x - pp(x)/qq(x)| < 2**-31 */ -pp0 = 1.28379166e-01F, /* 0x1.06eba8p-3 */ -pp1 = -3.36030394e-01F, /* -0x1.58185ap-2 */ -pp2 = -1.86260219e-03F, /* -0x1.e8451ep-10 */ -qq1 = 3.12324286e-01F, /* 0x1.3fd1f0p-2 */ -qq2 = 2.16070302e-02F, /* 0x1.620274p-6 */ -qq3 = -1.98859419e-03F, /* -0x1.04a626p-9 */ +pp0 = 1.28379166e-01, /* 0x3e0375d4 */ +pp1 = -3.36030394e-01, /* 0xbeac0c2d */ +pp2 = -1.86261395e-03, /* 0xbaf422f4 */ +qq1 = 3.12324315e-01, /* 0x3e9fe8f9 */ +qq2 = 2.16070414e-02, /* 0x3cb10140 */ +qq3 = -1.98859372e-03, /* 0xbb025311 */ /* - * Domain [0.84375, 1.25], range ~[-1.953e-11,1.940e-11]: - * |(erf(x) - erx) - p(x)/q(x)| < 2**-36. + * Domain [0.84375, 1.25], range ~[-1.023e-9, 1.023e-9]: + * |(erf(x) - erx) - pa(x)/qa(x)| < 2**-31 */ -erx = 8.42697144e-01F, /* 0x1.af7600p-1. erf(1) rounded to 16 bits. */ -pa0 = 3.64939137e-06F, /* 0x1.e9d022p-19 */ -pa1 = 4.15109694e-01F, /* 0x1.a91284p-2 */ -pa2 = -1.65179938e-01F, /* -0x1.5249dcp-3 */ -pa3 = 1.10914491e-01F, /* 0x1.c64e46p-4 */ -qa1 = 6.02074385e-01F, /* 0x1.344318p-1 */ -qa2 = 5.35934687e-01F, /* 0x1.126608p-1 */ -qa3 = 1.68576106e-01F, /* 0x1.593e6ep-3 */ -qa4 = 5.62181212e-02F, /* 0x1.cc89f2p-5 */ +pa0 = 3.65041046e-06, /* 0x3674f993 */ +pa1 = 4.15109307e-01, /* 0x3ed48935 */ +pa2 = -2.09395722e-01, /* 0xbe566bd5 */ +pa3 = 8.67677554e-02, /* 0x3db1b34b */ +qa1 = 4.95560974e-01, /* 0x3efdba2b */ +qa2 = 3.71248513e-01, /* 0x3ebe1449 */ +qa3 = 3.92478965e-02, /* 0x3d20c267 */ /* - * Domain [1.25,1/0.35], range ~[-7.043e-10,7.457e-10]: - * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-30 + * Domain [1.25,1/0.35], range ~[-4.821e-9, 4.927e-9]: + * |log(x*erfc(x)) + x**2 + 0.5625 - ra(x)/sa(x)| < 2**-28 */ -ra0 = -9.87132732e-03F, /* -0x1.4376b2p-7 */ -ra1 = -5.53605914e-01F, /* -0x1.1b723cp-1 */ -ra2 = -2.17589188e+00F, /* -0x1.1683a0p+1 */ -ra3 = -1.43268085e+00F, /* -0x1.6ec42cp+0 */ -sa1 = 5.45995426e+00F, /* 0x1.5d6fe4p+2 */ -sa2 = 6.69798088e+00F, /* 0x1.acabb8p+2 */ -sa3 = 1.43113089e+00F, /* 0x1.6e5e98p+0 */ -sa4 = -5.77397496e-02F, /* -0x1.d90108p-5 */ +ra0 = -9.88156721e-03, /* 0xbc21e64c */ +ra1 = -5.43658376e-01, /* 0xbf0b2d32 */ +ra2 = -1.66828310e+00, /* 0xbfd58a4d */ +ra3 = -6.91554189e-01, /* 0xbf3109b2 */ +sa1 = 4.48581553e+00, /* 0x408f8bcd */ +sa2 = 4.10799170e+00, /* 0x408374ab */ +sa3 = 5.53855181e-01, /* 0x3f0dc974 */ /* - * Domain [1/0.35, 11], range ~[-2.264e-13,2.336e-13]: - * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-42 + * Domain [2.85715, 11], range ~[-1.484e-9, 1.505e-9]: + * |log(x*erfc(x)) + x**2 + 0.5625 - rb(x)/sb(x)| < 2**-30 */ -rb0 = -9.86494310e-03F, /* -0x1.434124p-7 */ -rb1 = -6.25171244e-01F, /* -0x1.401672p-1 */ -rb2 = -6.16498327e+00F, /* -0x1.8a8f16p+2 */ -rb3 = -1.66696873e+01F, /* -0x1.0ab70ap+4 */ -rb4 = -9.53764343e+00F, /* -0x1.313460p+3 */ -sb1 = 1.26884899e+01F, /* 0x1.96081cp+3 */ -sb2 = 4.51839523e+01F, /* 0x1.6978bcp+5 */ -sb3 = 4.72810211e+01F, /* 0x1.7a3f88p+5 */ -sb4 = 8.93033314e+00F; /* 0x1.1dc54ap+3 */ +rb0 = -9.86496918e-03, /* 0xbc21a0ae */ +rb1 = -5.48049808e-01, /* 0xbf0c4cfe */ +rb2 = -1.84115684e+00, /* 0xbfebab07 */ +sb1 = 4.87132740e+00, /* 0x409be1ea */ +sb2 = 3.04982710e+00, /* 0x4043305e */ +sb3 = -7.61900663e-01; /* 0xbf430bec */ float erff(float x) @@ -85,9 +84,9 @@ erff(float x) float R,S,P,Q,s,y,z,r; GET_FLOAT_WORD(hx,x); ix = hx&0x7fffffff; - if(ix>=0x7f800000) { /* erf(nan)=nan */ + if(ix>=0x7f800000) { /* erff(nan)=nan */ i = ((u_int32_t)hx>>31)<<1; - return (float)(1-i)+one/x; /* erf(+-inf)=+-1 */ + return (float)(1-i)+one/x; /* erff(+-inf)=+-1 */ } if(ix < 0x3f580000) { /* |x|<0.84375 */ @@ -105,7 +104,7 @@ erff(float x) if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */ s = fabsf(x)-one; P = pa0+s*(pa1+s*(pa2+s*pa3)); - Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4))); + Q = one+s*(qa1+s*(qa2+s*qa3)); if(hx>=0) return erx + P/Q; else return -erx - P/Q; } if (ix >= 0x40800000) { /* inf>|x|>=4 */ @@ -113,12 +112,12 @@ erff(float x) } x = fabsf(x); s = one/(x*x); - if(ix< 0x4036DB6E) { /* |x| < 1/0.35 */ + if(ix< 0x4036db8c) { /* |x| < 2.85715 ~ 1/0.35 */ R=ra0+s*(ra1+s*(ra2+s*ra3)); - S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4))); - } else { /* |x| >= 1/0.35 */ - R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4))); - S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4))); + S=one+s*(sa1+s*(sa2+s*sa3)); + } else { /* |x| >= 2.85715 ~ 1/0.35 */ + R=rb0+s*(rb1+s*rb2); + S=one+s*(sb1+s*(sb2+s*sb3)); } SET_FLOAT_WORD(z,hx&0xffffe000); r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S); @@ -132,8 +131,8 @@ erfcf(float x) float R,S,P,Q,s,y,z,r; GET_FLOAT_WORD(hx,x); ix = hx&0x7fffffff; - if(ix>=0x7f800000) { /* erfc(nan)=nan */ - /* erfc(+-inf)=0,2 */ + if(ix>=0x7f800000) { /* erfcf(nan)=nan */ + /* erfcf(+-inf)=0,2 */ return (float)(((u_int32_t)hx>>31)<<1)+one/x; } @@ -155,7 +154,7 @@ erfcf(float x) if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */ s = fabsf(x)-one; P = pa0+s*(pa1+s*(pa2+s*pa3)); - Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4))); + Q = one+s*(qa1+s*(qa2+s*qa3)); if(hx>=0) { z = one-erx; return z - P/Q; } else { @@ -165,13 +164,13 @@ erfcf(float x) if (ix < 0x41300000) { /* |x|<11 */ x = fabsf(x); s = one/(x*x); - if(ix< 0x4036DB6D) { /* |x| < 1/.35 ~ 2.857143*/ - R=ra0+s*(ra1+s*(ra2+s*ra3)); - S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4))); - } else { /* |x| >= 1/.35 ~ 2.857143 */ + if(ix< 0x4036db8c) { /* |x| < 2.85715 ~ 1/.35 */ + R=ra0+s*(ra1+s*(ra2+s*ra3)); + S=one+s*(sa1+s*(sa2+s*sa3)); + } else { /* |x| >= 2.85715 ~ 1/.35 */ if(hx<0&&ix>=0x40a00000) return two-tiny;/* x < -5 */ - R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4))); - S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4))); + R=rb0+s*(rb1+s*rb2); + S=one+s*(sb1+s*(sb2+s*sb3)); } SET_FLOAT_WORD(z,hx&0xffffe000); r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S); |