summaryrefslogtreecommitdiffstats
path: root/lib/msun/src/s_erff.c
diff options
context:
space:
mode:
authortijl <tijl@FreeBSD.org>2014-09-18 15:10:22 +0000
committertijl <tijl@FreeBSD.org>2014-09-18 15:10:22 +0000
commit680c2860fd7f159f4529ca45a9ca52b4a7ba264b (patch)
tree26b81d212dd6cf440026f609ed3bcfcf488c6bdb /lib/msun/src/s_erff.c
parentadb4c6080c0d40a450861f8ad12a1634fc813dc4 (diff)
downloadFreeBSD-src-680c2860fd7f159f4529ca45a9ca52b4a7ba264b.zip
FreeBSD-src-680c2860fd7f159f4529ca45a9ca52b4a7ba264b.tar.gz
MFC r257770 r257818 r257823 r260066 r260067 r260089 r260145 r268587 r268588
r268589 r268590 r268593 r268597 r269758 r270845 r270847 r270893 r270932 r270947 r271147 Merge libm work by kargl, bde and das from the past few months. Besides optimisations and small bug fixes this includes new implementations for C99 functions expl, coshl, sinhl, tanhl, erfl and erfcl. Approved by: re (kib)
Diffstat (limited to 'lib/msun/src/s_erff.c')
-rw-r--r--lib/msun/src/s_erff.c127
1 files changed, 63 insertions, 64 deletions
diff --git a/lib/msun/src/s_erff.c b/lib/msun/src/s_erff.c
index b97ca1d..d6cfbd2 100644
--- a/lib/msun/src/s_erff.c
+++ b/lib/msun/src/s_erff.c
@@ -19,64 +19,63 @@ __FBSDID("$FreeBSD$");
#include "math.h"
#include "math_private.h"
+/* XXX Prevent compilers from erroneously constant folding: */
+static const volatile float tiny = 1e-30;
+
static const float
-tiny = 1e-30,
-half= 5.0000000000e-01, /* 0x3F000000 */
-one = 1.0000000000e+00, /* 0x3F800000 */
-two = 2.0000000000e+00, /* 0x40000000 */
+half= 0.5,
+one = 1,
+two = 2,
+erx = 8.42697144e-01, /* 0x3f57bb00 */
/*
- * Coefficients for approximation to erf on [0,0.84375]
+ * In the domain [0, 2**-14], only the first term in the power series
+ * expansion of erf(x) is used. The magnitude of the first neglected
+ * terms is less than 2**-42.
*/
-efx = 1.2837916613e-01, /* 0x3e0375d4 */
-efx8= 1.0270333290e+00, /* 0x3f8375d4 */
+efx = 1.28379166e-01, /* 0x3e0375d4 */
+efx8= 1.02703333e+00, /* 0x3f8375d4 */
/*
- * Domain [0, 0.84375], range ~[-5.4446e-10,5.5197e-10]:
- * |(erf(x) - x)/x - p(x)/q(x)| < 2**-31.
+ * Domain [0, 0.84375], range ~[-5.4419e-10, 5.5179e-10]:
+ * |(erf(x) - x)/x - pp(x)/qq(x)| < 2**-31
*/
-pp0 = 1.28379166e-01F, /* 0x1.06eba8p-3 */
-pp1 = -3.36030394e-01F, /* -0x1.58185ap-2 */
-pp2 = -1.86260219e-03F, /* -0x1.e8451ep-10 */
-qq1 = 3.12324286e-01F, /* 0x1.3fd1f0p-2 */
-qq2 = 2.16070302e-02F, /* 0x1.620274p-6 */
-qq3 = -1.98859419e-03F, /* -0x1.04a626p-9 */
+pp0 = 1.28379166e-01, /* 0x3e0375d4 */
+pp1 = -3.36030394e-01, /* 0xbeac0c2d */
+pp2 = -1.86261395e-03, /* 0xbaf422f4 */
+qq1 = 3.12324315e-01, /* 0x3e9fe8f9 */
+qq2 = 2.16070414e-02, /* 0x3cb10140 */
+qq3 = -1.98859372e-03, /* 0xbb025311 */
/*
- * Domain [0.84375, 1.25], range ~[-1.953e-11,1.940e-11]:
- * |(erf(x) - erx) - p(x)/q(x)| < 2**-36.
+ * Domain [0.84375, 1.25], range ~[-1.023e-9, 1.023e-9]:
+ * |(erf(x) - erx) - pa(x)/qa(x)| < 2**-31
*/
-erx = 8.42697144e-01F, /* 0x1.af7600p-1. erf(1) rounded to 16 bits. */
-pa0 = 3.64939137e-06F, /* 0x1.e9d022p-19 */
-pa1 = 4.15109694e-01F, /* 0x1.a91284p-2 */
-pa2 = -1.65179938e-01F, /* -0x1.5249dcp-3 */
-pa3 = 1.10914491e-01F, /* 0x1.c64e46p-4 */
-qa1 = 6.02074385e-01F, /* 0x1.344318p-1 */
-qa2 = 5.35934687e-01F, /* 0x1.126608p-1 */
-qa3 = 1.68576106e-01F, /* 0x1.593e6ep-3 */
-qa4 = 5.62181212e-02F, /* 0x1.cc89f2p-5 */
+pa0 = 3.65041046e-06, /* 0x3674f993 */
+pa1 = 4.15109307e-01, /* 0x3ed48935 */
+pa2 = -2.09395722e-01, /* 0xbe566bd5 */
+pa3 = 8.67677554e-02, /* 0x3db1b34b */
+qa1 = 4.95560974e-01, /* 0x3efdba2b */
+qa2 = 3.71248513e-01, /* 0x3ebe1449 */
+qa3 = 3.92478965e-02, /* 0x3d20c267 */
/*
- * Domain [1.25,1/0.35], range ~[-7.043e-10,7.457e-10]:
- * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-30
+ * Domain [1.25,1/0.35], range ~[-4.821e-9, 4.927e-9]:
+ * |log(x*erfc(x)) + x**2 + 0.5625 - ra(x)/sa(x)| < 2**-28
*/
-ra0 = -9.87132732e-03F, /* -0x1.4376b2p-7 */
-ra1 = -5.53605914e-01F, /* -0x1.1b723cp-1 */
-ra2 = -2.17589188e+00F, /* -0x1.1683a0p+1 */
-ra3 = -1.43268085e+00F, /* -0x1.6ec42cp+0 */
-sa1 = 5.45995426e+00F, /* 0x1.5d6fe4p+2 */
-sa2 = 6.69798088e+00F, /* 0x1.acabb8p+2 */
-sa3 = 1.43113089e+00F, /* 0x1.6e5e98p+0 */
-sa4 = -5.77397496e-02F, /* -0x1.d90108p-5 */
+ra0 = -9.88156721e-03, /* 0xbc21e64c */
+ra1 = -5.43658376e-01, /* 0xbf0b2d32 */
+ra2 = -1.66828310e+00, /* 0xbfd58a4d */
+ra3 = -6.91554189e-01, /* 0xbf3109b2 */
+sa1 = 4.48581553e+00, /* 0x408f8bcd */
+sa2 = 4.10799170e+00, /* 0x408374ab */
+sa3 = 5.53855181e-01, /* 0x3f0dc974 */
/*
- * Domain [1/0.35, 11], range ~[-2.264e-13,2.336e-13]:
- * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-42
+ * Domain [2.85715, 11], range ~[-1.484e-9, 1.505e-9]:
+ * |log(x*erfc(x)) + x**2 + 0.5625 - rb(x)/sb(x)| < 2**-30
*/
-rb0 = -9.86494310e-03F, /* -0x1.434124p-7 */
-rb1 = -6.25171244e-01F, /* -0x1.401672p-1 */
-rb2 = -6.16498327e+00F, /* -0x1.8a8f16p+2 */
-rb3 = -1.66696873e+01F, /* -0x1.0ab70ap+4 */
-rb4 = -9.53764343e+00F, /* -0x1.313460p+3 */
-sb1 = 1.26884899e+01F, /* 0x1.96081cp+3 */
-sb2 = 4.51839523e+01F, /* 0x1.6978bcp+5 */
-sb3 = 4.72810211e+01F, /* 0x1.7a3f88p+5 */
-sb4 = 8.93033314e+00F; /* 0x1.1dc54ap+3 */
+rb0 = -9.86496918e-03, /* 0xbc21a0ae */
+rb1 = -5.48049808e-01, /* 0xbf0c4cfe */
+rb2 = -1.84115684e+00, /* 0xbfebab07 */
+sb1 = 4.87132740e+00, /* 0x409be1ea */
+sb2 = 3.04982710e+00, /* 0x4043305e */
+sb3 = -7.61900663e-01; /* 0xbf430bec */
float
erff(float x)
@@ -85,9 +84,9 @@ erff(float x)
float R,S,P,Q,s,y,z,r;
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
- if(ix>=0x7f800000) { /* erf(nan)=nan */
+ if(ix>=0x7f800000) { /* erff(nan)=nan */
i = ((u_int32_t)hx>>31)<<1;
- return (float)(1-i)+one/x; /* erf(+-inf)=+-1 */
+ return (float)(1-i)+one/x; /* erff(+-inf)=+-1 */
}
if(ix < 0x3f580000) { /* |x|<0.84375 */
@@ -105,7 +104,7 @@ erff(float x)
if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */
s = fabsf(x)-one;
P = pa0+s*(pa1+s*(pa2+s*pa3));
- Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4)));
+ Q = one+s*(qa1+s*(qa2+s*qa3));
if(hx>=0) return erx + P/Q; else return -erx - P/Q;
}
if (ix >= 0x40800000) { /* inf>|x|>=4 */
@@ -113,12 +112,12 @@ erff(float x)
}
x = fabsf(x);
s = one/(x*x);
- if(ix< 0x4036DB6E) { /* |x| < 1/0.35 */
+ if(ix< 0x4036db8c) { /* |x| < 2.85715 ~ 1/0.35 */
R=ra0+s*(ra1+s*(ra2+s*ra3));
- S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4)));
- } else { /* |x| >= 1/0.35 */
- R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4)));
- S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4)));
+ S=one+s*(sa1+s*(sa2+s*sa3));
+ } else { /* |x| >= 2.85715 ~ 1/0.35 */
+ R=rb0+s*(rb1+s*rb2);
+ S=one+s*(sb1+s*(sb2+s*sb3));
}
SET_FLOAT_WORD(z,hx&0xffffe000);
r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S);
@@ -132,8 +131,8 @@ erfcf(float x)
float R,S,P,Q,s,y,z,r;
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
- if(ix>=0x7f800000) { /* erfc(nan)=nan */
- /* erfc(+-inf)=0,2 */
+ if(ix>=0x7f800000) { /* erfcf(nan)=nan */
+ /* erfcf(+-inf)=0,2 */
return (float)(((u_int32_t)hx>>31)<<1)+one/x;
}
@@ -155,7 +154,7 @@ erfcf(float x)
if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */
s = fabsf(x)-one;
P = pa0+s*(pa1+s*(pa2+s*pa3));
- Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4)));
+ Q = one+s*(qa1+s*(qa2+s*qa3));
if(hx>=0) {
z = one-erx; return z - P/Q;
} else {
@@ -165,13 +164,13 @@ erfcf(float x)
if (ix < 0x41300000) { /* |x|<11 */
x = fabsf(x);
s = one/(x*x);
- if(ix< 0x4036DB6D) { /* |x| < 1/.35 ~ 2.857143*/
- R=ra0+s*(ra1+s*(ra2+s*ra3));
- S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4)));
- } else { /* |x| >= 1/.35 ~ 2.857143 */
+ if(ix< 0x4036db8c) { /* |x| < 2.85715 ~ 1/.35 */
+ R=ra0+s*(ra1+s*(ra2+s*ra3));
+ S=one+s*(sa1+s*(sa2+s*sa3));
+ } else { /* |x| >= 2.85715 ~ 1/.35 */
if(hx<0&&ix>=0x40a00000) return two-tiny;/* x < -5 */
- R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4)));
- S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4)));
+ R=rb0+s*(rb1+s*rb2);
+ S=one+s*(sb1+s*(sb2+s*sb3));
}
SET_FLOAT_WORD(z,hx&0xffffe000);
r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S);
OpenPOWER on IntegriCloud