summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorkargl <kargl@FreeBSD.org>2013-08-27 19:46:56 +0000
committerkargl <kargl@FreeBSD.org>2013-08-27 19:46:56 +0000
commit71c97bf245a6c72a4b719e88a4fdaca4b91a8571 (patch)
tree660d0509b28edc4a36445d9938541e4d7f71f2ce /lib
parentf949b936bd5b1d5e6211bfe915da4e2f6796c659 (diff)
downloadFreeBSD-src-71c97bf245a6c72a4b719e88a4fdaca4b91a8571.zip
FreeBSD-src-71c97bf245a6c72a4b719e88a4fdaca4b91a8571.tar.gz
* s_erf.c:
. Use integer literal constants instead of double literal constants. * s_erff.c: . Use integer literal constants instead of casting double literal constants to float. . Update the threshold values from those carried over from erf() to values appropriate for float. . New sets of polynomial coefficients for the rational approximations. These coefficients have little, but positive, effect on the maximum error in ULP in the four intervals, but do improve the overall speed of execution. . Remove redundant GET_FLOAT_WORD(ix,x) as hx already contained the contents that is packed into ix. . Update the mask that is used to zero-out lower-order bits in x in the intervals [1.25, 2.857143] and [2.857143, 12]. In tests on amd64, this change improves the maximum error in ULP from 6.27739 and 63.8095 to 3.16774 and 2.92095 on these intervals for erffc(). Reviewed by: bde
Diffstat (limited to 'lib')
-rw-r--r--lib/msun/src/s_erf.c2
-rw-r--r--lib/msun/src/s_erff.c168
2 files changed, 71 insertions, 99 deletions
diff --git a/lib/msun/src/s_erf.c b/lib/msun/src/s_erf.c
index 0886e5e..a4e91a3 100644
--- a/lib/msun/src/s_erf.c
+++ b/lib/msun/src/s_erf.c
@@ -201,7 +201,7 @@ erf(double x)
if(ix < 0x3feb0000) { /* |x|<0.84375 */
if(ix < 0x3e300000) { /* |x|<2**-28 */
if (ix < 0x00800000)
- return 0.125*(8.0*x+efx8*x); /*avoid underflow */
+ return (8*x+efx8*x)/8; /* avoid spurious underflow */
return x + efx*x;
}
z = x*x;
diff --git a/lib/msun/src/s_erff.c b/lib/msun/src/s_erff.c
index a44e135..b97ca1d 100644
--- a/lib/msun/src/s_erff.c
+++ b/lib/msun/src/s_erff.c
@@ -24,75 +24,59 @@ tiny = 1e-30,
half= 5.0000000000e-01, /* 0x3F000000 */
one = 1.0000000000e+00, /* 0x3F800000 */
two = 2.0000000000e+00, /* 0x40000000 */
- /* c = (subfloat)0.84506291151 */
-erx = 8.4506291151e-01, /* 0x3f58560b */
/*
- * Coefficients for approximation to erf on [0,0.84375]
+ * Coefficients for approximation to erf on [0,0.84375]
*/
efx = 1.2837916613e-01, /* 0x3e0375d4 */
efx8= 1.0270333290e+00, /* 0x3f8375d4 */
-pp0 = 1.2837916613e-01, /* 0x3e0375d4 */
-pp1 = -3.2504209876e-01, /* 0xbea66beb */
-pp2 = -2.8481749818e-02, /* 0xbce9528f */
-pp3 = -5.7702702470e-03, /* 0xbbbd1489 */
-pp4 = -2.3763017452e-05, /* 0xb7c756b1 */
-qq1 = 3.9791721106e-01, /* 0x3ecbbbce */
-qq2 = 6.5022252500e-02, /* 0x3d852a63 */
-qq3 = 5.0813062117e-03, /* 0x3ba68116 */
-qq4 = 1.3249473704e-04, /* 0x390aee49 */
-qq5 = -3.9602282413e-06, /* 0xb684e21a */
/*
- * Coefficients for approximation to erf in [0.84375,1.25]
+ * Domain [0, 0.84375], range ~[-5.4446e-10,5.5197e-10]:
+ * |(erf(x) - x)/x - p(x)/q(x)| < 2**-31.
*/
-pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */
-pa1 = 4.1485610604e-01, /* 0x3ed46805 */
-pa2 = -3.7220788002e-01, /* 0xbebe9208 */
-pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */
-pa4 = -1.1089469492e-01, /* 0xbde31cc2 */
-pa5 = 3.5478305072e-02, /* 0x3d1151b3 */
-pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */
-qa1 = 1.0642088205e-01, /* 0x3dd9f331 */
-qa2 = 5.4039794207e-01, /* 0x3f0a5785 */
-qa3 = 7.1828655899e-02, /* 0x3d931ae7 */
-qa4 = 1.2617121637e-01, /* 0x3e013307 */
-qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */
-qa6 = 1.1984500103e-02, /* 0x3c445aa3 */
+pp0 = 1.28379166e-01F, /* 0x1.06eba8p-3 */
+pp1 = -3.36030394e-01F, /* -0x1.58185ap-2 */
+pp2 = -1.86260219e-03F, /* -0x1.e8451ep-10 */
+qq1 = 3.12324286e-01F, /* 0x1.3fd1f0p-2 */
+qq2 = 2.16070302e-02F, /* 0x1.620274p-6 */
+qq3 = -1.98859419e-03F, /* -0x1.04a626p-9 */
/*
- * Coefficients for approximation to erfc in [1.25,1/0.35]
+ * Domain [0.84375, 1.25], range ~[-1.953e-11,1.940e-11]:
+ * |(erf(x) - erx) - p(x)/q(x)| < 2**-36.
*/
-ra0 = -9.8649440333e-03, /* 0xbc21a093 */
-ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */
-ra2 = -1.0558626175e+01, /* 0xc128f022 */
-ra3 = -6.2375331879e+01, /* 0xc2798057 */
-ra4 = -1.6239666748e+02, /* 0xc322658c */
-ra5 = -1.8460508728e+02, /* 0xc3389ae7 */
-ra6 = -8.1287437439e+01, /* 0xc2a2932b */
-ra7 = -9.8143291473e+00, /* 0xc11d077e */
-sa1 = 1.9651271820e+01, /* 0x419d35ce */
-sa2 = 1.3765776062e+02, /* 0x4309a863 */
-sa3 = 4.3456588745e+02, /* 0x43d9486f */
-sa4 = 6.4538726807e+02, /* 0x442158c9 */
-sa5 = 4.2900814819e+02, /* 0x43d6810b */
-sa6 = 1.0863500214e+02, /* 0x42d9451f */
-sa7 = 6.5702495575e+00, /* 0x40d23f7c */
-sa8 = -6.0424413532e-02, /* 0xbd777f97 */
+erx = 8.42697144e-01F, /* 0x1.af7600p-1. erf(1) rounded to 16 bits. */
+pa0 = 3.64939137e-06F, /* 0x1.e9d022p-19 */
+pa1 = 4.15109694e-01F, /* 0x1.a91284p-2 */
+pa2 = -1.65179938e-01F, /* -0x1.5249dcp-3 */
+pa3 = 1.10914491e-01F, /* 0x1.c64e46p-4 */
+qa1 = 6.02074385e-01F, /* 0x1.344318p-1 */
+qa2 = 5.35934687e-01F, /* 0x1.126608p-1 */
+qa3 = 1.68576106e-01F, /* 0x1.593e6ep-3 */
+qa4 = 5.62181212e-02F, /* 0x1.cc89f2p-5 */
/*
- * Coefficients for approximation to erfc in [1/.35,28]
+ * Domain [1.25,1/0.35], range ~[-7.043e-10,7.457e-10]:
+ * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-30
*/
-rb0 = -9.8649431020e-03, /* 0xbc21a092 */
-rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */
-rb2 = -1.7757955551e+01, /* 0xc18e104b */
-rb3 = -1.6063638306e+02, /* 0xc320a2ea */
-rb4 = -6.3756646729e+02, /* 0xc41f6441 */
-rb5 = -1.0250950928e+03, /* 0xc480230b */
-rb6 = -4.8351919556e+02, /* 0xc3f1c275 */
-sb1 = 3.0338060379e+01, /* 0x41f2b459 */
-sb2 = 3.2579251099e+02, /* 0x43a2e571 */
-sb3 = 1.5367296143e+03, /* 0x44c01759 */
-sb4 = 3.1998581543e+03, /* 0x4547fdbb */
-sb5 = 2.5530502930e+03, /* 0x451f90ce */
-sb6 = 4.7452853394e+02, /* 0x43ed43a7 */
-sb7 = -2.2440952301e+01; /* 0xc1b38712 */
+ra0 = -9.87132732e-03F, /* -0x1.4376b2p-7 */
+ra1 = -5.53605914e-01F, /* -0x1.1b723cp-1 */
+ra2 = -2.17589188e+00F, /* -0x1.1683a0p+1 */
+ra3 = -1.43268085e+00F, /* -0x1.6ec42cp+0 */
+sa1 = 5.45995426e+00F, /* 0x1.5d6fe4p+2 */
+sa2 = 6.69798088e+00F, /* 0x1.acabb8p+2 */
+sa3 = 1.43113089e+00F, /* 0x1.6e5e98p+0 */
+sa4 = -5.77397496e-02F, /* -0x1.d90108p-5 */
+/*
+ * Domain [1/0.35, 11], range ~[-2.264e-13,2.336e-13]:
+ * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-42
+ */
+rb0 = -9.86494310e-03F, /* -0x1.434124p-7 */
+rb1 = -6.25171244e-01F, /* -0x1.401672p-1 */
+rb2 = -6.16498327e+00F, /* -0x1.8a8f16p+2 */
+rb3 = -1.66696873e+01F, /* -0x1.0ab70ap+4 */
+rb4 = -9.53764343e+00F, /* -0x1.313460p+3 */
+sb1 = 1.26884899e+01F, /* 0x1.96081cp+3 */
+sb2 = 4.51839523e+01F, /* 0x1.6978bcp+5 */
+sb3 = 4.72810211e+01F, /* 0x1.7a3f88p+5 */
+sb4 = 8.93033314e+00F; /* 0x1.1dc54ap+3 */
float
erff(float x)
@@ -107,43 +91,37 @@ erff(float x)
}
if(ix < 0x3f580000) { /* |x|<0.84375 */
- if(ix < 0x31800000) { /* |x|<2**-28 */
- if (ix < 0x04000000)
- /*avoid underflow */
- return (float)0.125*((float)8.0*x+efx8*x);
+ if(ix < 0x38800000) { /* |x|<2**-14 */
+ if (ix < 0x04000000) /* |x|<0x1p-119 */
+ return (8*x+efx8*x)/8; /* avoid spurious underflow */
return x + efx*x;
}
z = x*x;
- r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
- s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
+ r = pp0+z*(pp1+z*pp2);
+ s = one+z*(qq1+z*(qq2+z*qq3));
y = r/s;
return x + x*y;
}
if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */
s = fabsf(x)-one;
- P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
- Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
+ P = pa0+s*(pa1+s*(pa2+s*pa3));
+ Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4)));
if(hx>=0) return erx + P/Q; else return -erx - P/Q;
}
- if (ix >= 0x40c00000) { /* inf>|x|>=6 */
+ if (ix >= 0x40800000) { /* inf>|x|>=4 */
if(hx>=0) return one-tiny; else return tiny-one;
}
x = fabsf(x);
s = one/(x*x);
if(ix< 0x4036DB6E) { /* |x| < 1/0.35 */
- R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
- ra5+s*(ra6+s*ra7))))));
- S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
- sa5+s*(sa6+s*(sa7+s*sa8)))))));
+ R=ra0+s*(ra1+s*(ra2+s*ra3));
+ S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4)));
} else { /* |x| >= 1/0.35 */
- R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
- rb5+s*rb6)))));
- S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
- sb5+s*(sb6+s*sb7))))));
+ R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4)));
+ S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4)));
}
- GET_FLOAT_WORD(ix,x);
- SET_FLOAT_WORD(z,ix&0xfffff000);
- r = __ieee754_expf(-z*z-(float)0.5625)*__ieee754_expf((z-x)*(z+x)+R/S);
+ SET_FLOAT_WORD(z,hx&0xffffe000);
+ r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S);
if(hx>=0) return one-r/x; else return r/x-one;
}
@@ -160,11 +138,11 @@ erfcf(float x)
}
if(ix < 0x3f580000) { /* |x|<0.84375 */
- if(ix < 0x23800000) /* |x|<2**-56 */
+ if(ix < 0x33800000) /* |x|<2**-24 */
return one-x;
z = x*x;
- r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
- s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
+ r = pp0+z*(pp1+z*pp2);
+ s = one+z*(qq1+z*(qq2+z*qq3));
y = r/s;
if(hx < 0x3e800000) { /* x<1/4 */
return one-(x+x*y);
@@ -176,33 +154,27 @@ erfcf(float x)
}
if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */
s = fabsf(x)-one;
- P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
- Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
+ P = pa0+s*(pa1+s*(pa2+s*pa3));
+ Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4)));
if(hx>=0) {
z = one-erx; return z - P/Q;
} else {
z = erx+P/Q; return one+z;
}
}
- if (ix < 0x41e00000) { /* |x|<28 */
+ if (ix < 0x41300000) { /* |x|<11 */
x = fabsf(x);
s = one/(x*x);
if(ix< 0x4036DB6D) { /* |x| < 1/.35 ~ 2.857143*/
- R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
- ra5+s*(ra6+s*ra7))))));
- S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
- sa5+s*(sa6+s*(sa7+s*sa8)))))));
+ R=ra0+s*(ra1+s*(ra2+s*ra3));
+ S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4)));
} else { /* |x| >= 1/.35 ~ 2.857143 */
- if(hx<0&&ix>=0x40c00000) return two-tiny;/* x < -6 */
- R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
- rb5+s*rb6)))));
- S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
- sb5+s*(sb6+s*sb7))))));
+ if(hx<0&&ix>=0x40a00000) return two-tiny;/* x < -5 */
+ R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4)));
+ S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4)));
}
- GET_FLOAT_WORD(ix,x);
- SET_FLOAT_WORD(z,ix&0xfffff000);
- r = __ieee754_expf(-z*z-(float)0.5625)*
- __ieee754_expf((z-x)*(z+x)+R/S);
+ SET_FLOAT_WORD(z,hx&0xffffe000);
+ r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S);
if(hx>0) return r/x; else return two-r/x;
} else {
if(hx>0) return tiny*tiny; else return two-tiny;
OpenPOWER on IntegriCloud