summaryrefslogtreecommitdiffstats
path: root/lib/msun
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>2008-02-09 12:53:15 +0000
committerbde <bde@FreeBSD.org>2008-02-09 12:53:15 +0000
commit1960b378b5f38a977edd424069fdc04a1cfe120e (patch)
treeed6f68024b849603ab37303f37f6d8166a9125f2 /lib/msun
parent5b9b5c8121bf0edacdab9359bc8536551869a07d (diff)
downloadFreeBSD-src-1960b378b5f38a977edd424069fdc04a1cfe120e.zip
FreeBSD-src-1960b378b5f38a977edd424069fdc04a1cfe120e.tar.gz
As usual, use a minimax polynomial that is specialized for float
precision. The new polynomial has degree 4 instead of 10, and a maximum error of 2**-30.04 ulps instead of 2**-33.15. This doesn't affect the final error significantly; the maximum error was and is about 0.5015 ulps on i386 -O1, and the number of cases with an error of > 0.5 ulps increases from 13851 to 14407. Note that the error is only this close to 0.5 ulps due to excessive extra precision caused by compiler bugs on i386. The extra precision could be obtained intentionally, and is useful for keeping the error of the hyperbolic float functions below 1 ulp, since these functions are implemented using expm1f. My recent change for scaling by 2**k had the unintentional side effect of retaining extra precision for longer, so callers of expm1f see errors of more like 0.0015 ulps than 0.5015 ulps, and for the hyperbolic functions this reduces the maximum error from nearly 2 ulps to about 0.75 ulps. This is about 10% faster on i386 (A64). expm1* is still very slow, but now the float version is actually significantly faster. The algorithm is very sophisticated but not very good except on machines with fast division.
Diffstat (limited to 'lib/msun')
-rw-r--r-- lib/msun/src/s_expm1f.c 15
1 files changed, 8 insertions, 7 deletions
diff --git a/lib/msun/src/s_expm1f.c b/lib/msun/src/s_expm1f.c
index b6ab5b3..6c5d3e7 100644
--- a/lib/msun/src/s_expm1f.c
+++ b/lib/msun/src/s_expm1f.c
@@ -27,12 +27,13 @@ o_threshold = 8.8721679688e+01,/* 0x42b17180 */
ln2_hi = 6.9313812256e-01,/* 0x3f317180 */
ln2_lo = 9.0580006145e-06,/* 0x3717f7d1 */
invln2 = 1.4426950216e+00,/* 0x3fb8aa3b */
- /* scaled coefficients related to expm1 */
-Q1 = -3.3333335072e-02, /* 0xbd088889 */
-Q2 = 1.5873016091e-03, /* 0x3ad00d01 */
-Q3 = -7.9365076090e-05, /* 0xb8a670cd */
-Q4 = 4.0082177293e-06, /* 0x36867e54 */
-Q5 = -2.0109921195e-07; /* 0xb457edbb */
+/*
+ * Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]:
+ * |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04
+ * Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c):
+ */
+Q1 = -3.3333212137e-2, /* -0x888868.0p-28 */
+Q2 = 1.5807170421e-3; /* 0xcf3010.0p-33 */
float
expm1f(float x)
@@ -86,7 +87,7 @@ expm1f(float x)
/* x is now in primary range */
hfx = (float)0.5*x;
hxs = x*hfx;
- r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))));
+ r1 = one+hxs*(Q1+hxs*Q2);
t = (float)3.0-r1*hfx;
e = hxs*((r1-t)/((float)6.0 - x*t));
if(k==0) return x - (x*e-hxs); /* c is 0 */
OpenPOWER on IntegriCloud