diff options
-rw-r--r-- | lib/msun/ld128/s_expl.c | 72 | ||||
-rw-r--r-- | lib/msun/ld80/s_expl.c | 10 |
2 files changed, 55 insertions, 27 deletions
diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c index 9206436..5a9cdd5 100644 --- a/lib/msun/ld128/s_expl.c +++ b/lib/msun/ld128/s_expl.c @@ -41,28 +41,48 @@ __FBSDID("$FreeBSD$"); #define LOG2_INTERVALS 7 #define BIAS (LDBL_MAX_EXP - 1) +static const long double +huge = 0x1p10000L, +twom10000 = 0x1p-10000L, +/* XXX Prevent gcc from erroneously constant folding this: */ static volatile const long double tiny = 0x1p-10000L; static const long double -INV_L = 1.84664965233787316142070359168242182e+02L, -L1 = 5.41521234812457272982212595914567508e-03L, -L2 = -1.02536706388947310094527932552595546e-29L, -huge = 0x1p10000L, +/* log(2**16384 - 0.5) rounded towards zero: */ +/* log(2**16384 - 0.5 + 1) rounded towards zero for expm1l() is the same: */ o_threshold = 11356.523406294143949491931077970763428L, -twom10000 = 0x1p-10000L, +/* log(2**(-16381-64-1)) rounded towards zero: */ u_threshold = -11433.462743336297878837243843452621503L; +static const double +/* + * ln2/INTERVALS = L1+L2 (hi+lo decomposition for multiplication). L1 must + * have at least 22 (= log2(|LDBL_MIN_EXP-extras|) + log2(INTERVALS)) lowest + * bits zero so that multiplication of it by n is exact. + */ +INV_L = 1.8466496523378731e+2, /* 0x171547652b82fe.0p-45 */ +L2 = -1.0253670638894731e-29; /* -0x1.9ff0342542fc3p-97 */ static const long double -A2 = 5.00000000000000000000000000000000000e-1L, -A3 = 1.66666666666666666666666666666666972e-1L, -A4 = 4.16666666666666666666666666653708268e-2L, -A5 = 8.33333333333333333333333315069867254e-3L, -A6 = 1.38888888888888888888996596213795377e-3L, -A7 = 1.98412698412698412718821436278644414e-4L, -A8 = 2.48015873015869681884882576649543128e-5L, -A9 = 2.75573192240103867817876199544468806e-6L, -A10 = 2.75573236172670046201884000197885520e-7L, -A11 = 2.50517544183909126492878226167697856e-8L; +/* 0x1.62e42fefa39ef35793c768000000p-8 */ +L1 = 5.41521234812457272982212595914567508e-3L; + +static const long double +/* + * Domain [-0.002708, 0.002708], range ~[-2.4011e-38, 2.4244e-38]: + * |exp(x) - p(x)| < 2**-124.9 + * (0.002708 is ln2/(2*INTERVALS) rounded up a little). + */ +A2 = 0.5, +A3 = 1.66666666666666666666666666651085500e-1L, +A4 = 4.16666666666666666666666666425885320e-2L, +A5 = 8.33333333333333333334522877160175842e-3L, +A6 = 1.38888888888888888889971139751596836e-3L; + +static const double +A7 = 1.9841269841269471e-4, +A8 = 2.4801587301585284e-5, +A9 = 2.7557324277411234e-6, +A10 = 2.7557333722375072e-7; static const struct { long double hi; @@ -202,7 +222,9 @@ long double expl(long double x) { union IEEEl2bits u, v; - long double fn, r, r1, r2, q, t, twopk, twopkp10000; + long double q, r, r1, t, twopk, twopkp10000; + double dr, fn, r2; + int k, n, n2; uint32_t hx, ix; @@ -227,8 +249,15 @@ expl(long double x) } /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ - fn = x * INV_L + 0x1.8p112 - 0x1.8p112; + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + /* XXX assume no extra precision for the additions, as for trig fns. */ + /* XXX this set of comments is now quadruplicated. */ + fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52; +#if defined(HAVE_EFFICIENT_IRINT) + n = irint(fn); +#else n = (int)fn; +#endif n2 = (unsigned)n % INTERVALS; k = n >> LOG2_INTERVALS; r1 = x - fn * L1; @@ -245,11 +274,12 @@ expl(long double x) twopkp10000 = v.e; } - r = r1 + r2; - q = r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 + r * (A7 + - r * (A8 + r * (A9 + r * (A10 + r * A11))))))))); + /* Evaluate expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */ + dr = r; + q = r2 + r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 + + dr * (A7 + dr * (A8 + dr * (A9 + dr * A10)))))))); t = tbl[n2].lo + tbl[n2].hi; - t = tbl[n2].hi + (tbl[n2].lo + t * (r2 + q + r1)); + t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi; /* Scale by 2**k. */ if (k >= LDBL_MIN_EXP) { diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c index 5b07276..a86a5c8 100644 --- a/lib/msun/ld80/s_expl.c +++ b/lib/msun/ld80/s_expl.c @@ -235,7 +235,8 @@ long double expl(long double x) { union IEEEl2bits u, v; - long double fn, q, r, r1, r2, t, t23, t45, twopk, twopkp10000, z; + long double fn, q, r, r1, r2, t, twopk, twopkp10000; + long double z; int k, n, n2; uint16_t hx, ix; @@ -288,12 +289,9 @@ expl(long double x) twopkp10000 = v.e; } - /* Evaluate expl(midpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */ - /* Here q = q(r), not q(r1), since r1 is lopped like L1. */ - t45 = r * A5 + A4; + /* Evaluate expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */ z = r * r; - t23 = r * A3 + A2; - q = r2 + z * t23 + z * z * t45 + z * z * z * A6; + q = r2 + z * (A2 + r * A3) + z * z * (A4 + r * A5) + z * z * z * A6; t = (long double)tbl[n2].lo + tbl[n2].hi; t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi; |