ld80/s_expl.c:

* Update the evaluation of the polynomial. This allows the removal of the now unused variables t23 and t45.

ld128/s_expl.c:

* Update the evaluation of the polynomial and the intermediate result t. This update allows several numerical constants to be written as double rather than long double constants. Update the constants as appropriate.

Obtained from: bde
author: kargl <kargl@FreeBSD.org> 2013-06-03 18:40:00 +0000
committer: kargl <kargl@FreeBSD.org> 2013-06-03 18:40:00 +0000
commit: 27cbbb200ea78cdbe12b7ede2fb96588541601b6 (patch)
tree: 779db3d3875a7481676a56e07d3d1fc67ff252c5 /lib/msun
parent: 5f8df0bcf91a064c335ba9c37f58c0073b07ede6 (diff)
download: FreeBSD-src-27cbbb200ea78cdbe12b7ede2fb96588541601b6.zip
FreeBSD-src-27cbbb200ea78cdbe12b7ede2fb96588541601b6.tar.gz
2 files changed, 55 insertions, 27 deletions
diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c
index 9206436..5a9cdd5 100644
--- a/lib/msun/ld128/s_expl.c
+++ b/lib/msun/ld128/s_expl.c
@@ -41,28 +41,48 @@ __FBSDID("$FreeBSD$");
 #define	LOG2_INTERVALS	7
 #define	BIAS	(LDBL_MAX_EXP - 1)
 
+static const long double
+huge = 0x1p10000L,
+twom10000 = 0x1p-10000L,
+/* XXX Prevent gcc from erroneously constant folding this: */
 static volatile const long double tiny = 0x1p-10000L;
 
 static const long double
-INV_L = 1.84664965233787316142070359168242182e+02L,
-L1 = 5.41521234812457272982212595914567508e-03L,
-L2 = -1.02536706388947310094527932552595546e-29L,
-huge = 0x1p10000L,
+/* log(2**16384 - 0.5) rounded towards zero: */
+/* log(2**16384 - 0.5 + 1) rounded towards zero for expm1l() is the same: */
 o_threshold =  11356.523406294143949491931077970763428L,
-twom10000 = 0x1p-10000L,
+/* log(2**(-16381-64-1)) rounded towards zero: */
 u_threshold = -11433.462743336297878837243843452621503L;
 
+static const double
+/*
+ * ln2/INTERVALS = L1+L2 (hi+lo decomposition for multiplication).  L1 must
+ * have at least 22 (= log2(|LDBL_MIN_EXP-extras|) + log2(INTERVALS)) lowest
+ * bits zero so that multiplication of it by n is exact.
+ */
+INV_L = 1.8466496523378731e+2,		/*  0x171547652b82fe.0p-45 */
+L2 = -1.0253670638894731e-29;		/* -0x1.9ff0342542fc3p-97 */
 static const long double
-A2 = 5.00000000000000000000000000000000000e-1L,
-A3 = 1.66666666666666666666666666666666972e-1L,
-A4 = 4.16666666666666666666666666653708268e-2L,
-A5 = 8.33333333333333333333333315069867254e-3L,
-A6 = 1.38888888888888888888996596213795377e-3L,
-A7 = 1.98412698412698412718821436278644414e-4L,
-A8 = 2.48015873015869681884882576649543128e-5L,
-A9 = 2.75573192240103867817876199544468806e-6L,
-A10 = 2.75573236172670046201884000197885520e-7L,
-A11 = 2.50517544183909126492878226167697856e-8L;
+/* 0x1.62e42fefa39ef35793c768000000p-8 */
+L1 =  5.41521234812457272982212595914567508e-3L;
+
+static const long double
+/*
+ * Domain [-0.002708, 0.002708], range ~[-2.4011e-38, 2.4244e-38]:
+ * |exp(x) - p(x)| < 2**-124.9
+ * (0.002708 is ln2/(2*INTERVALS) rounded up a little).
+ */
+A2  =  0.5,
+A3  =  1.66666666666666666666666666651085500e-1L,
+A4  =  4.16666666666666666666666666425885320e-2L,
+A5  =  8.33333333333333333334522877160175842e-3L,
+A6  =  1.38888888888888888889971139751596836e-3L;
+
+static const double
+A7  =  1.9841269841269471e-4,
+A8  =  2.4801587301585284e-5,
+A9  =  2.7557324277411234e-6,
+A10 =  2.7557333722375072e-7;
 
 static const struct {
 	long double	hi;
@@ -202,7 +222,9 @@ long double
 expl(long double x)
 {
 	union IEEEl2bits u, v;
-	long double fn, r, r1, r2, q, t, twopk, twopkp10000;
+	long double q, r, r1, t, twopk, twopkp10000;
+	double dr, fn, r2;
+
 	int k, n, n2;
 	uint32_t hx, ix;
 
@@ -227,8 +249,15 @@ expl(long double x)
 	}
 
 	/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
-	fn = x * INV_L + 0x1.8p112 - 0x1.8p112;
+	/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
+	/* XXX assume no extra precision for the additions, as for trig fns. */
+	/* XXX this set of comments is now quadruplicated. */
+	fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52;
+#if defined(HAVE_EFFICIENT_IRINT)
+	n = irint(fn);
+#else
 	n = (int)fn;
+#endif
 	n2 = (unsigned)n % INTERVALS;
 	k = n >> LOG2_INTERVALS;
 	r1 = x - fn * L1;
@@ -245,11 +274,12 @@ expl(long double x)
 		twopkp10000 = v.e;
 	}
 
-	r = r1 + r2;
-	q = r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 + r * (A7 +
-	    r * (A8 + r * (A9 + r * (A10 + r * A11)))))))));
+	/* Evaluate expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */
+	dr = r;
+	q = r2 + r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 +
+	    dr * (A7 + dr * (A8 + dr * (A9 + dr * A10))))))));
 	t = tbl[n2].lo + tbl[n2].hi;
-	t = tbl[n2].hi + (tbl[n2].lo + t * (r2 + q + r1));
+	t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi;
 
 	/* Scale by 2**k. */
 	if (k >= LDBL_MIN_EXP) {
diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c
index 5b07276..a86a5c8 100644
--- a/lib/msun/ld80/s_expl.c
+++ b/lib/msun/ld80/s_expl.c
@@ -235,7 +235,8 @@ long double
 expl(long double x)
 {
 	union IEEEl2bits u, v;
-	long double fn, q, r, r1, r2, t, t23, t45, twopk, twopkp10000, z;
+	long double fn, q, r, r1, r2, t, twopk, twopkp10000;
+	long double z;
 	int k, n, n2;
 	uint16_t hx, ix;
 
@@ -288,12 +289,9 @@ expl(long double x)
 		twopkp10000 = v.e;
 	}
 
-	/* Evaluate expl(midpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */
-	/* Here q = q(r), not q(r1), since r1 is lopped like L1. */
-	t45 = r * A5 + A4;
+	/* Evaluate expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */
 	z = r * r;
-	t23 = r * A3 + A2;
-	q = r2 + z * t23 + z * z * t45 + z * z * z * A6;
+	q = r2 + z * (A2 + r * A3) + z * z * (A4 + r * A5) + z * z * z * A6;
 	t = (long double)tbl[n2].lo + tbl[n2].hi;
 	t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi;
author	kargl <kargl@FreeBSD.org>	2013-06-03 18:40:00 +0000
committer	kargl <kargl@FreeBSD.org>	2013-06-03 18:40:00 +0000
commit	27cbbb200ea78cdbe12b7ede2fb96588541601b6 (patch)
tree	779db3d3875a7481676a56e07d3d1fc67ff252c5 /lib/msun
parent	5f8df0bcf91a064c335ba9c37f58c0073b07ede6 (diff)
download	FreeBSD-src-27cbbb200ea78cdbe12b7ede2fb96588541601b6.zip FreeBSD-src-27cbbb200ea78cdbe12b7ede2fb96588541601b6.tar.gz