From 76448b86540fdcf11b76be6f9cd2a3a1226178f2 Mon Sep 17 00:00:00 2001
From: bde <bde@FreeBSD.org>
Date: Thu, 5 Jan 2006 09:18:48 +0000
Subject: Oops, on amd64 (and probably on all non-i386 systems), the previous
 commit broke the 2**24 cases where |x| > DBL_MAX/2.  There are exponent range
 problems not just for denormals (underflow) but for large values (overflow). 
 Doubles have more than enough exponent range to avoid the problems, but I
 forgot to convert enough terms to double, so there was an x+x term which was
 sometimes evaluated in float precision.

Unfortunately, this is a pessimization with some combinations of systems
and compilers (it makes no difference on Athlon XP's, but on Athlon64's
it gives a 5% pessimization with gcc-3.4 but not with gcc-3.3).

Exlain the problem better in comments.
---
 lib/msun/src/s_cbrtf.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/lib/msun/src/s_cbrtf.c b/lib/msun/src/s_cbrtf.c
index 3506943..5fcd795 100644
--- a/lib/msun/src/s_cbrtf.c
+++ b/lib/msun/src/s_cbrtf.c
@@ -53,16 +53,21 @@ cbrtf(float x)
 	} else
 	    SET_FLOAT_WORD(t,sign|(hx/3+B1));
 
-    /* first step Newton iteration (solving t*t-x/t == 0) to 16 bits */
-    /* in double precision to avoid problems with denormals */
+    /*
+     * First step Newton iteration (solving t*t-x/t == 0) to 16 bits.  In
+     * double precision so that its terms can be arranged for efficiency
+     * without causing overflow or underflow.
+     */
 	T=t;
 	r=T*T*T;
-	T=T*(x+x+r)/(x+r+r);
+	T=T*((double)x+x+r)/(x+r+r);
 
-    /* second step Newton iteration to 47 bits */
-    /* in double precision for accuracy */
+    /*
+     * Second step Newton iteration to 47 bits.  In double precision for
+     * efficiency and accuracy.
+     */
 	r=T*T*T;
-	T=T*(x+x+r)/(x+r+r);
+	T=T*((double)x+x+r)/(x+r+r);
 
     /* rounding to 24 bits is perfect in round-to-nearest mode */
 	return(T);
-- 
cgit v1.1