More fixes for arg reduction near pi/2 on systems with broken assignment

to floats (mainly i386's). All errors of more than 1 ulp for float precision trig functions were supposed to have been fixed; however, compiling with gcc -O2 uncovered 18250 more such errors for cosf(), with a maximum error of 1.409 ulps. Use essentially the same fix as in rev.1.8 of k_rem_pio2f.c (access a non-volatile variable as a volatile). Here the -O1 case apparently worked because the variable is in a 2-element array and it takes -O2 to mess up such a variable by putting it in a register. The maximum error for cosf() on i386 with gcc -O2 is now 0.5467 (it is still 0.5650 with gcc -O1). This shows that -O2 still causes some extra precision, but the extra precision is now good. Extra precision is harmful mainly for implementing extra precision in software. We want to represent x+y as w+r where both "+" operations are in infinite precision and r is tiny compared with w. There is a standard algorithm for this (Knuth (1981) 4.2.2 Theorem C), and fdlibm uses this routinely, but the algorithm requires w and r to have the same precision as x and y. w is just x+y (calculated in the same finite precision as x and y), and r is a tiny correction term. The i386 gcc bugs tend to give extra precision in w, and then using this extra precision in the calculation of r results in the correction mostly staying in w and being missing from r. There still tends to be no problem if the result is a simple expression involving w and r -- modulo spills, w keeps its extra precision and r remains the right correction for this wrong w. However, here we want to pass w and r to extern functions. Extra precision is not retained in function args, so w gets fixed up, but the change to the tiny r is tinier, so r almost remains as a wrong correction for the right w.
author: bde <bde@FreeBSD.org> 2005-10-25 12:13:37 +0000
committer: bde <bde@FreeBSD.org> 2005-10-25 12:13:37 +0000
commit: d6cdac5f7a822ac91d262ee4f9552a385500c3aa (patch)
tree: 003119f9239784daa7fdfbda289ff531ea2f3162 /lib
parent: 64e3944dbab2ffa6fa8e199e327151bda83382b4 (diff)
download: FreeBSD-src-d6cdac5f7a822ac91d262ee4f9552a385500c3aa.zip
FreeBSD-src-d6cdac5f7a822ac91d262ee4f9552a385500c3aa.tar.gz
1 files changed, 8 insertions, 5 deletions
diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c
index 5e37eaa..53d1c8e 100644
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@@ -26,6 +26,9 @@ static char rcsid[] = "$FreeBSD$";
 #include "math.h"
 #include "math_private.h"
 
+/* Clip any extra precision in the float variable v. */
+#define	cliptofloat(v)	(*(volatile float *)&(v))
+
 /*
  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
  */
@@ -117,22 +120,22 @@ pio2_3t =  6.1232342629e-17; /* 0x248d3132 */
 		z = x - pio2_1;
 		if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */
 		    y[0] = z - pio2_1t;
-		    y[1] = (z-y[0])-pio2_1t;
+		    y[1] = (z-cliptofloat(y[0]))-pio2_1t;
 		} else {		/* near pi/2, use 17+17+24 bit pi */
 		    z -= pio2_2;
 		    y[0] = z - pio2_2t;
-		    y[1] = (z-y[0])-pio2_2t;
+		    y[1] = (z-cliptofloat(y[0]))-pio2_2t;
 		}
 		return 1;
 	    } else {	/* negative x */
 		z = x + pio2_1;
 		if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */
 		    y[0] = z + pio2_1t;
-		    y[1] = (z-y[0])+pio2_1t;
+		    y[1] = (z-cliptofloat(y[0]))+pio2_1t;
 		} else {		/* near pi/2, use 17+17+24 bit pi */
 		    z += pio2_2;
 		    y[0] = z + pio2_2t;
-		    y[1] = (z-y[0])+pio2_2t;
+		    y[1] = (z-cliptofloat(y[0]))+pio2_2t;
 		}
 		return -1;
 	    }
@@ -168,7 +171,7 @@ pio2_3t =  6.1232342629e-17; /* 0x248d3132 */
 		    }
 		}
 	    }
-	    y[1] = (r-y[0])-w;
+	    y[1] = (r-cliptofloat(y[0]))-w;
 	    if(hx<0) 	{y[0] = -y[0]; y[1] = -y[1]; return -n;}
 	    else	 return n;
 	}
author	bde <bde@FreeBSD.org>	2005-10-25 12:13:37 +0000
committer	bde <bde@FreeBSD.org>	2005-10-25 12:13:37 +0000
commit	d6cdac5f7a822ac91d262ee4f9552a385500c3aa (patch)
tree	003119f9239784daa7fdfbda289ff531ea2f3162 /lib
parent	64e3944dbab2ffa6fa8e199e327151bda83382b4 (diff)
download	FreeBSD-src-d6cdac5f7a822ac91d262ee4f9552a385500c3aa.zip FreeBSD-src-d6cdac5f7a822ac91d262ee4f9552a385500c3aa.tar.gz