Optimize the 9pi/2 < |x| <= 2**19pi/2 case some more by avoiding an

fabs(), a conditional branch, and sign adjustments of 3 variables for x < 0 when the branch is taken. In double precision, even when the branch is perfectly predicted, this saves about 10 cycles or 10% on amd64 (A64) and i386 (A64) for the negative half of the range, but makes little difference for the positive half of the range. In float precision, it also saves about 4 cycles for the positive half of the range on i386, and many more cycles in both halves on amd64 (28 in the negative half and 11 in the positive half for tanf), but the amd64 times for float precision are anomalously slow so the larger improvement is only a side effect. Previous commits arranged for the x < 0 case to be handled simply: - one part of the rounding method uses the magic number 0x1.8p52 instead of the usual 0x1.0p52. The latter is required for large |x|, but it doesn't work for negative x and we don't need it for large |x|. - another part of the rounding method no longer needs to add `half'. It would have needed to add -half for negative x. - removing the "quick check no cancellation" in the double precision case removed the need to take the absolute value of the quadrant number. Add my noncopyright in e_rem_pio2.c
author: bde <bde@FreeBSD.org> 2008-02-23 12:53:21 +0000
committer: bde <bde@FreeBSD.org> 2008-02-23 12:53:21 +0000
commit: 09a79b45a186e8dfdd44fda85c5281ebdf34ed52 (patch)
tree: 3de825adb5d2230242b14f2ee4a4c23f11b61632 /lib/msun/src
parent: ba175a7cae25d622b1c7b5a8c6dfb4cbe3274eb6 (diff)
download: FreeBSD-src-09a79b45a186e8dfdd44fda85c5281ebdf34ed52.zip
FreeBSD-src-09a79b45a186e8dfdd44fda85c5281ebdf34ed52.tar.gz
2 files changed, 8 insertions, 11 deletions
diff --git a/lib/msun/src/e_rem_pio2.c b/lib/msun/src/e_rem_pio2.c
index fc95125..96fc5c4 100644
--- a/lib/msun/src/e_rem_pio2.c
+++ b/lib/msun/src/e_rem_pio2.c
@@ -10,6 +10,7 @@
  * is preserved.
  * ====================================================
  *
+ * Optimized by Bruce D. Evans.
  */
 
 #include <sys/cdefs.h>
@@ -127,16 +128,15 @@ __ieee754_rem_pio2(double x, double *y)
 	}
 	if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
 medium:
-	    t  = fabs(x);
 	    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	    STRICT_ASSIGN(double,fn,t*invpio2+0x1.8p52);
+	    STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52);
 	    fn = fn-0x1.8p52;
 #ifdef HAVE_EFFICIENT_IRINT
 	    n  = irint(fn);
 #else
 	    n  = (int32_t)fn;
 #endif
-	    r  = t-fn*pio2_1;
+	    r  = x-fn*pio2_1;
 	    w  = fn*pio2_1t;	/* 1st round good to 85 bit */
 	    {
 	        u_int32_t high;
@@ -162,8 +162,7 @@ medium:
 		}
 	    }
 	    y[1] = (r-y[0])-w;
-	    if(hx<0) 	{y[0] = -y[0]; y[1] = -y[1]; return -n;}
-	    else	 return n;
+	    return n;
 	}
     /* 
      * all other (large) arguments
diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c
index 2ed10db..60f8256 100644
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@@ -44,7 +44,7 @@ pio2_1t =  6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */
 int
 __ieee754_rem_pio2f(float x, float *y)
 {
-	double w,t,r,fn;
+	double w,r,fn;
 	double tx[1],ty[2];
 	float z;
 	int32_t e0,n,ix,hx;
@@ -53,21 +53,19 @@ __ieee754_rem_pio2f(float x, float *y)
 	ix = hx&0x7fffffff;
     /* 33+53 bit pi is good enough for medium size */
 	if(ix<=0x49490f80) {		/* |x| ~<= 2^19*(pi/2), medium size */
-	    t  = fabsf(x);
 	    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	    STRICT_ASSIGN(double,fn,t*invpio2+0x1.8p52);
+	    STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52);
 	    fn = fn-0x1.8p52;
 #ifdef HAVE_EFFICIENT_IRINT
 	    n  = irint(fn);
 #else
 	    n  = (int32_t)fn;
 #endif
-	    r  = t-fn*pio2_1;
+	    r  = x-fn*pio2_1;
 	    w  = fn*pio2_1t;
 	    y[0] = r-w;
 	    y[1] = (r-y[0])-w;
-	    if(hx<0) 	{y[0] = -y[0]; y[1] = -y[1]; return -n;}
-	    else	 return n;
+	    return n;
 	}
     /*
      * all other (large) arguments
author	bde <bde@FreeBSD.org>	2008-02-23 12:53:21 +0000
committer	bde <bde@FreeBSD.org>	2008-02-23 12:53:21 +0000
commit	09a79b45a186e8dfdd44fda85c5281ebdf34ed52 (patch)
tree	3de825adb5d2230242b14f2ee4a4c23f11b61632 /lib/msun/src
parent	ba175a7cae25d622b1c7b5a8c6dfb4cbe3274eb6 (diff)
download	FreeBSD-src-09a79b45a186e8dfdd44fda85c5281ebdf34ed52.zip FreeBSD-src-09a79b45a186e8dfdd44fda85c5281ebdf34ed52.tar.gz