Change __ieee754_rem_pio2f() to return double instead of float so that

this function and its callers cosf(), sinf() and tanf() don't waste time converting values from doubles to floats and back for |x| > 9pi/4. All these functions were optimized a few years ago to mostly use doubles internally and across the __kernel*() interfaces but not across the __ieee754_rem_pio2f() interface. This saves about 40 cycles in cosf(), sinf() and tanf() for |x| > 9pi/4 on amd64 (A64), and about 20 cycles on i386 (A64) (except for cosf() and sinf() in the upper range). 40 cycles is about 35% for 9pi/4 < |x| <= 2**19pi/2 and about 5% for |x| > 2**19pi/2. The saving is much larger on amd64 than on i386 since the conversions are not easy to optimize except on i386 where some of them are automatic and others are optimized invalidly. amd64 is still about 10% slower in cosf() and tanf() in the lower range due to conversion overhead. This also gives a tiny speedup for |x| <= 9pi/4 on amd64 (by simplifying the code). It also avoids compiler bugs and/or additional slowness in the conversions on (not yet supported) machines where double_t != double.
author: bde <bde@FreeBSD.org> 2008-02-25 13:33:20 +0000
committer: bde <bde@FreeBSD.org> 2008-02-25 13:33:20 +0000
commit: 83268c5f08f2704f42d759a606f22e68c5c972ae (patch)
tree: 1e217d6cd9b666a2937b2671e5facc73681f694b /lib/msun
parent: bcb6adff032f0c60cc6f679a7c23e322e4b28ef6 (diff)
download: FreeBSD-src-83268c5f08f2704f42d759a606f22e68c5c972ae.zip
FreeBSD-src-83268c5f08f2704f42d759a606f22e68c5c972ae.tar.gz
5 files changed, 24 insertions, 27 deletions
diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c
index 2cc4669..4c6ffc8 100644
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@@ -19,8 +19,8 @@ __FBSDID("$FreeBSD$");
 
 /* __ieee754_rem_pio2f(x,y)
  *
- * return the remainder of x rem pi/2 in y[0]+y[1]
- * use double precision internally
+ * return the remainder of x rem pi/2 in *y
+ * use double precision for everything except passing x
  * use __kernel_rem_pio2() for large x
  */
 
@@ -42,10 +42,10 @@ pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
 pio2_1t =  6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */
 
 int
-__ieee754_rem_pio2f(float x, float *y)
+__ieee754_rem_pio2f(float x, double *y)
 {
 	double w,r,fn;
-	double tx[1],ty[1];
+	double tx[1];
 	float z;
 	int32_t e0,n,ix,hx;
 
@@ -63,23 +63,20 @@ __ieee754_rem_pio2f(float x, float *y)
 #endif
 	    r  = x-fn*pio2_1;
 	    w  = fn*pio2_1t;
-	    y[0] = r-w;
-	    y[1] = (r-y[0])-w;
+	    *y = r-w;
 	    return n;
 	}
     /*
      * all other (large) arguments
      */
 	if(ix>=0x7f800000) {		/* x is inf or NaN */
-	    y[0]=y[1]=x-x; return 0;
+	    *y=x-x; return 0;
 	}
     /* set z = scalbn(|x|,ilogb(|x|)-23) */
 	e0 = (ix>>23)-150;		/* e0 = ilogb(|x|)-23; */
 	SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23)));
 	tx[0] = z;
-	n  =  __kernel_rem_pio2(tx,ty,e0,1,0);
-	y[0] = ty[0];
-	y[1] = ty[0] - y[0];
-	if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
+	n  =  __kernel_rem_pio2(tx,y,e0,1,0);
+	if(hx<0) {*y = -*y; return -n;}
 	return n;
 }
diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h
index 27c14bd..1d3b7a1 100644
--- a/lib/msun/src/math_private.h
+++ b/lib/msun/src/math_private.h
@@ -320,7 +320,7 @@ double	__kernel_cos(double,double);
 double	__kernel_tan(double,double,int);
 
 /* float precision kernel functions */
-int	__ieee754_rem_pio2f(float,float*);
+int	__ieee754_rem_pio2f(float,double*);
 float	__kernel_sindf(double);
 float	__kernel_cosdf(double);
 float	__kernel_tandf(double,int);
diff --git a/lib/msun/src/s_cosf.c b/lib/msun/src/s_cosf.c
index 2d65ee9..3a3b891 100644
--- a/lib/msun/src/s_cosf.c
+++ b/lib/msun/src/s_cosf.c
@@ -34,7 +34,7 @@ c4pio2 = 4*M_PI_2;			/* 0x401921FB, 0x54442D18 */
 float
 cosf(float x)
 {
-	float y[2];
+	double y;
 	int32_t n, hx, ix;
 
 	GET_FLOAT_WORD(hx,x);
@@ -71,13 +71,13 @@ cosf(float x)
 
     /* general argument reduction needed */
 	else {
-	    n = __ieee754_rem_pio2f(x,y);
+	    n = __ieee754_rem_pio2f(x,&y);
 	    switch(n&3) {
-		case 0: return  __kernel_cosdf((double)y[0]+y[1]);
-		case 1: return  __kernel_sindf(-(double)y[0]-y[1]);
-		case 2: return -__kernel_cosdf((double)y[0]+y[1]);
+		case 0: return  __kernel_cosdf(y);
+		case 1: return  __kernel_sindf(-y);
+		case 2: return -__kernel_cosdf(y);
 		default:
-		        return  __kernel_sindf((double)y[0]+y[1]);
+		        return  __kernel_sindf(y);
 	    }
 	}
 }
diff --git a/lib/msun/src/s_sinf.c b/lib/msun/src/s_sinf.c
index 5da246e..69fb384 100644
--- a/lib/msun/src/s_sinf.c
+++ b/lib/msun/src/s_sinf.c
@@ -34,7 +34,7 @@ s4pio2 = 4*M_PI_2;			/* 0x401921FB, 0x54442D18 */
 float
 sinf(float x)
 {
-	float y[2];
+	double y;
 	int32_t n, hx, ix;
 
 	GET_FLOAT_WORD(hx,x);
@@ -69,13 +69,13 @@ sinf(float x)
 
     /* general argument reduction needed */
 	else {
-	    n = __ieee754_rem_pio2f(x,y);
+	    n = __ieee754_rem_pio2f(x,&y);
 	    switch(n&3) {
-		case 0: return  __kernel_sindf((double)y[0]+y[1]);
-		case 1: return  __kernel_cosdf((double)y[0]+y[1]);
-		case 2: return  __kernel_sindf(-(double)y[0]-y[1]);
+		case 0: return  __kernel_sindf(y);
+		case 1: return  __kernel_cosdf(y);
+		case 2: return  __kernel_sindf(-y);
 		default:
-			return -__kernel_cosdf((double)y[0]+y[1]);
+			return -__kernel_cosdf(y);
 	    }
 	}
 }
diff --git a/lib/msun/src/s_tanf.c b/lib/msun/src/s_tanf.c
index 24f73a7..3d8ee47 100644
--- a/lib/msun/src/s_tanf.c
+++ b/lib/msun/src/s_tanf.c
@@ -32,7 +32,7 @@ t4pio2 = 4*M_PI_2;			/* 0x401921FB, 0x54442D18 */
 float
 tanf(float x)
 {
-	float y[2];
+	double y;
 	int32_t n, hx, ix;
 
 	GET_FLOAT_WORD(hx,x);
@@ -61,8 +61,8 @@ tanf(float x)
 
     /* general argument reduction needed */
 	else {
-	    n = __ieee754_rem_pio2f(x,y);
+	    n = __ieee754_rem_pio2f(x,&y);
 	    /* integer parameter: 1 -- n even; -1 -- n odd */
-	    return __kernel_tandf((double)y[0]+y[1],1-((n&1)<<1));
+	    return __kernel_tandf(y,1-((n&1)<<1));
 	}
 }
author	bde <bde@FreeBSD.org>	2008-02-25 13:33:20 +0000
committer	bde <bde@FreeBSD.org>	2008-02-25 13:33:20 +0000
commit	83268c5f08f2704f42d759a606f22e68c5c972ae (patch)
tree	1e217d6cd9b666a2937b2671e5facc73681f694b /lib/msun
parent	bcb6adff032f0c60cc6f679a7c23e322e4b28ef6 (diff)
download	FreeBSD-src-83268c5f08f2704f42d759a606f22e68c5c972ae.zip FreeBSD-src-83268c5f08f2704f42d759a606f22e68c5c972ae.tar.gz