From d96648954fe8f26d8bd4bbae9e6d0970f9b1b970 Mon Sep 17 00:00:00 2001
From: bde <bde@FreeBSD.org>
Date: Mon, 21 Nov 2005 00:38:21 +0000
Subject: Use double precision to simplify and optimize a long division.

On athlons, this gives a speedup of 10-20% for tanf() on uniformly
distributed args in [-2Pi, 2Pi].  (It only directly applies for 43%
of the args and gives a 16-20% speedup for these (more for AXP than
A64) and this gives an overall speedup of 10-12% which is all that it
should; however, it gives an overall speedup of 17-20% with gcc-3.3
on AXP-A64 by mysteriously effected cases where it isn't executed.)

I originally intended to use double precision for all internals of
float trig functions and will probably still do this, but benchmarking
showed that converting to double precision and back is a pessimization
in cases where a simple float precision calculation works, so it may
be optimal to switch precisions only when using extra precision is
much simpler.
---
 lib/msun/src/k_tanf.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

(limited to 'lib/msun')

diff --git a/lib/msun/src/k_tanf.c b/lib/msun/src/k_tanf.c
index 31b3137..377db78 100644
--- a/lib/msun/src/k_tanf.c
+++ b/lib/msun/src/k_tanf.c
@@ -63,19 +63,5 @@ __kernel_tanf(float x, float y, int iy)
 	    return (float)(1-((hx>>30)&2))*(v-(float)2.0*(x-(w*w/(w+v)-r)));
 	}
 	if(iy==1) return w;
-	else {		/* if allow error up to 2 ulp,
-			   simply return -1.0/(x+r) here */
-     /*  compute -1.0/(x+r) accurately */
-	    float a,t;
-	    int32_t i;
-	    z  = w;
-	    GET_FLOAT_WORD(i,z);
-	    SET_FLOAT_WORD(z,i&0xfffff000);
-	    v  = r-(z - x); 	/* z+v = r+x */
-	    t = a  = -(float)1.0/w;	/* a = -1.0/w */
-	    GET_FLOAT_WORD(i,t);
-	    SET_FLOAT_WORD(t,i&0xfffff000);
-	    s  = (float)1.0+t*z;
-	    return t+a*(s+t*v);
-	}
+	else return -1.0/((double)x+r);
 }
-- 
cgit v1.1