summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>2005-11-21 00:38:21 +0000
committerbde <bde@FreeBSD.org>2005-11-21 00:38:21 +0000
commitd96648954fe8f26d8bd4bbae9e6d0970f9b1b970 (patch)
treecbf5d596893f2f4a72928a1f91043aadb662ced9 /lib
parent01155bb2352566f4ee43e9add6ff957def2b9787 (diff)
downloadFreeBSD-src-d96648954fe8f26d8bd4bbae9e6d0970f9b1b970.zip
FreeBSD-src-d96648954fe8f26d8bd4bbae9e6d0970f9b1b970.tar.gz
Use double precision to simplify and optimize a long division.
On Athlons, this gives a speedup of 10-20% for tanf() on uniformly distributed args in [-2Pi, 2Pi]. (It only directly applies to 43% of the args and gives a 16-20% speedup for these (more for AXP than A64), and this gives an overall speedup of 10-12%, which is all that it should; however, it gives an overall speedup of 17-20% with gcc-3.3 on AXP-A64 by mysteriously affecting cases where it isn't executed.) I originally intended to use double precision for all internals of float trig functions and will probably still do this, but benchmarking showed that converting to double precision and back is a pessimization in cases where a simple float precision calculation works, so it may be optimal to switch precisions only when using extra precision is much simpler.
Diffstat (limited to 'lib')
-rw-r--r--lib/msun/src/k_tanf.c16
1 files changed, 1 insertions, 15 deletions
diff --git a/lib/msun/src/k_tanf.c b/lib/msun/src/k_tanf.c
index 31b3137..377db78 100644
--- a/lib/msun/src/k_tanf.c
+++ b/lib/msun/src/k_tanf.c
@@ -63,19 +63,5 @@ __kernel_tanf(float x, float y, int iy)
return (float)(1-((hx>>30)&2))*(v-(float)2.0*(x-(w*w/(w+v)-r)));
}
if(iy==1) return w;
- else { /* if allow error up to 2 ulp,
- simply return -1.0/(x+r) here */
- /* compute -1.0/(x+r) accurately */
- float a,t;
- int32_t i;
- z = w;
- GET_FLOAT_WORD(i,z);
- SET_FLOAT_WORD(z,i&0xfffff000);
- v = r-(z - x); /* z+v = r+x */
- t = a = -(float)1.0/w; /* a = -1.0/w */
- GET_FLOAT_WORD(i,t);
- SET_FLOAT_WORD(t,i&0xfffff000);
- s = (float)1.0+t*z;
- return t+a*(s+t*v);
- }
+ else return -1.0/((double)x+r);
}
OpenPOWER on IntegriCloud