diff options
author | bde <bde@FreeBSD.org> | 2008-02-07 09:42:19 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 2008-02-07 09:42:19 +0000 |
commit | efcf10f47baa709a11c4c4c07dfe44573aa4f0f6 (patch) | |
tree | e7cbad96e8eeab1df9446a3f2362588bb87c2648 /lib/msun/src/s_expm1.c | |
parent | e5687b20d7480538cf75128424f78168b351077b (diff) | |
download | FreeBSD-src-efcf10f47baa709a11c4c4c07dfe44573aa4f0f6.zip FreeBSD-src-efcf10f47baa709a11c4c4c07dfe44573aa4f0f6.tar.gz |
Use a better method of scaling by 2**k. Instead of adding to the
exponent bits of the reduced result, construct 2**k (hopefully in
parallel with the construction of the reduced result) and multiply by
it. This tends to be much faster if the construction of 2**k is
actually in parallel, and might be faster even with no parallelism
since adjustment of the exponent requires a read-modify-wrtite at an
unfortunate time for pipelines.
In some cases involving exp2* on amd64 (A64), this change saves about
40 cycles or 30%. I think it is inherently only about 12 cycles faster
in these cases and the rest of the speedup is from partly-accidentally
avoiding compiler pessimizations (the construction of 2**k is now
manually scheduled for good results, and -O2 doesn't always mess this
up). In most cases on amd64 (A64) and i386 (A64) the speedup is about
20 cycles. The worst case that I found is expf on ia64 where this
change is a pessimization of about 10 cycles or 5%. The manual
scheduling for plain exp[f] is harder and not as tuned.
Details specific to expm1*:
- the saving is closer to 12 cycles than to 40 for expm1* on i386 (A64).
For some reason it is much larger for negative args.
- also convert to __FBSDID().
Diffstat (limited to 'lib/msun/src/s_expm1.c')
-rw-r--r-- | lib/msun/src/s_expm1.c | 21 |
1 files changed, 8 insertions, 13 deletions
diff --git a/lib/msun/src/s_expm1.c b/lib/msun/src/s_expm1.c index 1bf4aa6..03d25b1 100644 --- a/lib/msun/src/s_expm1.c +++ b/lib/msun/src/s_expm1.c @@ -10,9 +10,8 @@ * ==================================================== */ -#ifndef lint -static char rcsid[] = "$FreeBSD$"; -#endif +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); /* expm1(x) * Returns exp(x)-1, the exponential of x minus 1. @@ -130,7 +129,7 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ double expm1(double x) { - double y,hi,lo,c,t,e,hxs,hfx,r1; + double y,hi,lo,c,t,e,hxs,hfx,r1,twopk; int32_t k,xsb; u_int32_t hx; @@ -187,6 +186,7 @@ expm1(double x) e = hxs*((r1-t)/(6.0 - x*t)); if(k==0) return x - (x*e-hxs); /* c is 0 */ else { + INSERT_WORDS(twopk,0x3ff00000+(k<<20),0); /* 2^k */ e = (x*(e-c)-c); e -= hxs; if(k== -1) return 0.5*(x-e)-0.5; @@ -194,26 +194,21 @@ expm1(double x) if(x < -0.25) return -2.0*(e-(x+0.5)); else return one+2.0*(x-e); if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */ - u_int32_t high; y = one-(e-x); - GET_HIGH_WORD(high,y); - SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */ + if (k == 1024) y = y*2.0*0x1p1023; + else y = y*twopk; return y-one; } t = one; if(k<20) { - u_int32_t high; SET_HIGH_WORD(t,0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */ y = t-(e-x); - GET_HIGH_WORD(high,y); - SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */ + y = y*twopk; } else { - u_int32_t high; SET_HIGH_WORD(t,((0x3ff-k)<<20)); /* 2^-k */ y = x-(e+t); y += one; - GET_HIGH_WORD(high,y); - SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */ + y = y*twopk; } } return y; |