summaryrefslogtreecommitdiffstats
path: root/lib/msun/src/e_exp.c
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>2008-02-07 03:17:05 +0000
committerbde <bde@FreeBSD.org>2008-02-07 03:17:05 +0000
commit22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f (patch)
tree39de11ac1fa020a02f07f29ebaac4e42ac04c4d5 /lib/msun/src/e_exp.c
parent67c8e0948c3ec70e3bdfebe1cf5c79cf617f726b (diff)
downloadFreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.zip
FreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.tar.gz
Use a better method of scaling by 2**k. Instead of adding to the
exponent bits of the reduced result, construct 2**k (hopefully in parallel with the construction of the reduced result) and multiply by it. This tends to be much faster if the construction of 2**k is actually in parallel, and might be faster even with no parallelism since adjustment of the exponent requires a read-modify-write at an unfortunate time for pipelines. In some cases involving exp2* on amd64 (A64), this change saves about 40 cycles or 30%. I think it is inherently only about 12 cycles faster in these cases and the rest of the speedup is from partly-accidentally avoiding compiler pessimizations (the construction of 2**k is now manually scheduled for good results, and -O2 doesn't always mess this up). In most cases on amd64 (A64) and i386 (A64) the speedup is about 20 cycles. The worst case that I found is expf on ia64 where this change is a pessimization of about 10 cycles or 5%. The manual scheduling for plain exp[f] is harder and not as tuned. This change to ld128/s_exp2l.c has not been tested.
Diffstat (limited to 'lib/msun/src/e_exp.c')
-rw-r--r--lib/msun/src/e_exp.c17
1 files changed, 8 insertions, 9 deletions
diff --git a/lib/msun/src/e_exp.c b/lib/msun/src/e_exp.c
index 60b8b2a..0809c66 100644
--- a/lib/msun/src/e_exp.c
+++ b/lib/msun/src/e_exp.c
@@ -102,7 +102,7 @@ P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
double
__ieee754_exp(double x) /* default IEEE double exp */
{
- double y,hi=0.0,lo=0.0,c,t;
+ double y,hi=0.0,lo=0.0,c,t,twopk;
int32_t k=0,xsb;
u_int32_t hx;
@@ -142,18 +142,17 @@ __ieee754_exp(double x) /* default IEEE double exp */
/* x is now in primary range */
t = x*x;
+ if(k >= -1021)
+ INSERT_WORDS(twopk,0x3ff00000+(k<<20), 0);
+ else
+ INSERT_WORDS(twopk,0x3ff00000+((k+1000)<<20), 0);
c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
if(k==0) return one-((x*c)/(c-2.0)-x);
else y = one-((lo-(x*c)/(2.0-c))-hi);
if(k >= -1021) {
- u_int32_t hy;
- GET_HIGH_WORD(hy,y);
- SET_HIGH_WORD(y,hy+(k<<20)); /* add k to y's exponent */
- return y;
+ if (k==1024) return y*2.0*0x1p1023;
+ return y*twopk;
} else {
- u_int32_t hy;
- GET_HIGH_WORD(hy,y);
- SET_HIGH_WORD(y,hy+((k+1000)<<20)); /* add k to y's exponent */
- return y*twom1000;
+ return y*twopk*twom1000;
}
}
OpenPOWER on IntegriCloud