diff options
author | bde <bde@FreeBSD.org> | 2008-02-07 03:17:05 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 2008-02-07 03:17:05 +0000 |
commit | 22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f (patch) | |
tree | 39de11ac1fa020a02f07f29ebaac4e42ac04c4d5 /lib/msun/src | |
parent | 67c8e0948c3ec70e3bdfebe1cf5c79cf617f726b (diff) | |
download | FreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.zip FreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.tar.gz |
Use a better method of scaling by 2**k. Instead of adding to the
exponent bits of the reduced result, construct 2**k (hopefully in
parallel with the construction of the reduced result) and multiply by
it. This tends to be much faster if the construction of 2**k is
actually in parallel, and might be faster even with no parallelism
since adjustment of the exponent requires a read-modify-wrtite at an
unfortunate time for pipelines.
In some cases involving exp2* on amd64 (A64), this change saves about
40 cycles or 30%. I think it is inherently only about 12 cycles faster
in these cases and the rest of the speedup is from partly-accidentally
avoiding compiler pessimizations (the construction of 2**k is now
manually scheduled for good results, and -O2 doesn't always mess this
up). In most cases on amd64 (A64) and i386 (A64) the speedup is about
20 cycles. The worst case that I found is expf on ia64 where this
change is a pessimization of about 10 cycles or 5%. The manual
scheduling for plain exp[f] is harder and not as tuned.
This change ld128/s_exp2l.c has not been tested.
Diffstat (limited to 'lib/msun/src')
-rw-r--r-- | lib/msun/src/e_exp.c | 17 | ||||
-rw-r--r-- | lib/msun/src/e_expf.c | 17 | ||||
-rw-r--r-- | lib/msun/src/s_exp2.c | 18 | ||||
-rw-r--r-- | lib/msun/src/s_exp2f.c | 9 |
4 files changed, 28 insertions, 33 deletions
diff --git a/lib/msun/src/e_exp.c b/lib/msun/src/e_exp.c index 60b8b2a..0809c66 100644 --- a/lib/msun/src/e_exp.c +++ b/lib/msun/src/e_exp.c @@ -102,7 +102,7 @@ P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ double __ieee754_exp(double x) /* default IEEE double exp */ { - double y,hi=0.0,lo=0.0,c,t; + double y,hi=0.0,lo=0.0,c,t,twopk; int32_t k=0,xsb; u_int32_t hx; @@ -142,18 +142,17 @@ __ieee754_exp(double x) /* default IEEE double exp */ /* x is now in primary range */ t = x*x; + if(k >= -1021) + INSERT_WORDS(twopk,0x3ff00000+(k<<20), 0); + else + INSERT_WORDS(twopk,0x3ff00000+((k+1000)<<20), 0); c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); if(k==0) return one-((x*c)/(c-2.0)-x); else y = one-((lo-(x*c)/(2.0-c))-hi); if(k >= -1021) { - u_int32_t hy; - GET_HIGH_WORD(hy,y); - SET_HIGH_WORD(y,hy+(k<<20)); /* add k to y's exponent */ - return y; + if (k==1024) return y*2.0*0x1p1023; + return y*twopk; } else { - u_int32_t hy; - GET_HIGH_WORD(hy,y); - SET_HIGH_WORD(y,hy+((k+1000)<<20)); /* add k to y's exponent */ - return y*twom1000; + return y*twopk*twom1000; } } diff --git a/lib/msun/src/e_expf.c b/lib/msun/src/e_expf.c index 949fa54..45eda01 100644 --- a/lib/msun/src/e_expf.c +++ b/lib/msun/src/e_expf.c @@ -43,7 +43,7 @@ static volatile float twom100 = 7.8886090522e-31; /* 2**-100=0x0d800000 */ float __ieee754_expf(float x) /* default IEEE double exp */ { - float y,hi=0.0,lo=0.0,c,t; + float y,hi=0.0,lo=0.0,c,t,twopk; int32_t k=0,xsb; u_int32_t hx; @@ -80,18 +80,17 @@ __ieee754_expf(float x) /* default IEEE double exp */ /* x is now in primary range */ t = x*x; + if(k >= -125) + SET_FLOAT_WORD(twopk,0x3f800000+(k<<23)); + else + SET_FLOAT_WORD(twopk,0x3f800000+((k+100)<<23)); c = x - t*(P1+t*P2); if(k==0) return one-((x*c)/(c-(float)2.0)-x); else y = one-((lo-(x*c)/((float)2.0-c))-hi); if(k >= -125) { - u_int32_t hy; - GET_FLOAT_WORD(hy,y); - SET_FLOAT_WORD(y,hy+(k<<23)); /* add k to y's exponent */ - return y; + if(k==128) return y*2.0F*0x1p127F; + return y*twopk; } else { - u_int32_t hy; - GET_FLOAT_WORD(hy,y); - SET_FLOAT_WORD(y,hy+((k+100)<<23)); /* add k to y's exponent */ - return y*twom100; + return y*twopk*twom100; } } diff --git a/lib/msun/src/s_exp2.c b/lib/msun/src/s_exp2.c index 56554f1..63b8997 100644 --- a/lib/msun/src/s_exp2.c +++ b/lib/msun/src/s_exp2.c @@ -340,7 +340,7 @@ static const double tbl[TBLSIZE * 2] = { double exp2(double x) { - double r, t, z; + double r, t, twopk, twopkp1000, z; uint32_t hx, hr, ix, lx, i0; int k; @@ -375,19 +375,19 @@ exp2(double x) /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ t = tbl[i0]; /* exp2t[i0] */ z -= tbl[i0 + 1]; /* eps[i0] */ + if (k >= -1021 << 20) + INSERT_WORDS(twopk, 0x3ff00000 + k, 0); + else + INSERT_WORDS(twopkp1000, 0x3ff00000 + k + (1000 << 20), 0); r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5)))); /* Scale by 2**(k>>20). */ if(k >= -1021 << 20) { - if (k != 0) { - GET_HIGH_WORD(hr, r); - SET_HIGH_WORD(r, hr + k); - } - return (r); + if (k == 1024 << 20) + return (r * 2.0 * 0x1p1023); + return (r * twopk); } else { - GET_HIGH_WORD(hr, r); - SET_HIGH_WORD(r, hr + (k + (1000 << 20))); - return (r * twom1000); + return (r * twopkp1000 * twom1000); } } diff --git a/lib/msun/src/s_exp2f.c b/lib/msun/src/s_exp2f.c index ffe58a2..d0f6b2f 100644 --- a/lib/msun/src/s_exp2f.c +++ b/lib/msun/src/s_exp2f.c @@ -92,7 +92,7 @@ static const double exp2ft[TBLSIZE] = { float exp2f(float x) { - double tv; + double tv, twopk; float t, z; uint32_t hx, htv, ix, i0; int32_t k; @@ -129,9 +129,6 @@ exp2f(float x) tv = tv + tv * (z * (P1 + z * (P2 + z * (P3 + z * P4)))); /* Scale by 2**(k>>20). */ - if (k != 0) { - GET_HIGH_WORD(htv, tv); - SET_HIGH_WORD(tv, htv + k); - } - return (tv); + INSERT_WORDS(twopk, 0x3ff00000 + k, 0); + return (tv * twopk); } |