summaryrefslogtreecommitdiffstats
path: root/lib/msun/ld80/s_exp2l.c
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>2008-02-07 03:17:05 +0000
committerbde <bde@FreeBSD.org>2008-02-07 03:17:05 +0000
commit22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f (patch)
tree39de11ac1fa020a02f07f29ebaac4e42ac04c4d5 /lib/msun/ld80/s_exp2l.c
parent67c8e0948c3ec70e3bdfebe1cf5c79cf617f726b (diff)
downloadFreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.zip
FreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.tar.gz
Use a better method of scaling by 2**k. Instead of adding to the
exponent bits of the reduced result, construct 2**k (hopefully in parallel with the construction of the reduced result) and multiply by it. This tends to be much faster if the construction of 2**k is actually in parallel, and might be faster even with no parallelism since adjustment of the exponent requires a read-modify-wrtite at an unfortunate time for pipelines. In some cases involving exp2* on amd64 (A64), this change saves about 40 cycles or 30%. I think it is inherently only about 12 cycles faster in these cases and the rest of the speedup is from partly-accidentally avoiding compiler pessimizations (the construction of 2**k is now manually scheduled for good results, and -O2 doesn't always mess this up). In most cases on amd64 (A64) and i386 (A64) the speedup is about 20 cycles. The worst case that I found is expf on ia64 where this change is a pessimization of about 10 cycles or 5%. The manual scheduling for plain exp[f] is harder and not as tuned. This change ld128/s_exp2l.c has not been tested.
Diffstat (limited to 'lib/msun/ld80/s_exp2l.c')
-rw-r--r--lib/msun/ld80/s_exp2l.c25
1 files changed, 14 insertions, 11 deletions
diff --git a/lib/msun/ld80/s_exp2l.c b/lib/msun/ld80/s_exp2l.c
index 10c7da7..8a0dbe2 100644
--- a/lib/msun/ld80/s_exp2l.c
+++ b/lib/msun/ld80/s_exp2l.c
@@ -214,8 +214,8 @@ static const double tbl[TBLSIZE * 2] = {
long double
exp2l(long double x)
{
- union IEEEl2bits u;
- long double r, z;
+ union IEEEl2bits u, v;
+ long double r, twopk, twopkp10000, z;
uint32_t hx, ix, i0;
int k;
@@ -267,6 +267,14 @@ exp2l(long double x)
i0 = (i0 & (TBLSIZE - 1)) << 1;
u.e -= redux;
z = x - u.e;
+ v.xbits.man = 1ULL << 63;
+ if (k >= LDBL_MIN_EXP) {
+ v.xbits.expsign = LDBL_MAX_EXP - 1 + k;
+ twopk = v.e;
+ } else {
+ v.xbits.expsign = LDBL_MAX_EXP - 1 + k + 10000;
+ twopkp10000 = v.e;
+ }
/* Compute r = exp2l(y) = exp2lt[i0] * p(z). */
long double t_hi = tbl[i0];
@@ -277,15 +285,10 @@ exp2l(long double x)
/* Scale by 2**k. */
if (k >= LDBL_MIN_EXP) {
- if (k != 0) {
- u.e = r;
- u.xbits.expsign += k;
- return (u.e);
- }
- return (r);
+ if (k == LDBL_MAX_EXP)
+ return (r * 2.0 * 0x1p16383L);
+ return (r * twopk);
} else {
- u.e = r;
- u.xbits.expsign += k + 10000;
- return (u.e * twom10000);
+ return (r * twopkp10000 * twom10000);
}
}
OpenPOWER on IntegriCloud