summaryrefslogtreecommitdiffstats
path: root/lib/msun/src/s_expm1.c
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>2008-02-07 09:42:19 +0000
committerbde <bde@FreeBSD.org>2008-02-07 09:42:19 +0000
commitefcf10f47baa709a11c4c4c07dfe44573aa4f0f6 (patch)
treee7cbad96e8eeab1df9446a3f2362588bb87c2648 /lib/msun/src/s_expm1.c
parente5687b20d7480538cf75128424f78168b351077b (diff)
downloadFreeBSD-src-efcf10f47baa709a11c4c4c07dfe44573aa4f0f6.zip
FreeBSD-src-efcf10f47baa709a11c4c4c07dfe44573aa4f0f6.tar.gz
Use a better method of scaling by 2**k. Instead of adding to the
exponent bits of the reduced result, construct 2**k (hopefully in parallel with the construction of the reduced result) and multiply by it. This tends to be much faster if the construction of 2**k is actually in parallel, and might be faster even with no parallelism since adjustment of the exponent requires a read-modify-wrtite at an unfortunate time for pipelines. In some cases involving exp2* on amd64 (A64), this change saves about 40 cycles or 30%. I think it is inherently only about 12 cycles faster in these cases and the rest of the speedup is from partly-accidentally avoiding compiler pessimizations (the construction of 2**k is now manually scheduled for good results, and -O2 doesn't always mess this up). In most cases on amd64 (A64) and i386 (A64) the speedup is about 20 cycles. The worst case that I found is expf on ia64 where this change is a pessimization of about 10 cycles or 5%. The manual scheduling for plain exp[f] is harder and not as tuned. Details specific to expm1*: - the saving is closer to 12 cycles than to 40 for expm1* on i386 (A64). For some reason it is much larger for negative args. - also convert to __FBSDID().
Diffstat (limited to 'lib/msun/src/s_expm1.c')
-rw-r--r--lib/msun/src/s_expm1.c21
1 files changed, 8 insertions, 13 deletions
diff --git a/lib/msun/src/s_expm1.c b/lib/msun/src/s_expm1.c
index 1bf4aa6..03d25b1 100644
--- a/lib/msun/src/s_expm1.c
+++ b/lib/msun/src/s_expm1.c
@@ -10,9 +10,8 @@
* ====================================================
*/
-#ifndef lint
-static char rcsid[] = "$FreeBSD$";
-#endif
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
/* expm1(x)
* Returns exp(x)-1, the exponential of x minus 1.
@@ -130,7 +129,7 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
double
expm1(double x)
{
- double y,hi,lo,c,t,e,hxs,hfx,r1;
+ double y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
int32_t k,xsb;
u_int32_t hx;
@@ -187,6 +186,7 @@ expm1(double x)
e = hxs*((r1-t)/(6.0 - x*t));
if(k==0) return x - (x*e-hxs); /* c is 0 */
else {
+ INSERT_WORDS(twopk,0x3ff00000+(k<<20),0); /* 2^k */
e = (x*(e-c)-c);
e -= hxs;
if(k== -1) return 0.5*(x-e)-0.5;
@@ -194,26 +194,21 @@ expm1(double x)
if(x < -0.25) return -2.0*(e-(x+0.5));
else return one+2.0*(x-e);
if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
- u_int32_t high;
y = one-(e-x);
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
+ if (k == 1024) y = y*2.0*0x1p1023;
+ else y = y*twopk;
return y-one;
}
t = one;
if(k<20) {
- u_int32_t high;
SET_HIGH_WORD(t,0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */
y = t-(e-x);
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
+ y = y*twopk;
} else {
- u_int32_t high;
SET_HIGH_WORD(t,((0x3ff-k)<<20)); /* 2^-k */
y = x-(e+t);
y += one;
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
+ y = y*twopk;
}
}
return y;
OpenPOWER on IntegriCloud