summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
author: das <das@FreeBSD.org> 2005-02-21 17:44:57 +0000
committer: das <das@FreeBSD.org> 2005-02-21 17:44:57 +0000
commit: c08295146276c267807ebb20abbe29853253c03c (patch)
tree: b60f2aab5076ffba4c69f1a06cbbd8e47653dbcc /lib
parent: 3f3a242171fb1866a10ff497dfeb3e91fb6afffe (diff)
download: FreeBSD-src-c08295146276c267807ebb20abbe29853253c03c.zip
download: FreeBSD-src-c08295146276c267807ebb20abbe29853253c03c.tar.gz
Use double arithmetic instead of simulating it with two floats. This
results in a performance gain on the order of 10% for amd64 (sledge), ia64 (pluto1), i386+SSE (Pentium 4), and sparc64 (panther), and a negligible improvement for i386 without SSE. (The i386 port still uses the hardware instruction, though.)
Diffstat (limited to 'lib')
-rw-r--r-- lib/msun/src/e_expf.c 21
1 files changed, 8 insertions, 13 deletions
diff --git a/lib/msun/src/e_expf.c b/lib/msun/src/e_expf.c
index 5858d2c..d4b5ad9 100644
--- a/lib/msun/src/e_expf.c
+++ b/lib/msun/src/e_expf.c
@@ -27,27 +27,24 @@ huge = 1.0e+30,
twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */
o_threshold= 8.8721679688e+01, /* 0x42b17180 */
u_threshold= -1.0397208405e+02, /* 0xc2cff1b5 */
-ln2HI[2] ={ 6.9313812256e-01, /* 0x3f317180 */
- -6.9313812256e-01,}, /* 0xbf317180 */
-ln2LO[2] ={ 9.0580006145e-06, /* 0x3717f7d1 */
- -9.0580006145e-06,}, /* 0xb717f7d1 */
invln2 = 1.4426950216e+00, /* 0x3fb8aa3b */
P1 = 1.6666667163e-01, /* 0x3e2aaaab */
P2 = -2.7777778450e-03, /* 0xbb360b61 */
P3 = 6.6137559770e-05, /* 0x388ab355 */
P4 = -1.6533901999e-06, /* 0xb5ddea0e */
P5 = 4.1381369442e-08; /* 0x3331bb4c */
+double ln2[2] = { 6.93147180369123816490e-01, -6.93147180369123816490e-01 };
float
-__ieee754_expf(float x) /* default IEEE double exp */
+__ieee754_expf(float x) /* IEEE float exp */
{
- float y,hi=0.0,lo=0.0,c,t;
+ float y,c,t;
int32_t k=0,xsb;
u_int32_t hx;
GET_FLOAT_WORD(hx,x);
xsb = (hx>>31)&1; /* sign bit of x */
- hx &= 0x7fffffff; /* high word of |x| */
+ hx &= 0x7fffffff; /* |x| */
/* filter out non-finite argument */
if(hx >= 0x42b17218) { /* if |x|>=88.721... */
@@ -62,14 +59,12 @@ __ieee754_expf(float x) /* default IEEE double exp */
/* argument reduction */
if(hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
if(hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
- hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;
+ x = x-ln2[xsb]; k = 1-xsb-xsb;
} else {
k = invln2*x+halF[xsb];
t = k;
- hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */
- lo = t*ln2LO[0];
+ x = x - t*ln2[0];
}
- x = hi - lo;
}
else if(hx < 0x31800000) { /* when |x|<2**-28 */
if(huge+x>one) return one+x;/* trigger inexact */
@@ -79,8 +74,8 @@ __ieee754_expf(float x) /* default IEEE double exp */
/* x is now in primary range */
t = x*x;
c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
- if(k==0) return one-((x*c)/(c-(float)2.0)-x);
- else y = one-((lo-(x*c)/((float)2.0-c))-hi);
+ y = one-(((double)x*c)/(c-2.0)-x);
+ if(k==0) return y;
if(k >= -125) {
u_int32_t hy;
GET_FLOAT_WORD(hy,y);
OpenPOWER on IntegriCloud