diff options
author | das <das@FreeBSD.org> | 2005-02-21 17:44:57 +0000 |
---|---|---|
committer | das <das@FreeBSD.org> | 2005-02-21 17:44:57 +0000 |
commit | c08295146276c267807ebb20abbe29853253c03c (patch) | |
tree | b60f2aab5076ffba4c69f1a06cbbd8e47653dbcc /lib | |
parent | 3f3a242171fb1866a10ff497dfeb3e91fb6afffe (diff) | |
download | FreeBSD-src-c08295146276c267807ebb20abbe29853253c03c.zip FreeBSD-src-c08295146276c267807ebb20abbe29853253c03c.tar.gz |
Use double arithmetic in __ieee754_expf() instead of simulating the extra
precision with pairs of floats (the ln2HI/ln2LO and hi/lo splits). This
results in a performance gain on the order of 10% on amd64 (sledge),
ia64 (pluto1), i386+SSE (Pentium 4), and sparc64 (panther), and a
negligible improvement on i386 without SSE. (The i386 port still
uses the hardware instruction, though.)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/msun/src/e_expf.c | 21 |
1 file changed, 8 insertions, 13 deletions
diff --git a/lib/msun/src/e_expf.c b/lib/msun/src/e_expf.c index 5858d2c..d4b5ad9 100644 --- a/lib/msun/src/e_expf.c +++ b/lib/msun/src/e_expf.c @@ -27,27 +27,24 @@ huge = 1.0e+30, twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */ o_threshold= 8.8721679688e+01, /* 0x42b17180 */ u_threshold= -1.0397208405e+02, /* 0xc2cff1b5 */ -ln2HI[2] ={ 6.9313812256e-01, /* 0x3f317180 */ - -6.9313812256e-01,}, /* 0xbf317180 */ -ln2LO[2] ={ 9.0580006145e-06, /* 0x3717f7d1 */ - -9.0580006145e-06,}, /* 0xb717f7d1 */ invln2 = 1.4426950216e+00, /* 0x3fb8aa3b */ P1 = 1.6666667163e-01, /* 0x3e2aaaab */ P2 = -2.7777778450e-03, /* 0xbb360b61 */ P3 = 6.6137559770e-05, /* 0x388ab355 */ P4 = -1.6533901999e-06, /* 0xb5ddea0e */ P5 = 4.1381369442e-08; /* 0x3331bb4c */ +double ln2[2] = { 6.93147180369123816490e-01, -6.93147180369123816490e-01 }; float -__ieee754_expf(float x) /* default IEEE double exp */ +__ieee754_expf(float x) /* IEEE float exp */ { - float y,hi=0.0,lo=0.0,c,t; + float y,c,t; int32_t k=0,xsb; u_int32_t hx; GET_FLOAT_WORD(hx,x); xsb = (hx>>31)&1; /* sign bit of x */ - hx &= 0x7fffffff; /* high word of |x| */ + hx &= 0x7fffffff; /* |x| */ /* filter out non-finite argument */ if(hx >= 0x42b17218) { /* if |x|>=88.721... 
*/ @@ -62,14 +59,12 @@ __ieee754_expf(float x) /* default IEEE double exp */ /* argument reduction */ if(hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */ if(hx < 0x3F851592) { /* and |x| < 1.5 ln2 */ - hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb; + x = x-ln2[xsb]; k = 1-xsb-xsb; } else { k = invln2*x+halF[xsb]; t = k; - hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */ - lo = t*ln2LO[0]; + x = x - t*ln2[0]; } - x = hi - lo; } else if(hx < 0x31800000) { /* when |x|<2**-28 */ if(huge+x>one) return one+x;/* trigger inexact */ @@ -79,8 +74,8 @@ __ieee754_expf(float x) /* default IEEE double exp */ /* x is now in primary range */ t = x*x; c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); - if(k==0) return one-((x*c)/(c-(float)2.0)-x); - else y = one-((lo-(x*c)/((float)2.0-c))-hi); + y = one-(((double)x*c)/(c-2.0)-x); + if(k==0) return y; if(k >= -125) { u_int32_t hy; GET_FLOAT_WORD(hy,y); |