Use a better method of scaling by 2**k. Instead of adding to the

exponent bits of the reduced result, construct 2**k (hopefully in parallel with the construction of the reduced result) and multiply by it. This tends to be much faster if the construction of 2**k is actually in parallel, and might be faster even with no parallelism since adjustment of the exponent requires a read-modify-wrtite at an unfortunate time for pipelines. In some cases involving exp2* on amd64 (A64), this change saves about 40 cycles or 30%. I think it is inherently only about 12 cycles faster in these cases and the rest of the speedup is from partly-accidentally avoiding compiler pessimizations (the construction of 2**k is now manually scheduled for good results, and -O2 doesn't always mess this up). In most cases on amd64 (A64) and i386 (A64) the speedup is about 20 cycles. The worst case that I found is expf on ia64 where this change is a pessimization of about 10 cycles or 5%. The manual scheduling for plain exp[f] is harder and not as tuned. This change ld128/s_exp2l.c has not been tested.
author: bde <bde@FreeBSD.org> 2008-02-07 03:17:05 +0000
committer: bde <bde@FreeBSD.org> 2008-02-07 03:17:05 +0000
commit: 22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f (patch)
tree: 39de11ac1fa020a02f07f29ebaac4e42ac04c4d5 /lib/msun/src/e_expf.c
parent: 67c8e0948c3ec70e3bdfebe1cf5c79cf617f726b (diff)
download: FreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.zip
FreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.tar.gz
1 files changed, 8 insertions, 9 deletions
diff --git a/lib/msun/src/e_expf.c b/lib/msun/src/e_expf.c
index 949fa54..45eda01 100644
--- a/lib/msun/src/e_expf.c
+++ b/lib/msun/src/e_expf.c
@@ -43,7 +43,7 @@ static volatile float twom100 = 7.8886090522e-31;      /* 2**-100=0x0d800000 */
 float
 __ieee754_expf(float x)	/* default IEEE double exp */
 {
-	float y,hi=0.0,lo=0.0,c,t;
+	float y,hi=0.0,lo=0.0,c,t,twopk;
 	int32_t k=0,xsb;
 	u_int32_t hx;
 
@@ -80,18 +80,17 @@ __ieee754_expf(float x)	/* default IEEE double exp */
 
     /* x is now in primary range */
 	t  = x*x;
+	if(k >= -125)
+	    SET_FLOAT_WORD(twopk,0x3f800000+(k<<23));
+	else
+	    SET_FLOAT_WORD(twopk,0x3f800000+((k+100)<<23));
 	c  = x - t*(P1+t*P2);
 	if(k==0) 	return one-((x*c)/(c-(float)2.0)-x);
 	else 		y = one-((lo-(x*c)/((float)2.0-c))-hi);
 	if(k >= -125) {
-	    u_int32_t hy;
-	    GET_FLOAT_WORD(hy,y);
-	    SET_FLOAT_WORD(y,hy+(k<<23));	/* add k to y's exponent */
-	    return y;
+	    if(k==128) return y*2.0F*0x1p127F;
+	    return y*twopk;
 	} else {
-	    u_int32_t hy;
-	    GET_FLOAT_WORD(hy,y);
-	    SET_FLOAT_WORD(y,hy+((k+100)<<23));	/* add k to y's exponent */
-	    return y*twom100;
+	    return y*twopk*twom100;
 	}
 }
author	bde <bde@FreeBSD.org>	2008-02-07 03:17:05 +0000
committer	bde <bde@FreeBSD.org>	2008-02-07 03:17:05 +0000
commit	22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f (patch)
tree	39de11ac1fa020a02f07f29ebaac4e42ac04c4d5 /lib/msun/src/e_expf.c
parent	67c8e0948c3ec70e3bdfebe1cf5c79cf617f726b (diff)
download	FreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.zip FreeBSD-src-22e608f1ceb21dc3bb9bf5cda8f34642cc6e623f.tar.gz