Rearrange the polynomial evaluation for better parallelism. This is faster on all machines tested (old Celeron (P2), A64 (amd64 and i386) and ia64) except on ia64 when compiled with -O1. It takes 2 more multiplications, so it will be slower on old machines. The speedup is about 8 cycles = 17% on A64 (amd64 and i386) with best CFLAGS and some parallelism in the caller.

Move the evaluation of 2**k up a bit so that it doesn't compete too much with the new polynomial evaluation. Unlike the previous optimization, this rearrangement cannot change the result, so compilers and CPU schedulers can do it, but they don't do it quite right yet. This saves a whole 1 or 2 cycles on A64.
author: bde <bde@FreeBSD.org> 2008-02-13 08:36:13 +0000
committer: bde <bde@FreeBSD.org> 2008-02-13 08:36:13 +0000
commit: d2c1b707cd2f43b3c1dbb01e4ea59ee88399d193 (patch)
tree: 68dbb61c7c97877682fe4bdb87a8aa7e5a2c266d /lib/msun
parent: d7ab8bd66aad2a1deedeb949e903569143734cf4 (diff)
download: FreeBSD-src-d2c1b707cd2f43b3c1dbb01e4ea59ee88399d193.zip
FreeBSD-src-d2c1b707cd2f43b3c1dbb01e4ea59ee88399d193.tar.gz
1 file changed, 4 insertions, 3 deletions
diff --git a/lib/msun/src/s_exp2f.c b/lib/msun/src/s_exp2f.c
index 8ab2962..1b9299f 100644
--- a/lib/msun/src/s_exp2f.c
+++ b/lib/msun/src/s_exp2f.c
@@ -93,7 +93,7 @@ static const double exp2ft[TBLSIZE] = {
 float
 exp2f(float x)
 {
-	double tv, twopk, z;
+	double tv, twopk, u, z;
 	float t;
 	uint32_t hx, htv, ix, i0;
 	int32_t k;
@@ -124,12 +124,13 @@ exp2f(float x)
 	i0 &= TBLSIZE - 1;
 	t -= redux;
 	z = x - t;
+	INSERT_WORDS(twopk, 0x3ff00000 + k, 0);
 
 	/* Compute r = exp2(y) = exp2ft[i0] * p(z). */
 	tv = exp2ft[i0];
-	tv = tv + tv * (z * (P1 + z * (P2 + z * (P3 + z * P4))));
+	u = tv * z;
+	tv = tv + u * (P1 + z * P2) + u * (z * z) * (P3 + z * P4);
 
 	/* Scale by 2**(k>>20). */
-	INSERT_WORDS(twopk, 0x3ff00000 + k, 0);
 	return (tv * twopk);
 }
author	bde <bde@FreeBSD.org>	2008-02-13 08:36:13 +0000
committer	bde <bde@FreeBSD.org>	2008-02-13 08:36:13 +0000
commit	d2c1b707cd2f43b3c1dbb01e4ea59ee88399d193 (patch)
tree	68dbb61c7c97877682fe4bdb87a8aa7e5a2c266d /lib/msun
parent	d7ab8bd66aad2a1deedeb949e903569143734cf4 (diff)
download	FreeBSD-src-d2c1b707cd2f43b3c1dbb01e4ea59ee88399d193.zip FreeBSD-src-d2c1b707cd2f43b3c1dbb01e4ea59ee88399d193.tar.gz