summaryrefslogtreecommitdiffstats
path: root/lib/msun
diff options
context:
space:
mode:
author: bde <bde@FreeBSD.org> 2008-02-22 17:26:24 +0000
committer: bde <bde@FreeBSD.org> 2008-02-22 17:26:24 +0000
commit: e31bf4b688423b9d648923a471ed8a41940b9432 (patch)
tree: c62d1c389f2b51b213bfde9adda1ec07b195e07a /lib/msun
parent: 37c23ae5ff6a87332863506dd6e421e2402a8782 (diff)
downloadFreeBSD-src-e31bf4b688423b9d648923a471ed8a41940b9432.zip
FreeBSD-src-e31bf4b688423b9d648923a471ed8a41940b9432.tar.gz
Remove the "quick check no cancellation" optimization for
9pi/2 < |x| < 32pi/2 since it is only a small or negative optimization and it gets in the way of further optimizations. It did one more branch to avoid some integer operations and to use a different dependency on previous results. The branches are fairly predictable so they are usually not a problem, so whether this is a good optimization depends mainly on the timing for the previous results, which is very machine-dependent. On amd64 (A64), this "optimization" is a pessimization of about 1 cycle or 1%; on ia64, it is an optimization of about 2 cycles or 1%; on i386 (A64), it is an optimization of about 5 cycles or 4%; on i386 (Celeron P2) it is an optimization of about 4 cycles or 3% for cos but a pessimization of about 5 cycles for sin and 1 cycle for tan. I think the new i386 (A64) slowness is due to a pipeline stall caused by an avoidable load-store mismatch (so the old timing was better), and the i386 (Celeron) variance is due to its branch predictor not being too good.
Diffstat (limited to 'lib/msun')
-rw-r--r-- lib/msun/src/e_rem_pio2.c | 13
1 files changed, 1 insertions, 12 deletions
diff --git a/lib/msun/src/e_rem_pio2.c b/lib/msun/src/e_rem_pio2.c
index 0dc88f4..708b882 100644
--- a/lib/msun/src/e_rem_pio2.c
+++ b/lib/msun/src/e_rem_pio2.c
@@ -26,15 +26,6 @@ __FBSDID("$FreeBSD$");
#include "math.h"
#include "math_private.h"
-static const int32_t npio2_hw[] = {
-0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
-0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
-0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
-0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
-0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
-0x404858EB, 0x404921FB,
-};
-
/*
* invpio2: 53 bits of 2/pi
* pio2_1: first 33 bit of pi/2
@@ -148,9 +139,7 @@ medium:
#endif
r = t-fn*pio2_1;
w = fn*pio2_1t; /* 1st round good to 85 bit */
- if(n<32&&ix!=npio2_hw[n-1]) {
- y[0] = r-w; /* quick check no cancellation */
- } else {
+ {
u_int32_t high;
j = ix>>20;
y[0] = r-w;
OpenPOWER on IntegriCloud