summaryrefslogtreecommitdiffstats
path: root/lib/msun
diff options
context:
space:
mode:
author: bde <bde@FreeBSD.org> 2005-12-20 01:21:30 +0000
committer: bde <bde@FreeBSD.org> 2005-12-20 01:21:30 +0000
commit: 761a5296f91cc098c572f1be56dd44b000486be9 (patch)
tree: 74a89a98d8bfc821566707b93d967ed9a173dcf9 /lib/msun
parent: b98bf8f1d52a50da586ac0e1425756a9a9629158 (diff)
downloadFreeBSD-src-761a5296f91cc098c572f1be56dd44b000486be9.zip
FreeBSD-src-761a5296f91cc098c572f1be56dd44b000486be9.tar.gz
Extract the high and low words together. With gcc-3.4 on uniformly
distributed non-large args, this saves about 14 of 134 cycles for
Athlon64s and about 5 of 199 cycles for AthlonXPs.

Moved the check for x == 0 inside the check for subnormals. With
gcc-3.4 on uniformly distributed non-large args, this saves another
5 cycles on Athlon64s and loses 1 cycle on AthlonXPs.

Use INSERT_WORDS() and not SET_HIGH_WORD() when converting the first
approximation from bits to double. With gcc-3.4 on uniformly
distributed non-large args, this saves another 4 cycles on both
Athlon64s and AthlonXPs.

Accessing doubles as 2 words may be an optimization on old CPUs, but on
current CPUs it tends to cause extra operations and pipeline stalls,
especially for writes, even when only 1 of the words needs to be accessed.

Removed an unused variable.
Diffstat (limited to 'lib/msun')
-rw-r--r-- lib/msun/src/s_cbrt.c 14
1 files changed, 6 insertions, 8 deletions
diff --git a/lib/msun/src/s_cbrt.c b/lib/msun/src/s_cbrt.c
index 29e53e2..545b0e7 100644
--- a/lib/msun/src/s_cbrt.c
+++ b/lib/msun/src/s_cbrt.c
@@ -43,17 +43,13 @@ cbrt(double x)
uint64_t bits;
} u;
double r,s,t=0.0,w;
- uint64_t bits;
u_int32_t sign;
u_int32_t high,low;
- GET_HIGH_WORD(hx,x);
+ EXTRACT_WORDS(hx,low,x);
sign=hx&0x80000000; /* sign= sign(x) */
hx ^=sign;
if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */
- GET_LOW_WORD(low,x);
- if((hx|low)==0)
- return(x); /* cbrt(0) is itself */
/*
* Rough cbrt to 5 bits:
@@ -70,13 +66,15 @@ cbrt(double x)
* subtraction virtually to keep e >= 0 so that ordinary integer
* division rounds towards minus infinity; this is also efficient.
*/
- if(hx<0x00100000) { /* subnormal number */
+ if(hx<0x00100000) { /* zero or subnormal? */
+ if((hx|low)==0)
+ return(x); /* cbrt(0) is itself */
SET_HIGH_WORD(t,0x43500000); /* set t= 2**54 */
t*=x;
GET_HIGH_WORD(high,t);
- SET_HIGH_WORD(t,sign|((high&0x7fffffff)/3+B2));
+ INSERT_WORDS(t,sign|((high&0x7fffffff)/3+B2),0);
} else
- SET_HIGH_WORD(t,sign|(hx/3+B1));
+ INSERT_WORDS(t,sign|(hx/3+B1),0);
/*
* New cbrt to 23 bits:
OpenPOWER on IntegriCloud