diff options
author | bde <bde@FreeBSD.org> | 2005-12-20 01:21:30 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 2005-12-20 01:21:30 +0000 |
commit | 761a5296f91cc098c572f1be56dd44b000486be9 (patch) | |
tree | 74a89a98d8bfc821566707b93d967ed9a173dcf9 /lib/msun/src | |
parent | b98bf8f1d52a50da586ac0e1425756a9a9629158 (diff) | |
download | FreeBSD-src-761a5296f91cc098c572f1be56dd44b000486be9.zip FreeBSD-src-761a5296f91cc098c572f1be56dd44b000486be9.tar.gz |
Extract the high and low words together. With gcc-3.4 on uniformly
distributed non-large args, this saves about 14 of 134 cycles for
Athlon64s and about 5 of 199 cycles for AthlonXPs.
Moved the check for x == 0 inside the check for subnormals. With
gcc-3.4 on uniformly distributed non-large args, this saves another
5 cycles on Athlon64s and loses 1 cycle on AthlonXPs.
Use INSERT_WORDS() and not SET_HIGH_WORD() when converting the first
approximation from bits to double. With gcc-3.4 on uniformly distributed
non-large args, this saves another 4 cycles on both Athlon64s and
AthlonXPs.
Accessing doubles as 2 words may be an optimization on old CPUs, but on
current CPUs it tends to cause extra operations and pipeline stalls,
especially for writes, even when only 1 of the words needs to be accessed.
Removed an unused variable.
Diffstat (limited to 'lib/msun/src')
-rw-r--r-- | lib/msun/src/s_cbrt.c | 14 |
1 files changed, 6 insertions, 8 deletions
diff --git a/lib/msun/src/s_cbrt.c b/lib/msun/src/s_cbrt.c index 29e53e2..545b0e7 100644 --- a/lib/msun/src/s_cbrt.c +++ b/lib/msun/src/s_cbrt.c @@ -43,17 +43,13 @@ cbrt(double x) uint64_t bits; } u; double r,s,t=0.0,w; - uint64_t bits; u_int32_t sign; u_int32_t high,low; - GET_HIGH_WORD(hx,x); + EXTRACT_WORDS(hx,low,x); sign=hx&0x80000000; /* sign= sign(x) */ hx ^=sign; if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */ - GET_LOW_WORD(low,x); - if((hx|low)==0) - return(x); /* cbrt(0) is itself */ /* * Rough cbrt to 5 bits: @@ -70,13 +66,15 @@ cbrt(double x) * subtraction virtually to keep e >= 0 so that ordinary integer * division rounds towards minus infinity; this is also efficient. */ - if(hx<0x00100000) { /* subnormal number */ + if(hx<0x00100000) { /* zero or subnormal? */ + if((hx|low)==0) + return(x); /* cbrt(0) is itself */ SET_HIGH_WORD(t,0x43500000); /* set t= 2**54 */ t*=x; GET_HIGH_WORD(high,t); - SET_HIGH_WORD(t,sign|((high&0x7fffffff)/3+B2)); + INSERT_WORDS(t,sign|((high&0x7fffffff)/3+B2),0); } else - SET_HIGH_WORD(t,sign|(hx/3+B1)); + INSERT_WORDS(t,sign|(hx/3+B1),0); /* * New cbrt to 23 bits: |