diff options
author | bde <bde@FreeBSD.org> | 2005-11-30 06:47:18 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 2005-11-30 06:47:18 +0000 |
commit | 6142ede46fe62dfc695679f3ca3adb48bfc415b2 (patch) | |
tree | 5f7f59aeb72c0c1eeeea034021a19022694e4795 /lib/msun/src | |
parent | 5d50adf57dd3eb3f8996480fa4dd26e4756e268e (diff) | |
download | FreeBSD-src-6142ede46fe62dfc695679f3ca3adb48bfc415b2.zip FreeBSD-src-6142ede46fe62dfc695679f3ca3adb48bfc415b2.tar.gz |
Fixed cosf(x) when x is a "negative" NaN. I broke this in rev.1.10.
cosf(x) is supposed to return something like x when x is a NaN, and
we actually fairly consistently return x-x which is normally very like
x (on i386 it is x if x is a quiet NaN, and x with the quiet bit
set if x is a signaling NaN). Rev.1.10 broke this by normalising x to
fabsf(x). It's not clear if fabsf(x) should preserve x if x is a NaN,
but it actually clears the sign bit, and other parts of the code depended
on this.
The bugs can be fixed by saving x before normalizing it, and using the
saved x only for NaNs, and using uint32_t instead of int32_t for ix
so that negative NaNs are not misclassified even if fabsf() doesn't
clear their sign bit, but gcc pessimizes the saving very well, especially
on Athlon XPs (it generates extra loads and stores, and mixes use of
the SSE and i387, and this somehow messes up pipelines). Normalizing
x is not a very good optimization anyway, so stop doing it. (It adds
latency to the FPU pipelines, but in previous versions it helped except
for |x| <= 3pi/4 by simplifying the integer pipelines.) Use the same
organization as in s_sinf.c and s_tanf.c with some branches reordered.
These changes combined recover most of the performance of the unfixed
version on A64 but still lose 10% on AXP with gcc-3.4 -O1 but not with
gcc-3.3 -O1.
Diffstat (limited to 'lib/msun/src')
-rw-r--r-- | lib/msun/src/s_cosf.c | 30 |
1 files changed, 19 insertions, 11 deletions
diff --git a/lib/msun/src/s_cosf.c b/lib/msun/src/s_cosf.c index 84fbc6d..b800a8e 100644 --- a/lib/msun/src/s_cosf.c +++ b/lib/msun/src/s_cosf.c @@ -36,10 +36,10 @@ float cosf(float x) { float y[2]; - int32_t n,ix; + int32_t n, hx, ix; - x = fabsf(x); - GET_FLOAT_WORD(ix,x); + GET_FLOAT_WORD(hx,x); + ix = hx & 0x7fffffff; if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ if(ix<0x39800000) /* |x| < 2**-12 */ @@ -47,16 +47,24 @@ cosf(float x) return __kernel_cosdf(x); } if(ix<=0x407b53d1) { /* |x| ~<= 5*pi/4 */ - if(ix<=0x4016cbe3) /* |x| ~<= 3pi/4 */ - return __kernel_sindf(c1pio2 - x); - else - return -__kernel_cosdf(x - c2pio2); + if(ix>0x4016cbe3) /* |x| ~> 3*pi/4 */ + return -__kernel_cosdf(x + (hx > 0 ? -c2pio2 : c2pio2)); + else { + if(hx>0) + return __kernel_sindf(c1pio2 - x); + else + return __kernel_sindf(x + c1pio2); + } } if(ix<=0x40e231d5) { /* |x| ~<= 9*pi/4 */ - if(ix<=0x40afeddf) /* |x| ~<= 7*pi/4 */ - return __kernel_sindf(x - c3pio2); - else - return __kernel_cosdf(x - c4pio2); + if(ix>0x40afeddf) /* |x| ~> 7*pi/4 */ + return __kernel_cosdf(x + (hx > 0 ? -c4pio2 : c4pio2)); + else { + if(hx>0) + return __kernel_sindf(x - c3pio2); + else + return __kernel_sindf(-c3pio2 - x); + } } /* cos(Inf or NaN) is NaN */ |