summaryrefslogtreecommitdiffstats
path: root/lib/msun/src
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>2005-11-30 06:47:18 +0000
committerbde <bde@FreeBSD.org>2005-11-30 06:47:18 +0000
commit6142ede46fe62dfc695679f3ca3adb48bfc415b2 (patch)
tree5f7f59aeb72c0c1eeeea034021a19022694e4795 /lib/msun/src
parent5d50adf57dd3eb3f8996480fa4dd26e4756e268e (diff)
downloadFreeBSD-src-6142ede46fe62dfc695679f3ca3adb48bfc415b2.zip
FreeBSD-src-6142ede46fe62dfc695679f3ca3adb48bfc415b2.tar.gz
Fixed cosf(x) when x is a "negative" NaN. I broke this in rev.1.10.
cosf(x) is supposed to return something like x when x is a NaN, and we actually fairly consistently return x-x which is normally very like x (on i386 it is x if x is a quiet NaN and x with the quiet bit set if x is a signaling NaN). Rev.1.10 broke this by normalising x to fabsf(x). It's not clear if fabsf(x) should preserve x if x is a NaN, but it actually clears the sign bit, and other parts of the code depended on this. The bugs can be fixed by saving x before normalizing it, and using the saved x only for NaNs, and using uint32_t instead of int32_t for ix so that negative NaNs are not misclassified even if fabsf() doesn't clear their sign bit, but gcc pessimizes the saving very well, especially on Athlon XPs (it generates extra loads and stores, and mixes use of the SSE and i387, and this somehow messes up pipelines). Normalizing x is not a very good optimization anyway, so stop doing it. (It adds latency to the FPU pipelines, but in previous versions it helped except for |x| <= 3pi/4 by simplifying the integer pipelines.) Use the same organization as in s_sinf.c and s_tanf.c with some branches reordered. These changes combined recover most of the performance of the unfixed version on A64 but still lose 10% on AXP with gcc-3.4 -O1 but not with gcc-3.3 -O1.
Diffstat (limited to 'lib/msun/src')
-rw-r--r--lib/msun/src/s_cosf.c30
1 files changed, 19 insertions, 11 deletions
diff --git a/lib/msun/src/s_cosf.c b/lib/msun/src/s_cosf.c
index 84fbc6d..b800a8e 100644
--- a/lib/msun/src/s_cosf.c
+++ b/lib/msun/src/s_cosf.c
@@ -36,10 +36,10 @@ float
cosf(float x)
{
float y[2];
- int32_t n,ix;
+ int32_t n, hx, ix;
- x = fabsf(x);
- GET_FLOAT_WORD(ix,x);
+ GET_FLOAT_WORD(hx,x);
+ ix = hx & 0x7fffffff;
if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if(ix<0x39800000) /* |x| < 2**-12 */
@@ -47,16 +47,24 @@ cosf(float x)
return __kernel_cosdf(x);
}
if(ix<=0x407b53d1) { /* |x| ~<= 5*pi/4 */
- if(ix<=0x4016cbe3) /* |x| ~<= 3pi/4 */
- return __kernel_sindf(c1pio2 - x);
- else
- return -__kernel_cosdf(x - c2pio2);
+ if(ix>0x4016cbe3) /* |x| ~> 3*pi/4 */
+ return -__kernel_cosdf(x + (hx > 0 ? -c2pio2 : c2pio2));
+ else {
+ if(hx>0)
+ return __kernel_sindf(c1pio2 - x);
+ else
+ return __kernel_sindf(x + c1pio2);
+ }
}
if(ix<=0x40e231d5) { /* |x| ~<= 9*pi/4 */
- if(ix<=0x40afeddf) /* |x| ~<= 7*pi/4 */
- return __kernel_sindf(x - c3pio2);
- else
- return __kernel_cosdf(x - c4pio2);
+ if(ix>0x40afeddf) /* |x| ~> 7*pi/4 */
+ return __kernel_cosdf(x + (hx > 0 ? -c4pio2 : c4pio2));
+ else {
+ if(hx>0)
+ return __kernel_sindf(x - c3pio2);
+ else
+ return __kernel_sindf(-c3pio2 - x);
+ }
}
/* cos(Inf or NaN) is NaN */
OpenPOWER on IntegriCloud