summaryrefslogtreecommitdiffstats
path: root/lib/msun
diff options
context:
space:
mode:
authordas <das@FreeBSD.org>2008-08-01 01:24:25 +0000
committerdas <das@FreeBSD.org>2008-08-01 01:24:25 +0000
commit2edbbc399729c04094d4ccbcfdc5bf8f55f8f0c4 (patch)
tree8bec6766303c8787f3622a66409d254e7f707b86 /lib/msun
parent0cc238e33974e4198f76ec49aab623bd9510bcd3 (diff)
downloadFreeBSD-src-2edbbc399729c04094d4ccbcfdc5bf8f55f8f0c4.zip
FreeBSD-src-2edbbc399729c04094d4ccbcfdc5bf8f55f8f0c4.tar.gz
Fix some problems with asinf(), acosf(), atanf(), and atan2f():
- Adjust several constants for float precision. Some thresholds that were appropriate for double precision were never changed when these routines were converted to float precision. This has an impact on performance but not accuracy. (Submitted by bde.) - Reduce the degrees of the polynomials used. A smaller degree suffices for float precision. - In asinf(), use double arithmetic in part of the calculation to avoid a corner case and some complicated arithmetic involving a division and some buggy constants. This improves performance and accuracy. Max error (ulps): asinf acosf atanf before 0.925 0.782 0.852 after 0.743 0.804 0.852 As bde points out, it's cheaper for asin*() and acos*() to use polynomials instead of rational functions, but that's a task for another day.
Diffstat (limited to 'lib/msun')
-rw-r--r--lib/msun/src/e_acosf.c28
-rw-r--r--lib/msun/src/e_asinf.c53
-rw-r--r--lib/msun/src/e_atan2f.c4
-rw-r--r--lib/msun/src/s_atanf.c28
4 files changed, 42 insertions, 71 deletions
diff --git a/lib/msun/src/e_acosf.c b/lib/msun/src/e_acosf.c
index 7fbf3b2..ac0d8dd 100644
--- a/lib/msun/src/e_acosf.c
+++ b/lib/msun/src/e_acosf.c
@@ -26,16 +26,10 @@ pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */
static volatile float
pio2_lo = 7.5497894159e-08; /* 0x33a22168 */
static const float
-pS0 = 1.6666667163e-01, /* 0x3e2aaaab */
-pS1 = -3.2556581497e-01, /* 0xbea6b090 */
-pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */
-pS3 = -4.0055535734e-02, /* 0xbd241146 */
-pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */
-pS5 = 3.4793309169e-05, /* 0x3811ef08 */
-qS1 = -2.4033949375e+00, /* 0xc019d139 */
-qS2 = 2.0209457874e+00, /* 0x4001572d */
-qS3 = -6.8828397989e-01, /* 0xbf303361 */
-qS4 = 7.7038154006e-02; /* 0x3d9dc62e */
+pS0 = 1.6666586697e-01,
+pS1 = -4.2743422091e-02,
+pS2 = -8.6563630030e-03,
+qS1 = -7.0662963390e-01;
float
__ieee754_acosf(float x)
@@ -51,16 +45,16 @@ __ieee754_acosf(float x)
return (x-x)/(x-x); /* acos(|x|>1) is NaN */
}
if(ix<0x3f000000) { /* |x| < 0.5 */
- if(ix<=0x23000000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/
+ if(ix<=0x32800000) return pio2_hi+pio2_lo;/*if|x|<2**-26*/
z = x*x;
- p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
- q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+ p = z*(pS0+z*(pS1+z*pS2));
+ q = one+z*qS1;
r = p/q;
return pio2_hi - (x - (pio2_lo-x*r));
} else if (hx<0) { /* x < -0.5 */
z = (one+x)*(float)0.5;
- p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
- q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+ p = z*(pS0+z*(pS1+z*pS2));
+ q = one+z*qS1;
s = __ieee754_sqrtf(z);
r = p/q;
w = r*s-pio2_lo;
@@ -73,8 +67,8 @@ __ieee754_acosf(float x)
GET_FLOAT_WORD(idf,df);
SET_FLOAT_WORD(df,idf&0xfffff000);
c = (z-df*df)/(s+df);
- p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
- q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
+ p = z*(pS0+z*(pS1+z*pS2));
+ q = one+z*qS1;
r = p/q;
w = r*s+c;
return (float)2.0*(df+w);
diff --git a/lib/msun/src/e_asinf.c b/lib/msun/src/e_asinf.c
index f830878..a4c3f00 100644
--- a/lib/msun/src/e_asinf.c
+++ b/lib/msun/src/e_asinf.c
@@ -22,62 +22,45 @@ __FBSDID("$FreeBSD$");
static const float
one = 1.0000000000e+00, /* 0x3F800000 */
huge = 1.000e+30,
-pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */
-pio2_lo = 7.5497894159e-08, /* 0x33a22168 */
-pio4_hi = 7.8539812565e-01, /* 0x3f490fda */
/* coefficient for R(x^2) */
-pS0 = 1.6666667163e-01, /* 0x3e2aaaab */
-pS1 = -3.2556581497e-01, /* 0xbea6b090 */
-pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */
-pS3 = -4.0055535734e-02, /* 0xbd241146 */
-pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */
-pS5 = 3.4793309169e-05, /* 0x3811ef08 */
-qS1 = -2.4033949375e+00, /* 0xc019d139 */
-qS2 = 2.0209457874e+00, /* 0x4001572d */
-qS3 = -6.8828397989e-01, /* 0xbf303361 */
-qS4 = 7.7038154006e-02; /* 0x3d9dc62e */
+pS0 = 1.6666586697e-01,
+pS1 = -4.2743422091e-02,
+pS2 = -8.6563630030e-03,
+qS1 = -7.0662963390e-01;
+
+static const double
+pio2 = 1.570796326794896558e+00;
float
__ieee754_asinf(float x)
{
- float t=0.0,w,p,q,c,r,s;
+ double s;
+ float t=0.0,w,p,q,c,r;
int32_t hx,ix;
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
if(ix==0x3f800000) {
/* asin(1)=+-pi/2 with inexact */
- return x*pio2_hi+x*pio2_lo;
+ return x*pio2;
} else if(ix> 0x3f800000) { /* |x|>= 1 */
return (x-x)/(x-x); /* asin(|x|>1) is NaN */
} else if (ix<0x3f000000) { /* |x|<0.5 */
- if(ix<0x32000000) { /* if |x| < 2**-27 */
+ if(ix<0x39800000) { /* if |x| < 2**-12 */
if(huge+x>one) return x;/* return x with inexact if x!=0*/
} else
t = x*x;
- p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
- q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
+ p = t*(pS0+t*(pS1+t*pS2));
+ q = one+t*qS1;
w = p/q;
return x+x*w;
}
/* 1> |x|>= 0.5 */
w = one-fabsf(x);
t = w*(float)0.5;
- p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
- q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
- s = __ieee754_sqrtf(t);
- if(ix>=0x3F79999A) { /* if |x| > 0.975 */
- w = p/q;
- t = pio2_hi-((float)2.0*(s+s*w)-pio2_lo);
- } else {
- int32_t iw;
- w = s;
- GET_FLOAT_WORD(iw,w);
- SET_FLOAT_WORD(w,iw&0xfffff000);
- c = (t-w*w)/(s+w);
- r = p/q;
- p = (float)2.0*s*r-(pio2_lo-(float)2.0*c);
- q = pio4_hi-(float)2.0*w;
- t = pio4_hi-(p-q);
- }
+ p = t*(pS0+t*(pS1+t*pS2));
+ q = one+t*qS1;
+ s = __ieee754_sqrt(t);
+ w = p/q;
+ t = pio2-2.0*(s+s*w);
if(hx>0) return t; else return -t;
}
diff --git a/lib/msun/src/e_atan2f.c b/lib/msun/src/e_atan2f.c
index 55435b4..a48ada1 100644
--- a/lib/msun/src/e_atan2f.c
+++ b/lib/msun/src/e_atan2f.c
@@ -80,8 +80,8 @@ __ieee754_atan2f(float y, float x)
/* compute y/x */
k = (iy-ix)>>23;
- if(k > 60) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**60 */
- else if(hx<0&&k<-60) z=0.0; /* |y|/x < -2**60 */
+ if(k > 26) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**26 */
+ else if(hx<0&&k<-26) z=0.0; /* |y|/x < -2**26 */
else z=atanf(fabsf(y/x)); /* safe to do y/x */
switch (m) {
case 0: return z ; /* atan(+,+) */
diff --git a/lib/msun/src/s_atanf.c b/lib/msun/src/s_atanf.c
index 8221b42..b3a371f 100644
--- a/lib/msun/src/s_atanf.c
+++ b/lib/msun/src/s_atanf.c
@@ -34,20 +34,14 @@ static const float atanlo[] = {
};
static const float aT[] = {
- 3.3333334327e-01, /* 0x3eaaaaaa */
- -2.0000000298e-01, /* 0xbe4ccccd */
- 1.4285714924e-01, /* 0x3e124925 */
- -1.1111110449e-01, /* 0xbde38e38 */
- 9.0908870101e-02, /* 0x3dba2e6e */
- -7.6918758452e-02, /* 0xbd9d8795 */
- 6.6610731184e-02, /* 0x3d886b35 */
- -5.8335702866e-02, /* 0xbd6ef16b */
- 4.9768779427e-02, /* 0x3d4bda59 */
- -3.6531571299e-02, /* 0xbd15a221 */
- 1.6285819933e-02, /* 0x3c8569d7 */
+ 3.3333328366e-01,
+ -1.9999158382e-01,
+ 1.4253635705e-01,
+ -1.0648017377e-01,
+ 6.1687607318e-02,
};
- static const float
+static const float
one = 1.0,
huge = 1.0e30;
@@ -59,13 +53,13 @@ atanf(float x)
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
- if(ix>=0x50800000) { /* if |x| >= 2^34 */
+ if(ix>=0x4c800000) { /* if |x| >= 2**26 */
if(ix>0x7f800000)
return x+x; /* NaN */
if(hx>0) return atanhi[3]+*(volatile float *)&atanlo[3];
else return -atanhi[3]-*(volatile float *)&atanlo[3];
} if (ix < 0x3ee00000) { /* |x| < 0.4375 */
- if (ix < 0x31000000) { /* |x| < 2^-29 */
+ if (ix < 0x39800000) { /* |x| < 2**-12 */
if(huge+x>one) return x; /* raise inexact */
}
id = -1;
@@ -80,7 +74,7 @@ atanf(float x)
} else {
if (ix < 0x401c0000) { /* |x| < 2.4375 */
id = 2; x = (x-(float)1.5)/(one+(float)1.5*x);
- } else { /* 2.4375 <= |x| < 2^66 */
+ } else { /* 2.4375 <= |x| < 2**26 */
id = 3; x = -(float)1.0/x;
}
}}
@@ -88,8 +82,8 @@ atanf(float x)
z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
- s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
- s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
+ s1 = z*(aT[0]+w*(aT[2]+w*aT[4]));
+ s2 = w*(aT[1]+w*aT[3]);
if (id<0) return x - x*(s1+s2);
else {
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
OpenPOWER on IntegriCloud