diff options
author | das <das@FreeBSD.org> | 2008-08-01 01:24:25 +0000 |
---|---|---|
committer | das <das@FreeBSD.org> | 2008-08-01 01:24:25 +0000 |
commit | 2edbbc399729c04094d4ccbcfdc5bf8f55f8f0c4 (patch) | |
tree | 8bec6766303c8787f3622a66409d254e7f707b86 /lib/msun | |
parent | 0cc238e33974e4198f76ec49aab623bd9510bcd3 (diff) | |
download | FreeBSD-src-2edbbc399729c04094d4ccbcfdc5bf8f55f8f0c4.zip FreeBSD-src-2edbbc399729c04094d4ccbcfdc5bf8f55f8f0c4.tar.gz |
Fix some problems with asinf(), acosf(), atanf(), and atan2f():
- Adjust several constants for float precision. Some thresholds
that were appropriate for double precision were never changed
when these routines were converted to float precision. This
has an impact on performance but not accuracy. (Submitted by bde.)
- Reduce the degrees of the polynomials used. A smaller degree
suffices for float precision.
- In asinf(), use double arithmetic in part of the calculation to
avoid a corner case and some complicated arithmetic involving a
division and some buggy constants. This improves performance and
accuracy.
Max error (ulps):
asinf acosf atanf
before 0.925 0.782 0.852
after 0.743 0.804 0.852
As bde points out, it's cheaper for asin*() and acos*() to use
polynomials instead of rational functions, but that's a task for
another day.
Diffstat (limited to 'lib/msun')
-rw-r--r-- | lib/msun/src/e_acosf.c | 28 | ||||
-rw-r--r-- | lib/msun/src/e_asinf.c | 53 | ||||
-rw-r--r-- | lib/msun/src/e_atan2f.c | 4 | ||||
-rw-r--r-- | lib/msun/src/s_atanf.c | 28 |
4 files changed, 42 insertions, 71 deletions
diff --git a/lib/msun/src/e_acosf.c b/lib/msun/src/e_acosf.c index 7fbf3b2..ac0d8dd 100644 --- a/lib/msun/src/e_acosf.c +++ b/lib/msun/src/e_acosf.c @@ -26,16 +26,10 @@ pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */ static volatile float pio2_lo = 7.5497894159e-08; /* 0x33a22168 */ static const float -pS0 = 1.6666667163e-01, /* 0x3e2aaaab */ -pS1 = -3.2556581497e-01, /* 0xbea6b090 */ -pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */ -pS3 = -4.0055535734e-02, /* 0xbd241146 */ -pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */ -pS5 = 3.4793309169e-05, /* 0x3811ef08 */ -qS1 = -2.4033949375e+00, /* 0xc019d139 */ -qS2 = 2.0209457874e+00, /* 0x4001572d */ -qS3 = -6.8828397989e-01, /* 0xbf303361 */ -qS4 = 7.7038154006e-02; /* 0x3d9dc62e */ +pS0 = 1.6666586697e-01, +pS1 = -4.2743422091e-02, +pS2 = -8.6563630030e-03, +qS1 = -7.0662963390e-01; float __ieee754_acosf(float x) @@ -51,16 +45,16 @@ __ieee754_acosf(float x) return (x-x)/(x-x); /* acos(|x|>1) is NaN */ } if(ix<0x3f000000) { /* |x| < 0.5 */ - if(ix<=0x23000000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/ + if(ix<=0x32800000) return pio2_hi+pio2_lo;/*if|x|<2**-26*/ z = x*x; - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); + p = z*(pS0+z*(pS1+z*pS2)); + q = one+z*qS1; r = p/q; return pio2_hi - (x - (pio2_lo-x*r)); } else if (hx<0) { /* x < -0.5 */ z = (one+x)*(float)0.5; - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); + p = z*(pS0+z*(pS1+z*pS2)); + q = one+z*qS1; s = __ieee754_sqrtf(z); r = p/q; w = r*s-pio2_lo; @@ -73,8 +67,8 @@ __ieee754_acosf(float x) GET_FLOAT_WORD(idf,df); SET_FLOAT_WORD(df,idf&0xfffff000); c = (z-df*df)/(s+df); - p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); - q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4))); + p = z*(pS0+z*(pS1+z*pS2)); + q = one+z*qS1; r = p/q; w = r*s+c; return (float)2.0*(df+w); diff --git a/lib/msun/src/e_asinf.c b/lib/msun/src/e_asinf.c index f830878..a4c3f00 100644 --- a/lib/msun/src/e_asinf.c +++ b/lib/msun/src/e_asinf.c @@ -22,62 +22,45 @@ __FBSDID("$FreeBSD$"); static const float one = 1.0000000000e+00, /* 0x3F800000 */ huge = 1.000e+30, -pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */ -pio2_lo = 7.5497894159e-08, /* 0x33a22168 */ -pio4_hi = 7.8539812565e-01, /* 0x3f490fda */ /* coefficient for R(x^2) */ -pS0 = 1.6666667163e-01, /* 0x3e2aaaab */ -pS1 = -3.2556581497e-01, /* 0xbea6b090 */ -pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */ -pS3 = -4.0055535734e-02, /* 0xbd241146 */ -pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */ -pS5 = 3.4793309169e-05, /* 0x3811ef08 */ -qS1 = -2.4033949375e+00, /* 0xc019d139 */ -qS2 = 2.0209457874e+00, /* 0x4001572d */ -qS3 = -6.8828397989e-01, /* 0xbf303361 */ -qS4 = 7.7038154006e-02; /* 0x3d9dc62e */ +pS0 = 1.6666586697e-01, +pS1 = -4.2743422091e-02, +pS2 = -8.6563630030e-03, +qS1 = -7.0662963390e-01; + +static const double +pio2 = 1.570796326794896558e+00; float __ieee754_asinf(float x) { - float t=0.0,w,p,q,c,r,s; + double s; + float t=0.0,w,p,q,c,r; int32_t hx,ix; GET_FLOAT_WORD(hx,x); ix = hx&0x7fffffff; if(ix==0x3f800000) { /* asin(1)=+-pi/2 with inexact */ - return x*pio2_hi+x*pio2_lo; + return x*pio2; } else if(ix> 0x3f800000) { /* |x|>= 1 */ return (x-x)/(x-x); /* asin(|x|>1) is NaN */ } else if (ix<0x3f000000) { /* |x|<0.5 */ - if(ix<0x32000000) { /* if |x| < 2**-27 */ + if(ix<0x39800000) { /* if |x| < 2**-12 */ if(huge+x>one) return x;/* return x with inexact if x!=0*/ } else t = x*x; - p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); - q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); + p = t*(pS0+t*(pS1+t*pS2)); + q = one+t*qS1; w = p/q; return x+x*w; } /* 1> |x|>= 0.5 */ w = one-fabsf(x); t = w*(float)0.5; - p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); - q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); - s = __ieee754_sqrtf(t); - if(ix>=0x3F79999A) { /* if |x| > 0.975 */ - w = p/q; - t = pio2_hi-((float)2.0*(s+s*w)-pio2_lo); - } else { - int32_t iw; - w = s; - GET_FLOAT_WORD(iw,w); - SET_FLOAT_WORD(w,iw&0xfffff000); - c = (t-w*w)/(s+w); - r = p/q; - p = (float)2.0*s*r-(pio2_lo-(float)2.0*c); - q = pio4_hi-(float)2.0*w; - t = pio4_hi-(p-q); - } + p = t*(pS0+t*(pS1+t*pS2)); + q = one+t*qS1; + s = __ieee754_sqrt(t); + w = p/q; + t = pio2-2.0*(s+s*w); if(hx>0) return t; else return -t; } diff --git a/lib/msun/src/e_atan2f.c b/lib/msun/src/e_atan2f.c index 55435b4..a48ada1 100644 --- a/lib/msun/src/e_atan2f.c +++ b/lib/msun/src/e_atan2f.c @@ -80,8 +80,8 @@ __ieee754_atan2f(float y, float x) /* compute y/x */ k = (iy-ix)>>23; - if(k > 60) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**60 */ - else if(hx<0&&k<-60) z=0.0; /* |y|/x < -2**60 */ + if(k > 26) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**26 */ + else if(hx<0&&k<-26) z=0.0; /* |y|/x < -2**26 */ else z=atanf(fabsf(y/x)); /* safe to do y/x */ switch (m) { case 0: return z ; /* atan(+,+) */ diff --git a/lib/msun/src/s_atanf.c b/lib/msun/src/s_atanf.c index 8221b42..b3a371f 100644 --- a/lib/msun/src/s_atanf.c +++ b/lib/msun/src/s_atanf.c @@ -34,20 +34,14 @@ static const float atanlo[] = { }; static const float aT[] = { - 3.3333334327e-01, /* 0x3eaaaaaa */ - -2.0000000298e-01, /* 0xbe4ccccd */ - 1.4285714924e-01, /* 0x3e124925 */ - -1.1111110449e-01, /* 0xbde38e38 */ - 9.0908870101e-02, /* 0x3dba2e6e */ - -7.6918758452e-02, /* 0xbd9d8795 */ - 6.6610731184e-02, /* 0x3d886b35 */ - -5.8335702866e-02, /* 0xbd6ef16b */ - 4.9768779427e-02, /* 0x3d4bda59 */ - -3.6531571299e-02, /* 0xbd15a221 */ - 1.6285819933e-02, /* 0x3c8569d7 */ + 3.3333328366e-01, + -1.9999158382e-01, + 1.4253635705e-01, + -1.0648017377e-01, + 6.1687607318e-02, }; - static const float +static const float one = 1.0, huge = 1.0e30; @@ -59,13 +53,13 @@ atanf(float x) GET_FLOAT_WORD(hx,x); ix = hx&0x7fffffff; - if(ix>=0x50800000) { /* if |x| >= 2^34 */ + if(ix>=0x4c800000) { /* if |x| >= 2**26 */ if(ix>0x7f800000) return x+x; /* NaN */ if(hx>0) return atanhi[3]+*(volatile float *)&atanlo[3]; else return -atanhi[3]-*(volatile float *)&atanlo[3]; } if (ix < 0x3ee00000) { /* |x| < 0.4375 */ - if (ix < 0x31000000) { /* |x| < 2^-29 */ + if (ix < 0x39800000) { /* |x| < 2**-12 */ if(huge+x>one) return x; /* raise inexact */ } id = -1; @@ -80,7 +74,7 @@ atanf(float x) } else { if (ix < 0x401c0000) { /* |x| < 2.4375 */ id = 2; x = (x-(float)1.5)/(one+(float)1.5*x); - } else { /* 2.4375 <= |x| < 2^66 */ + } else { /* 2.4375 <= |x| < 2**26 */ id = 3; x = -(float)1.0/x; } }} @@ -88,8 +82,8 @@ atanf(float x) z = x*x; w = z*z; /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ - s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10]))))); - s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9])))); + s1 = z*(aT[0]+w*(aT[2]+w*aT[4])); + s2 = w*(aT[1]+w*aT[3]); if (id<0) return x - x*(s1+s2); else { z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x); |