From 8a978711f200d55ba1fe58b520d91bea1a98e4c3 Mon Sep 17 00:00:00 2001 From: kargl Date: Thu, 9 Oct 2014 22:39:52 +0000 Subject: The value small=2**-(p+3), where p is the precision, can be determine from lgamma(x) = -log(x) - log(1+x) + x*(1-g) + x**2*P(x) with g = 0.57... being the Euler constant and P(x) a polynomial. Substitution of small into the RHS shows that the last 3 terms are negligible in comparison to the leading term. The choice of 3 may be conservative. The value large=2**(p+3) is detemined from Stirling's approximation lgamma(x) = x*(log(x)-1) - log(x)/2 + log(2*pi)/2 + P(1/x)/x Again, substitution of large into the RHS reveals the last 3 terms are negligible in comparison to the leading term. Move the x=+-0 special case into the |x|=0x7ff00000) return x*x; - if((ix|lx)==0) { - if(hx<0) - *signgamp = -1; - return one/vzero; - } - if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */ - if(hx<0) { - *signgamp = -1; - return -__ieee754_log(-x); - } else return -__ieee754_log(x); + + /* purge +-0 and tiny arguments */ + *signgamp = 1-2*((uint32_t)hx>>31); + if(ix<0x3c700000) { /* |x|<2**-56, return -log(|x|) */ + if((ix|lx)==0) + return one/vzero; + return -__ieee754_log(fabs(x)); } + + /* purge negative integers and start evaluation for other x < 0 */ if(hx<0) { + *signgamp = 1; if(ix>=0x43300000) /* |x|>=2**52, must be -integer */ return one/vzero; t = sin_pi(x); @@ -232,7 +232,7 @@ __ieee754_lgamma_r(double x, int *signgamp) x = -x; } - /* purge off 1 and 2 */ + /* purge 1 and 2 */ if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0; /* for x < 2.0 */ else if(ix<0x40000000) { @@ -253,7 +253,7 @@ __ieee754_lgamma_r(double x, int *signgamp) p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))); p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))); p = y*p1+p2; - r += (p-y/2); break; + r += p-y/2; break; case 1: z = y*y; w = z*y; @@ -261,19 +261,20 @@ __ieee754_lgamma_r(double x, int *signgamp) p2 = t1+w*(t4+w*(t7+w*(t10+w*t13))); p3 = t2+w*(t5+w*(t8+w*(t11+w*t14))); p = z*p1-(tt-w*(p2+y*p3)); - r += (tf + p); break; + r += tf + p; break; case 2: p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))); p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))); - r += (-0.5*y + p1/p2); + r += p1/p2-y/2; } } - else if(ix<0x40200000) { /* x < 8.0 */ - i = (int)x; - y = x-(double)i; + /* x < 8.0 */ + else if(ix<0x40200000) { + i = x; + y = x-i; p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))); - r = half*y+p/q; + r = y/2+p/q; z = one; /* lgamma(1+s) = log(s) + lgamma(s) */ switch(i) { case 7: z *= (y+6); /* FALLTHRU */ @@ -283,15 +284,15 @@ __ieee754_lgamma_r(double x, int *signgamp) case 3: z *= (y+2); /* FALLTHRU */ r += __ieee754_log(z); break; } - /* 8.0 <= x < 2**58 */ - } else if (ix < 0x43900000) { + /* 8.0 <= x < 2**56 */ + } else if (ix < 0x43700000) { t = __ieee754_log(x); z = one/x; y = z*z; w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))); r = (x-half)*(t-one)+w; } else - /* 2**58 <= x <= inf */ + /* 2**56 <= x <= inf */ r = x*(__ieee754_log(x)-one); if(hx<0) r = nadj - r; return r; diff --git a/lib/msun/src/e_lgammaf_r.c b/lib/msun/src/e_lgammaf_r.c index 9d23053..9084e18 100644 --- a/lib/msun/src/e_lgammaf_r.c +++ b/lib/msun/src/e_lgammaf_r.c @@ -122,29 +122,29 @@ sin_pif(float x) float __ieee754_lgammaf_r(float x, int *signgamp) { - float t,y,z,nadj,p,p1,p2,p3,q,r,w; + float nadj,p,p1,p2,p3,q,r,t,w,y,z; int32_t hx; int i,ix; GET_FLOAT_WORD(hx,x); - /* purge off +-inf, NaN, +-0, tiny and negative arguments */ + /* purge +-Inf and NaNs */ *signgamp = 1; ix = hx&0x7fffffff; if(ix>=0x7f800000) return x*x; - if(ix==0) { - if(hx<0) - *signgamp = -1; - return one/vzero; - } - if(ix<0x35000000) { /* |x|<2**-21, return -log(|x|) */ - if(hx<0) { - *signgamp = -1; - return -__ieee754_logf(-x); - } else return -__ieee754_logf(x); + + /* purge +-0 and tiny arguments */ + *signgamp = 1-2*((uint32_t)hx>>31); + if(ix<0x32000000) { /* |x|<2**-27, return -log(|x|) */ + if(ix==0) + return one/vzero; + return -__ieee754_logf(fabsf(x)); } + + /* purge negative integers and start evaluation for other x < 0 */ if(hx<0) { - if(ix>=0x4b000000) /* |x|>=2**23, must be -integer */ + *signgamp = 1; + if(ix>=0x4b000000) /* |x|>=2**23, must be -integer */ return one/vzero; t = sin_pif(x); if(t==zero) return one/vzero; /* -integer */ @@ -153,7 +153,7 @@ __ieee754_lgammaf_r(float x, int *signgamp) x = -x; } - /* purge off 1 and 2 */ + /* purge 1 and 2 */ if (ix==0x3f800000||ix==0x40000000) r = 0; /* for x < 2.0 */ else if(ix<0x40000000) { @@ -174,17 +174,18 @@ __ieee754_lgammaf_r(float x, int *signgamp) p1 = a0+z*(a2+z*a4); p2 = z*(a1+z*(a3+z*a5)); p = y*p1+p2; - r += (p-y/2); break; + r += p-y/2; break; case 1: p = t0+y*t1+y*y*(t2+y*(t3+y*(t4+y*(t5+y*(t6+y*t7))))); - r += (tf + p); break; + r += tf + p; break; case 2: p1 = y*(u0+y*(u1+y*u2)); p2 = one+y*(v1+y*(v2+y*v3)); - r += (p1/p2-y/2); + r += p1/p2-y/2; } } - else if(ix<0x41000000) { /* x < 8.0 */ + /* x < 8.0 */ + else if(ix<0x41000000) { i = x; y = x-i; p = y*(s0+y*(s1+y*(s2+y*s3))); @@ -199,15 +200,15 @@ __ieee754_lgammaf_r(float x, int *signgamp) case 3: z *= (y+2); /* FALLTHRU */ r += __ieee754_logf(z); break; } - /* 8.0 <= x < 2**24 */ - } else if (ix < 0x4b800000) { + /* 8.0 <= x < 2**27 */ + } else if (ix < 0x4d000000) { t = __ieee754_logf(x); z = one/x; y = z*z; w = w0+z*(w1+y*w2); r = (x-half)*(t-one)+w; } else - /* 2**24 <= x <= inf */ + /* 2**27 <= x <= inf */ r = x*(__ieee754_logf(x)-one); if(hx<0) r = nadj - r; return r; -- cgit v1.1