From 8a978711f200d55ba1fe58b520d91bea1a98e4c3 Mon Sep 17 00:00:00 2001
From: kargl <kargl@FreeBSD.org>
Date: Thu, 9 Oct 2014 22:39:52 +0000
Subject: The value small=2**-(p+3), where p is the precision, can be determined
 from lgamma(x) = -log(x) - log(1+x) + x*(1-g) + x**2*P(x) with g = 0.57...
 being the Euler constant and P(x) a polynomial.  Substitution of small into
 the RHS shows that the last 3 terms are negligible in comparison to the
 leading term.  The choice of 3 may be conservative.

The value large=2**(p+3) is determined from Stirling's approximation
lgamma(x) = x*(log(x)-1) - log(x)/2 + log(2*pi)/2 + P(1/x)/x
Again, substitution of large into the RHS reveals the last 3 terms
are negligible in comparison to the leading term.

Move the x=+-0 special case into the |x|<small block.

In the ld80 and ld128 implementations, use fdlibm compatible comparisons
involving ix, lx, and llx.  This replaces several floating point
comparisons (some involving fabsl()) and also fixes the special cases
x=1 and x=2.

While here
  . Remove unnecessary parentheses.
  . Fix/improve comments due to the above changes.
  . Fix nearby whitespace.

* src/e_lgamma_r.c:
  . Sort declaration.
  . Remove unneeded explicit cast for type conversion.
  . Replace a double literal constant by an integer literal constant.

* src/e_lgammaf_r.c:
  . Sort declaration.

* ld128/e_lgammal_r.c:
  . Replace a long double literal constant by a double literal constant.

* ld80/e_lgammal_r.c:
  . Remove unused '#include float.h'
  . Replace a long double literal constant by a double literal constant.

Requested by:	bde
---
 lib/msun/src/e_lgamma_r.c  | 47 +++++++++++++++++++++++-----------------------
 lib/msun/src/e_lgammaf_r.c | 43 +++++++++++++++++++++---------------------
 2 files changed, 46 insertions(+), 44 deletions(-)

(limited to 'lib/msun/src')

diff --git a/lib/msun/src/e_lgamma_r.c b/lib/msun/src/e_lgamma_r.c
index a0bd310..be70767 100644
--- a/lib/msun/src/e_lgamma_r.c
+++ b/lib/msun/src/e_lgamma_r.c
@@ -201,28 +201,28 @@ sin_pi(double x)
 double
 __ieee754_lgamma_r(double x, int *signgamp)
 {
-	double t,y,z,nadj,p,p1,p2,p3,q,r,w;
+	double nadj,p,p1,p2,p3,q,r,t,w,y,z;
 	int32_t hx;
 	int i,ix,lx;
 
 	EXTRACT_WORDS(hx,lx,x);
 
-    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
+    /* purge +-Inf and NaNs */
 	*signgamp = 1;
 	ix = hx&0x7fffffff;
 	if(ix>=0x7ff00000) return x*x;
-	if((ix|lx)==0) {
-	   if(hx<0)
-		*signgamp = -1;
-	    return one/vzero;
-	}
-	if(ix<0x3b900000) {	/* |x|<2**-70, return -log(|x|) */
-	    if(hx<0) {
-	        *signgamp = -1;
-	        return -__ieee754_log(-x);
-	    } else return -__ieee754_log(x);
+
+    /* purge +-0 and tiny arguments */
+	*signgamp = 1-2*((uint32_t)hx>>31);
+	if(ix<0x3c700000) {	/* |x|<2**-56, return -log(|x|) */
+	    if((ix|lx)==0)
+	        return one/vzero;
+	    return -__ieee754_log(fabs(x));
 	}
+
+    /* purge negative integers and start evaluation for other x < 0 */
 	if(hx<0) {
+	    *signgamp = 1;
 	    if(ix>=0x43300000) 	/* |x|>=2**52, must be -integer */
 		return one/vzero;
 	    t = sin_pi(x);
@@ -232,7 +232,7 @@ __ieee754_lgamma_r(double x, int *signgamp)
 	    x = -x;
 	}
 
-    /* purge off 1 and 2 */
+    /* purge 1 and 2 */
 	if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
     /* for x < 2.0 */
 	else if(ix<0x40000000) {
@@ -253,7 +253,7 @@ __ieee754_lgamma_r(double x, int *signgamp)
 		p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
 		p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
 		p  = y*p1+p2;
-		r  += (p-y/2); break;
+		r  += p-y/2; break;
 	      case 1:
 		z = y*y;
 		w = z*y;
@@ -261,19 +261,20 @@ __ieee754_lgamma_r(double x, int *signgamp)
 		p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
 		p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
 		p  = z*p1-(tt-w*(p2+y*p3));
-		r += (tf + p); break;
+		r += tf + p; break;
 	      case 2:
 		p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
 		p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
-		r += (-0.5*y + p1/p2);
+		r += p1/p2-y/2;
 	    }
 	}
-	else if(ix<0x40200000) { 			/* x < 8.0 */
-	    i = (int)x;
-	    y = x-(double)i;
+    /* x < 8.0 */
+	else if(ix<0x40200000) {
+	    i = x;
+	    y = x-i;
 	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
 	    q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
-	    r = half*y+p/q;
+	    r = y/2+p/q;
 	    z = one;	/* lgamma(1+s) = log(s) + lgamma(s) */
 	    switch(i) {
 	    case 7: z *= (y+6);		/* FALLTHRU */
@@ -283,15 +284,15 @@ __ieee754_lgamma_r(double x, int *signgamp)
 	    case 3: z *= (y+2);		/* FALLTHRU */
 		    r += __ieee754_log(z); break;
 	    }
-    /* 8.0 <= x < 2**58 */
-	} else if (ix < 0x43900000) {
+    /* 8.0 <= x < 2**56 */
+	} else if (ix < 0x43700000) {
 	    t = __ieee754_log(x);
 	    z = one/x;
 	    y = z*z;
 	    w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
 	    r = (x-half)*(t-one)+w;
 	} else
-    /* 2**58 <= x <= inf */
+    /* 2**56 <= x <= inf */
 	    r =  x*(__ieee754_log(x)-one);
 	if(hx<0) r = nadj - r;
 	return r;
diff --git a/lib/msun/src/e_lgammaf_r.c b/lib/msun/src/e_lgammaf_r.c
index 9d23053..9084e18 100644
--- a/lib/msun/src/e_lgammaf_r.c
+++ b/lib/msun/src/e_lgammaf_r.c
@@ -122,29 +122,29 @@ sin_pif(float x)
 float
 __ieee754_lgammaf_r(float x, int *signgamp)
 {
-	float t,y,z,nadj,p,p1,p2,p3,q,r,w;
+	float nadj,p,p1,p2,p3,q,r,t,w,y,z;
 	int32_t hx;
 	int i,ix;
 
 	GET_FLOAT_WORD(hx,x);
 
-    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
+    /* purge +-Inf and NaNs */
 	*signgamp = 1;
 	ix = hx&0x7fffffff;
 	if(ix>=0x7f800000) return x*x;
-	if(ix==0) {
-	    if(hx<0)
-		*signgamp = -1;
-	    return one/vzero;
-	}
-	if(ix<0x35000000) {	/* |x|<2**-21, return -log(|x|) */
-	    if(hx<0) {
-	        *signgamp = -1;
-	        return -__ieee754_logf(-x);
-	    } else return -__ieee754_logf(x);
+
+    /* purge +-0 and tiny arguments */
+	*signgamp = 1-2*((uint32_t)hx>>31);
+	if(ix<0x32000000) {		/* |x|<2**-27, return -log(|x|) */
+	    if(ix==0)
+	        return one/vzero;
+	    return -__ieee754_logf(fabsf(x));
 	}
+
+    /* purge negative integers and start evaluation for other x < 0 */
 	if(hx<0) {
-	    if(ix>=0x4b000000) 	/* |x|>=2**23, must be -integer */
+	    *signgamp = 1;
+	    if(ix>=0x4b000000) 		/* |x|>=2**23, must be -integer */
 		return one/vzero;
 	    t = sin_pif(x);
 	    if(t==zero) return one/vzero; /* -integer */
@@ -153,7 +153,7 @@ __ieee754_lgammaf_r(float x, int *signgamp)
 	    x = -x;
 	}
 
-    /* purge off 1 and 2 */
+    /* purge 1 and 2 */
 	if (ix==0x3f800000||ix==0x40000000) r = 0;
     /* for x < 2.0 */
 	else if(ix<0x40000000) {
@@ -174,17 +174,18 @@ __ieee754_lgammaf_r(float x, int *signgamp)
 		p1 = a0+z*(a2+z*a4);
 		p2 = z*(a1+z*(a3+z*a5));
 		p  = y*p1+p2;
-		r  += (p-y/2); break;
+		r  += p-y/2; break;
 	      case 1:
 		p = t0+y*t1+y*y*(t2+y*(t3+y*(t4+y*(t5+y*(t6+y*t7)))));
-		r += (tf + p); break;
+		r += tf + p; break;
 	      case 2:
 		p1 = y*(u0+y*(u1+y*u2));
 		p2 = one+y*(v1+y*(v2+y*v3));
-		r += (p1/p2-y/2);
+		r += p1/p2-y/2;
 	    }
 	}
-	else if(ix<0x41000000) { 			/* x < 8.0 */
+    /* x < 8.0 */
+	else if(ix<0x41000000) {
 	    i = x;
 	    y = x-i;
 	    p = y*(s0+y*(s1+y*(s2+y*s3)));
@@ -199,15 +200,15 @@ __ieee754_lgammaf_r(float x, int *signgamp)
 	    case 3: z *= (y+2);		/* FALLTHRU */
 		    r += __ieee754_logf(z); break;
 	    }
-    /* 8.0 <= x < 2**24 */
-	} else if (ix < 0x4b800000) {
+    /* 8.0 <= x < 2**27 */
+	} else if (ix < 0x4d000000) {
 	    t = __ieee754_logf(x);
 	    z = one/x;
 	    y = z*z;
 	    w = w0+z*(w1+y*w2);
 	    r = (x-half)*(t-one)+w;
 	} else
-    /* 2**24 <= x <= inf */
+    /* 2**27 <= x <= inf */
 	    r =  x*(__ieee754_logf(x)-one);
 	if(hx<0) r = nadj - r;
 	return r;
-- 
cgit v1.1