36 files changed, 1575 insertions, 373 deletions
diff --git a/lib/msun/src/e_cosh.c b/lib/msun/src/e_cosh.c
index 11e6590..a363695 100644
--- a/lib/msun/src/e_cosh.c
+++ b/lib/msun/src/e_cosh.c
@@ -45,7 +45,6 @@ __ieee754_cosh(double x)
 {
 	double t,w;
 	int32_t ix;
-	u_int32_t lx;
 
     /* High word of |x|. */
 	GET_HIGH_WORD(ix,x);
@@ -72,13 +71,8 @@ __ieee754_cosh(double x)
 	if (ix < 0x40862E42)  return half*__ieee754_exp(fabs(x));
 
     /* |x| in [log(maxdouble), overflowthresold] */
-	GET_LOW_WORD(lx,x);
-	if (ix<0x408633CE ||
-	      ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) {
-	    w = __ieee754_exp(half*fabs(x));
-	    t = half*w;
-	    return t*w;
-	}
+	if (ix<=0x408633CE)
+	    return __ldexp_exp(fabs(x), -1);
 
     /* |x| > overflowthresold, cosh(x) overflow */
 	return huge*huge;
diff --git a/lib/msun/src/e_coshf.c b/lib/msun/src/e_coshf.c
index 4a1d499..95a0d6e 100644
--- a/lib/msun/src/e_coshf.c
+++ b/lib/msun/src/e_coshf.c
@@ -51,11 +51,8 @@ __ieee754_coshf(float x)
 	if (ix < 0x42b17217)  return half*__ieee754_expf(fabsf(x));
 
     /* |x| in [log(maxfloat), overflowthresold] */
-	if (ix<=0x42b2d4fc) {
-	    w = __ieee754_expf(half*fabsf(x));
-	    t = half*w;
-	    return t*w;
-	}
+	if (ix<=0x42b2d4fc)
+	    return __ldexp_expf(fabsf(x), -1);
 
     /* |x| > overflowthresold, cosh(x) overflow */
 	return huge*huge;
diff --git a/lib/msun/src/e_exp.c b/lib/msun/src/e_exp.c
index 5b9a10c..b47aef5 100644
--- a/lib/msun/src/e_exp.c
+++ b/lib/msun/src/e_exp.c
@@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$");
  * to produce the hexadecimal values shown.
  */
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 
@@ -133,7 +135,7 @@ __ieee754_exp(double x)	/* default IEEE double exp */
 		hi = x - t*ln2HI[0];	/* t*ln2HI is exact here */
 		lo = t*ln2LO[0];
 	    }
-	    x  = hi - lo;
+	    STRICT_ASSIGN(double, x, hi - lo);
 	} 
 	else if(hx < 0x3e300000)  {	/* when |x|<2**-28 */
 	    if(huge+x>one) return one+x;/* trigger inexact */
diff --git a/lib/msun/src/e_expf.c b/lib/msun/src/e_expf.c
index 502e421..a479076 100644
--- a/lib/msun/src/e_expf.c
+++ b/lib/msun/src/e_expf.c
@@ -16,6 +16,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 
@@ -40,7 +42,7 @@ P2 = -2.7667332906e-3;		/* -0xb55215.0p-32 */
 static volatile float twom100 = 7.8886090522e-31;      /* 2**-100=0x0d800000 */
 
 float
-__ieee754_expf(float x)	/* default IEEE double exp */
+__ieee754_expf(float x)
 {
 	float y,hi=0.0,lo=0.0,c,t,twopk;
 	int32_t k=0,xsb;
@@ -70,7 +72,7 @@ __ieee754_expf(float x)	/* default IEEE double exp */
 		hi = x - t*ln2HI[0];	/* t*ln2HI is exact here */
 		lo = t*ln2LO[0];
 	    }
-	    x  = hi - lo;
+	    STRICT_ASSIGN(float, x, hi - lo);
 	}
 	else if(hx < 0x39000000)  {	/* when |x|<2**-14 */
 	    if(huge+x>one) return one+x;/* trigger inexact */
diff --git a/lib/msun/src/e_hypot.c b/lib/msun/src/e_hypot.c
index fb498c1..2398e98 100644
--- a/lib/msun/src/e_hypot.c
+++ b/lib/msun/src/e_hypot.c
@@ -54,7 +54,7 @@ __FBSDID("$FreeBSD$");
 double
 __ieee754_hypot(double x, double y)
 {
-	double a=x,b=y,t1,t2,y1,y2,w;
+	double a,b,t1,t2,y1,y2,w;
 	int32_t j,k,ha,hb;
 
 	GET_HIGH_WORD(ha,x);
diff --git a/lib/msun/src/e_hypotf.c b/lib/msun/src/e_hypotf.c
index c82c6e7..6d083e4 100644
--- a/lib/msun/src/e_hypotf.c
+++ b/lib/msun/src/e_hypotf.c
@@ -22,7 +22,7 @@ __FBSDID("$FreeBSD$");
 float
 __ieee754_hypotf(float x, float y)
 {
-	float a=x,b=y,t1,t2,y1,y2,w;
+	float a,b,t1,t2,y1,y2,w;
 	int32_t j,k,ha,hb;
 
 	GET_FLOAT_WORD(ha,x);
diff --git a/lib/msun/src/e_hypotl.c b/lib/msun/src/e_hypotl.c
index 0c899bf..7b5ab89 100644
--- a/lib/msun/src/e_hypotl.c
+++ b/lib/msun/src/e_hypotl.c
@@ -21,13 +21,6 @@ __FBSDID("$FreeBSD$");
 #include "math.h"
 #include "math_private.h"
 
-#define	GET_LDBL_EXPSIGN(i, v) do {	\
-	union IEEEl2bits uv;		\
-					\
-	uv.e = v;			\
-	i = uv.xbits.expsign;		\
-} while (0)
-
 #define	GET_LDBL_MAN(h, l, v) do {	\
 	union IEEEl2bits uv;		\
 					\
@@ -36,14 +29,6 @@ __FBSDID("$FreeBSD$");
 	l = uv.bits.manl;		\
 } while (0)
 
-#define	SET_LDBL_EXPSIGN(v, i) do {	\
-	union IEEEl2bits uv;		\
-					\
-	uv.e = v;			\
-	uv.xbits.expsign = i;		\
-	v = uv.e;			\
-} while (0)
-
 #undef GET_HIGH_WORD
 #define	GET_HIGH_WORD(i, v)	GET_LDBL_EXPSIGN(i, v)
 #undef SET_HIGH_WORD
diff --git a/lib/msun/src/e_lgamma_r.c b/lib/msun/src/e_lgamma_r.c
index a587b8f..1cff592 100644
--- a/lib/msun/src/e_lgamma_r.c
+++ b/lib/msun/src/e_lgamma_r.c
@@ -269,7 +269,6 @@ __ieee754_lgamma_r(double x, int *signgamp)
 	}
 	else if(ix<0x40200000) { 			/* x < 8.0 */
 	    i = (int)x;
-	    t = zero;
 	    y = x-(double)i;
 	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
 	    q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
diff --git a/lib/msun/src/e_lgammaf_r.c b/lib/msun/src/e_lgammaf_r.c
index 47c6ed0..e2d90ef 100644
--- a/lib/msun/src/e_lgammaf_r.c
+++ b/lib/msun/src/e_lgammaf_r.c
@@ -202,7 +202,6 @@ __ieee754_lgammaf_r(float x, int *signgamp)
 	}
 	else if(ix<0x41000000) { 			/* x < 8.0 */
 	    i = (int)x;
-	    t = zero;
 	    y = x-(float)i;
 	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
 	    q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
diff --git a/lib/msun/src/e_log10.c b/lib/msun/src/e_log10.c
index 135f0dc..104d257 100644
--- a/lib/msun/src/e_log10.c
+++ b/lib/msun/src/e_log10.c
@@ -15,7 +15,11 @@
 __FBSDID("$FreeBSD$");
 
 /*
- * Return the base 10 logarithm of x. See k_log.c for details on the algorithm.
+ * Return the base 10 logarithm of x.  See e_log.c and k_log.h for most
+ * comments.
+ *
+ *    log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2)
+ * in not-quite-routine extra precision.
  */
 
 #include "math.h"
@@ -34,31 +38,50 @@ static const double zero   =  0.0;
 double
 __ieee754_log10(double x)
 {
-	double f,hi,lo,y,z;
+	double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2;
 	int32_t i,k,hx;
 	u_int32_t lx;
 
 	EXTRACT_WORDS(hx,lx,x);
 
-        k=0;
-        if (hx < 0x00100000) {                  /* x < 2**-1022  */
-            if (((hx&0x7fffffff)|lx)==0)
-                return -two54/zero;             /* log(+-0)=-inf */
-            if (hx<0) return (x-x)/zero;        /* log(-#) = NaN */
-            k -= 54; x *= two54; /* subnormal number, scale up x */
+	k=0;
+	if (hx < 0x00100000) {			/* x < 2**-1022  */
+	    if (((hx&0x7fffffff)|lx)==0)
+		return -two54/zero;		/* log(+-0)=-inf */
+	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
+	    k -= 54; x *= two54; /* subnormal number, scale up x */
 	    GET_HIGH_WORD(hx,x);
-        }
+	}
 	if (hx >= 0x7ff00000) return x+x;
+	if (hx == 0x3ff00000 && lx == 0)
+	    return zero;			/* log(1) = +0 */
 	k += (hx>>20)-1023;
 	hx &= 0x000fffff;
 	i = (hx+0x95f64)&0x100000;
 	SET_HIGH_WORD(x,hx|(i^0x3ff00000));	/* normalize x or x/2 */
 	k += (i>>20);
 	y = (double)k;
-	f = __kernel_log(x);
-	hi = x = x - 1;
+	f = x - 1.0;
+	hfsq = 0.5*f*f;
+	r = k_log1p(f);
+
+	/* See e_log2.c for most details. */
+	hi = f - hfsq;
 	SET_LOW_WORD(hi,0);
-	lo = x - hi;
-	z = y*log10_2lo + (x+f)*ivln10lo + (lo+f)*ivln10hi + hi*ivln10hi;
-	return  z+y*log10_2hi;
+	lo = (f - hi) - hfsq + r;
+	val_hi = hi*ivln10hi;
+	y2 = y*log10_2hi;
+	val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;
+
+	/*
+	 * Extra precision for adding y*log10_2hi is not strictly needed
+	 * since there is no very large cancellation near x = sqrt(2) or
+	 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
+	 * with some parallelism and it reduces the error for many args.
+	 */
+	w = y2 + val_hi;
+	val_lo += (y2 - w) + val_hi;
+	val_hi = w;
+
+	return val_lo + val_hi;
 }
diff --git a/lib/msun/src/e_log10f.c b/lib/msun/src/e_log10f.c
index 940b831..c876594 100644
--- a/lib/msun/src/e_log10f.c
+++ b/lib/msun/src/e_log10f.c
@@ -13,7 +13,7 @@
 __FBSDID("$FreeBSD$");
 
 /*
- * Return the base 10 logarithm of x. See k_log.c for details on the algorithm.
+ * Float version of e_log10.c.  See the latter for most comments.
  */
 
 #include "math.h"
@@ -32,31 +32,40 @@ static const float zero   =  0.0;
 float
 __ieee754_log10f(float x)
 {
-	float f,hi,lo,y,z;
+	float f,hfsq,hi,lo,r,y;
 	int32_t i,k,hx;
 
 	GET_FLOAT_WORD(hx,x);
 
-        k=0;
-        if (hx < 0x00800000) {                  /* x < 2**-126  */
-            if ((hx&0x7fffffff)==0)
-                return -two25/zero;             /* log(+-0)=-inf */
-            if (hx<0) return (x-x)/zero;        /* log(-#) = NaN */
-            k -= 25; x *= two25; /* subnormal number, scale up x */
+	k=0;
+	if (hx < 0x00800000) {			/* x < 2**-126  */
+	    if ((hx&0x7fffffff)==0)
+		return -two25/zero;		/* log(+-0)=-inf */
+	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
+	    k -= 25; x *= two25; /* subnormal number, scale up x */
 	    GET_FLOAT_WORD(hx,x);
-        }
+	}
 	if (hx >= 0x7f800000) return x+x;
+	if (hx == 0x3f800000)
+	    return zero;			/* log(1) = +0 */
 	k += (hx>>23)-127;
 	hx &= 0x007fffff;
 	i = (hx+(0x4afb0d))&0x800000;
 	SET_FLOAT_WORD(x,hx|(i^0x3f800000));	/* normalize x or x/2 */
 	k += (i>>23);
 	y = (float)k;
-	f = __kernel_logf(x);
-	x = x - (float)1.0;
-	GET_FLOAT_WORD(hx,x);
+	f = x - (float)1.0;
+	hfsq = (float)0.5*f*f;
+	r = k_log1pf(f);
+
+	/* See e_log2f.c and e_log2.c for details. */
+	if (sizeof(float_t) > sizeof(float))
+		return (r - hfsq + f) * ((float_t)ivln10lo + ivln10hi) +
+		    y * ((float_t)log10_2lo + log10_2hi);
+	hi = f - hfsq;
+	GET_FLOAT_WORD(hx,hi);
 	SET_FLOAT_WORD(hi,hx&0xfffff000);
-	lo = x - hi;
-	z = y*log10_2lo + (x+f)*ivln10lo + (lo+f)*ivln10hi + hi*ivln10hi;
-	return  z+y*log10_2hi;
+	lo = (f - hi) - hfsq + r;
+	return y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + hi*ivln10hi +
+	    y*log10_2hi;
 }
diff --git a/lib/msun/src/e_log2.c b/lib/msun/src/e_log2.c
index 6cf3dbc..1fc44a5 100644
--- a/lib/msun/src/e_log2.c
+++ b/lib/msun/src/e_log2.c
@@ -15,7 +15,13 @@
 __FBSDID("$FreeBSD$");
 
 /*
- * Return the base 2 logarithm of x. See k_log.c for details on the algorithm.
+ * Return the base 2 logarithm of x.  See e_log.c and k_log.h for most
+ * comments.
+ *
+ * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
+ * then does the combining and scaling steps
+ *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
+ * in not-quite-routine extra precision.
  */
 
 #include "math.h"
@@ -32,29 +38,73 @@ static const double zero   =  0.0;
 double
 __ieee754_log2(double x)
 {
-	double f,hi,lo;
+	double f,hfsq,hi,lo,r,val_hi,val_lo,w,y;
 	int32_t i,k,hx;
 	u_int32_t lx;
 
 	EXTRACT_WORDS(hx,lx,x);
 
-        k=0;
-        if (hx < 0x00100000) {                  /* x < 2**-1022  */
-            if (((hx&0x7fffffff)|lx)==0)
-                return -two54/zero;             /* log(+-0)=-inf */
-            if (hx<0) return (x-x)/zero;        /* log(-#) = NaN */
-            k -= 54; x *= two54; /* subnormal number, scale up x */
+	k=0;
+	if (hx < 0x00100000) {			/* x < 2**-1022  */
+	    if (((hx&0x7fffffff)|lx)==0)
+		return -two54/zero;		/* log(+-0)=-inf */
+	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
+	    k -= 54; x *= two54; /* subnormal number, scale up x */
 	    GET_HIGH_WORD(hx,x);
-        }
+	}
 	if (hx >= 0x7ff00000) return x+x;
+	if (hx == 0x3ff00000 && lx == 0)
+	    return zero;			/* log(1) = +0 */
 	k += (hx>>20)-1023;
 	hx &= 0x000fffff;
 	i = (hx+0x95f64)&0x100000;
 	SET_HIGH_WORD(x,hx|(i^0x3ff00000));	/* normalize x or x/2 */
 	k += (i>>20);
-	f = __kernel_log(x);
-	hi = x = x - 1;
+	y = (double)k;
+	f = x - 1.0;
+	hfsq = 0.5*f*f;
+	r = k_log1p(f);
+
+	/*
+	 * f-hfsq must (for args near 1) be evaluated in extra precision
+	 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
+	 * This is fairly efficient since f-hfsq only depends on f, so can
+	 * be evaluated in parallel with R.  Not combining hfsq with R also
+	 * keeps R small (though not as small as a true `lo' term would be),
+	 * so that extra precision is not needed for terms involving R.
+	 *
+	 * Compiler bugs involving extra precision used to break Dekker's
+	 * theorem for splitting f-hfsq as hi+lo, unless double_t was used
+	 * or the multi-precision calculations were avoided when double_t
+	 * has extra precision.  These problems are now automatically
+	 * avoided as a side effect of the optimization of combining the
+	 * Dekker splitting step with the clear-low-bits step.
+	 *
+	 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
+	 * precision to avoid a very large cancellation when x is very near
+	 * these values.  Unlike the above cancellations, this problem is
+	 * specific to base 2.  It is strange that adding +-1 is so much
+	 * harder than adding +-ln2 or +-log10_2.
+	 *
+	 * This uses Dekker's theorem to normalize y+val_hi, so the
+	 * compiler bugs are back in some configurations, sigh.  And I
+	 * don't want to use double_t to avoid them, since that gives a
+	 * pessimization and the support for avoiding the pessimization
+	 * is not yet available.
+	 *
+	 * The multi-precision calculations for the multiplications are
+	 * routine.
+	 */
+	hi = f - hfsq;
 	SET_LOW_WORD(hi,0);
-	lo = x - hi;
-	return (x+f)*ivln2lo + (lo+f)*ivln2hi + hi*ivln2hi + k;
+	lo = (f - hi) - hfsq + r;
+	val_hi = hi*ivln2hi;
+	val_lo = (lo+hi)*ivln2lo + lo*ivln2hi;
+
+	/* spadd(val_hi, val_lo, y), except for not using double_t: */
+	w = y + val_hi;
+	val_lo += (y - w) + val_hi;
+	val_hi = w;
+
+	return val_lo + val_hi;
 }
diff --git a/lib/msun/src/e_log2f.c b/lib/msun/src/e_log2f.c
index bb308d3..7166346 100644
--- a/lib/msun/src/e_log2f.c
+++ b/lib/msun/src/e_log2f.c
@@ -13,7 +13,7 @@
 __FBSDID("$FreeBSD$");
 
 /*
- * Return the base 2 logarithm of x. See k_log.c for details on the algorithm.
+ * Float version of e_log2.c.  See the latter for most comments.
  */
 
 #include "math.h"
@@ -30,29 +30,52 @@ static const float zero   =  0.0;
 float
 __ieee754_log2f(float x)
 {
-	float f,hi,lo;
+	float f,hfsq,hi,lo,r,y;
 	int32_t i,k,hx;
 
 	GET_FLOAT_WORD(hx,x);
 
-        k=0;
-        if (hx < 0x00800000) {                  /* x < 2**-126  */
-            if ((hx&0x7fffffff)==0)
-                return -two25/zero;             /* log(+-0)=-inf */
-            if (hx<0) return (x-x)/zero;        /* log(-#) = NaN */
-            k -= 25; x *= two25; /* subnormal number, scale up x */
+	k=0;
+	if (hx < 0x00800000) {			/* x < 2**-126  */
+	    if ((hx&0x7fffffff)==0)
+		return -two25/zero;		/* log(+-0)=-inf */
+	    if (hx<0) return (x-x)/zero;	/* log(-#) = NaN */
+	    k -= 25; x *= two25; /* subnormal number, scale up x */
 	    GET_FLOAT_WORD(hx,x);
-        }
+	}
 	if (hx >= 0x7f800000) return x+x;
+	if (hx == 0x3f800000)
+	    return zero;			/* log(1) = +0 */
 	k += (hx>>23)-127;
 	hx &= 0x007fffff;
 	i = (hx+(0x4afb0d))&0x800000;
 	SET_FLOAT_WORD(x,hx|(i^0x3f800000));	/* normalize x or x/2 */
 	k += (i>>23);
-	f = __kernel_logf(x);
-	x = x - (float)1.0;
-	GET_FLOAT_WORD(hx,x);
+	y = (float)k;
+	f = x - (float)1.0;
+	hfsq = (float)0.5*f*f;
+	r = k_log1pf(f);
+
+	/*
+	 * We no longer need to avoid falling into the multi-precision
+	 * calculations due to compiler bugs breaking Dekker's theorem.
+	 * Keep avoiding this as an optimization.  See e_log2.c for more
+	 * details (some details are here only because the optimization
+	 * is not yet available in double precision).
+	 *
+	 * Another compiler bug turned up.  With gcc on i386,
+	 * (ivln2lo + ivln2hi) would be evaluated in float precision
+	 * despite runtime evaluations using double precision.  So we
+	 * must cast one of its terms to float_t.  This makes the whole
+	 * expression have type float_t, so return is forced to waste
+	 * time clobbering its extra precision.
+	 */
+	if (sizeof(float_t) > sizeof(float))
+		return (r - hfsq + f) * ((float_t)ivln2lo + ivln2hi) + y;
+
+	hi = f - hfsq;
+	GET_FLOAT_WORD(hx,hi);
 	SET_FLOAT_WORD(hi,hx&0xfffff000);
-	lo = x - hi;
-	return (x+f)*ivln2lo + (lo+f)*ivln2hi + hi*ivln2hi + k;
+	lo = (f - hi) - hfsq + r;
+	return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + y;
 }
diff --git a/lib/msun/src/e_pow.c b/lib/msun/src/e_pow.c
index 4aa00d5..7607a4a 100644
--- a/lib/msun/src/e_pow.c
+++ b/lib/msun/src/e_pow.c
@@ -109,6 +109,9 @@ __ieee754_pow(double x, double y)
     /* y==zero: x**0 = 1 */
 	if((iy|ly)==0) return one; 	
 
+    /* x==1: 1**y = 1, even if y is NaN */
+	if (hx==0x3ff00000 && lx == 0) return one;
+
     /* y!=zero: result is NaN if either arg is NaN */
 	if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
 	   iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0))) 
@@ -138,7 +141,7 @@ __ieee754_pow(double x, double y)
 	if(ly==0) { 	
 	    if (iy==0x7ff00000) {	/* y is +-inf */
 	        if(((ix-0x3ff00000)|lx)==0)
-		    return  y - y;	/* inf**+-1 is NaN */
+		    return  one;	/* (-1)**+-inf is 1 */
 	        else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */
 		    return (hy>=0)? y: zero;
 	        else			/* (|x|<1)**-,+inf = inf,0 */
diff --git a/lib/msun/src/e_powf.c b/lib/msun/src/e_powf.c
index 466ed72..5c46478 100644
--- a/lib/msun/src/e_powf.c
+++ b/lib/msun/src/e_powf.c
@@ -67,6 +67,9 @@ __ieee754_powf(float x, float y)
     /* y==zero: x**0 = 1 */
 	if(iy==0) return one;
 
+    /* x==1: 1**y = 1, even if y is NaN */
+	if (hx==0x3f800000) return one;
+
     /* y!=zero: result is NaN if either arg is NaN */
 	if(ix > 0x7f800000 ||
 	   iy > 0x7f800000)
@@ -90,7 +93,7 @@ __ieee754_powf(float x, float y)
     /* special value of y */
 	if (iy==0x7f800000) {	/* y is +-inf */
 	    if (ix==0x3f800000)
-	        return  y - y;	/* inf**+-1 is NaN */
+	        return  one;	/* (-1)**+-inf is 1 */
 	    else if (ix > 0x3f800000)/* (|x|>1)**+-inf = inf,0 */
 	        return (hy>=0)? y: zero;
 	    else			/* (|x|<1)**-,+inf = inf,0 */
diff --git a/lib/msun/src/e_sinh.c b/lib/msun/src/e_sinh.c
index afb8e43..17442d0 100644
--- a/lib/msun/src/e_sinh.c
+++ b/lib/msun/src/e_sinh.c
@@ -40,9 +40,8 @@ static const double one = 1.0, shuge = 1.0e307;
 double
 __ieee754_sinh(double x)
 {
-	double t,w,h;
+	double t,h;
 	int32_t ix,jx;
-	u_int32_t lx;
 
     /* High word of |x|. */
 	GET_HIGH_WORD(jx,x);
@@ -66,12 +65,8 @@ __ieee754_sinh(double x)
 	if (ix < 0x40862E42)  return h*__ieee754_exp(fabs(x));
 
     /* |x| in [log(maxdouble), overflowthresold] */
-	GET_LOW_WORD(lx,x);
-	if (ix<0x408633CE || ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) {
-	    w = __ieee754_exp(0.5*fabs(x));
-	    t = h*w;
-	    return t*w;
-	}
+	if (ix<=0x408633CE)
+	    return h*2.0*__ldexp_exp(fabs(x), -1);
 
     /* |x| > overflowthresold, sinh(x) overflow */
 	return x*shuge;
diff --git a/lib/msun/src/e_sinhf.c b/lib/msun/src/e_sinhf.c
index 0f96b2b..1be2dc3 100644
--- a/lib/msun/src/e_sinhf.c
+++ b/lib/msun/src/e_sinhf.c
@@ -24,7 +24,7 @@ static const float one = 1.0, shuge = 1.0e37;
 float
 __ieee754_sinhf(float x)
 {
-	float t,w,h;
+	float t,h;
 	int32_t ix,jx;
 
 	GET_FLOAT_WORD(jx,x);
@@ -48,11 +48,8 @@ __ieee754_sinhf(float x)
 	if (ix < 0x42b17217)  return h*__ieee754_expf(fabsf(x));
 
     /* |x| in [logf(maxfloat), overflowthresold] */
-	if (ix<=0x42b2d4fc) {
-	    w = __ieee754_expf((float)0.5*fabsf(x));
-	    t = h*w;
-	    return t*w;
-	}
+	if (ix<=0x42b2d4fc)
+	    return h*2.0F*__ldexp_expf(fabsf(x), -1);
 
     /* |x| > overflowthresold, sinh(x) overflow */
 	return x*shuge;
diff --git a/lib/msun/src/k_exp.c b/lib/msun/src/k_exp.c
new file mode 100644
index 0000000..f592f69
--- /dev/null
+++ b/lib/msun/src/k_exp.c
@@ -0,0 +1,108 @@
+/*-
+ * Copyright (c) 2011 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+
+#include "math.h"
+#include "math_private.h"
+
+static const uint32_t k = 1799;		/* constant for reduction */
+static const double kln2 =  1246.97177782734161156;	/* k * ln2 */
+
+/*
+ * Compute exp(x), scaled to avoid spurious overflow.  The scaled value y
+ * is returned and the power of two that was factored out is stored in
+ * '*expt', so that exp(x) ~= y * 2**(*expt).
+ * Input:  ln(DBL_MAX) <= x < ln(2 * DBL_MAX / DBL_MIN_DENORM) ~= 1454.91
+ * Output: 2**1023 <= y < 2**1024
+ */
+static double
+__frexp_exp(double x, int *expt)
+{
+	double exp_x;
+	uint32_t hx;
+
+	/*
+	 * We use exp(x) = exp(x - kln2) * 2**k, with k carefully chosen to
+	 * minimize |exp(kln2) - 2**k|.  We also scale the exponent of
+	 * exp_x to MAX_EXP so that the result can be multiplied by
+	 * a tiny number without losing accuracy due to denormalization.
+	 */
+	exp_x = exp(x - kln2);
+	GET_HIGH_WORD(hx, exp_x);	/* upper 32 bits of exp_x */
+	*expt = (hx >> 20) - (0x3ff + 1023) + k;	/* exponent taken out below, plus k */
+	SET_HIGH_WORD(exp_x, (hx & 0xfffff) | ((0x3ff + 1023) << 20));	/* keep fraction, force exponent */
+	return (exp_x);
+}
+
+/*
+ * __ldexp_exp(x, expt) and __ldexp_cexp(x, expt) compute exp(x) * 2**expt.
+ * They are intended for large arguments (real part >= ln(DBL_MAX))
+ * where care is needed to avoid overflow.
+ *
+ * The present implementation is narrowly tailored for our hyperbolic and
+ * exponential functions.  We assume expt is small (0 or -1), and the caller
+ * has filtered out very large x, for which overflow would be inevitable.
+ */
+
+double				/* returns exp(x) * 2**expt */
+__ldexp_exp(double x, int expt)
+{
+	double exp_x, scale;
+	int ex_expt;
+
+	exp_x = __frexp_exp(x, &ex_expt);	/* scaled exp(x); its exponent goes to ex_expt */
+	expt += ex_expt;			/* total power of two still to apply */
+	INSERT_WORDS(scale, (0x3ff + expt) << 20, 0);	/* build the scale factor from its exponent field */
+	return (exp_x * scale);
+}
+
+double complex			/* returns exp(z) * 2**expt */
+__ldexp_cexp(double complex z, int expt)
+{
+	double x, y, exp_x, scale1, scale2;
+	int ex_expt, half_expt;
+
+	x = creal(z);
+	y = cimag(z);
+	exp_x = __frexp_exp(x, &ex_expt);	/* scaled exp(x); its exponent goes to ex_expt */
+	expt += ex_expt;			/* total power of two still to apply */
+
+	/*
+	 * Arrange so that scale1 * scale2 == 2**expt.  We use this to
+	 * compensate for scalbn being horrendously slow.
+	 */
+	half_expt = expt / 2;
+	INSERT_WORDS(scale1, (0x3ff + half_expt) << 20, 0);
+	half_expt = expt - half_expt;	/* remainder, so the two halves sum to expt */
+	INSERT_WORDS(scale2, (0x3ff + half_expt) << 20, 0);
+
+	return (cpack(cos(y) * exp_x * scale1 * scale2,
+	    sin(y) * exp_x * scale1 * scale2));
+}
diff --git a/lib/msun/src/k_expf.c b/lib/msun/src/k_expf.c
new file mode 100644
index 0000000..a860b9f
--- /dev/null
+++ b/lib/msun/src/k_expf.c
@@ -0,0 +1,87 @@
+/*-
+ * Copyright (c) 2011 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+
+#include "math.h"
+#include "math_private.h"
+
+static const uint32_t k = 235;			/* constant for reduction */
+static const float kln2 =  162.88958740F;	/* k * ln2 */
+
+/*
+ * Float version of __frexp_exp(); see k_exp.c for details.  Returns y and
+ * stores an exponent in '*expt' such that expf(x) ~= y * 2**(*expt).
+ * Input:  ln(FLT_MAX) <= x < ln(2 * FLT_MAX / FLT_MIN_DENORM) ~= 192.7
+ * Output: 2**127 <= y < 2**128
+ */
+static float
+__frexp_expf(float x, int *expt)
+{
+	float exp_x;		/* same type as expf() and the return value */
+	uint32_t hx;
+
+	exp_x = expf(x - kln2);
+	GET_FLOAT_WORD(hx, exp_x);
+	*expt = (hx >> 23) - (0x7f + 127) + k;	/* exponent taken out below, plus k */
+	SET_FLOAT_WORD(exp_x, (hx & 0x7fffff) | ((0x7f + 127) << 23));	/* keep fraction, force exponent */
+	return (exp_x);
+}
+
+float				/* returns expf(x) * 2**expt */
+__ldexp_expf(float x, int expt)
+{
+	float exp_x, scale;
+	int ex_expt;
+
+	exp_x = __frexp_expf(x, &ex_expt);	/* scaled expf(x); its exponent goes to ex_expt */
+	expt += ex_expt;			/* total power of two still to apply */
+	SET_FLOAT_WORD(scale, (0x7f + expt) << 23);	/* build the scale factor from its exponent field */
+	return (exp_x * scale);
+}
+
+float complex			/* returns exp(z) * 2**expt, float version of __ldexp_cexp() */
+__ldexp_cexpf(float complex z, int expt)
+{
+	float x, y, exp_x, scale1, scale2;
+	int ex_expt, half_expt;
+
+	x = crealf(z);
+	y = cimagf(z);
+	exp_x = __frexp_expf(x, &ex_expt);	/* scaled expf(x); its exponent goes to ex_expt */
+	expt += ex_expt;			/* total power of two still to apply */
+
+	half_expt = expt / 2;		/* split expt across two scale factors */
+	SET_FLOAT_WORD(scale1, (0x7f + half_expt) << 23);
+	half_expt = expt - half_expt;	/* remainder, so the two halves sum to expt */
+	SET_FLOAT_WORD(scale2, (0x7f + half_expt) << 23);
+
+	return (cpackf(cosf(y) * exp_x * scale1 * scale2,
+	    sinf(y) * exp_x * scale1 * scale2));
+}
diff --git a/lib/msun/src/k_log.h b/lib/msun/src/k_log.h
index 206355c..aaff8bd 100644
--- a/lib/msun/src/k_log.h
+++ b/lib/msun/src/k_log.h
@@ -14,8 +14,9 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-/* __kernel_log(x)
- * Return log(x) - (x-1) for x in ~[sqrt(2)/2, sqrt(2)].
+/*
+ * k_log1p(f):
+ * Return log(1+f) - f + 0.5*f*f for 1+f in ~[sqrt(2)/2, sqrt(2)].
  *
  * The following describes the overall strategy for computing
  * logarithms in base e.  The argument reduction and adding the final
@@ -80,37 +81,20 @@ Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
 Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
 
 /*
- * We always inline __kernel_log(), since doing so produces a
+ * We always inline k_log1p(), since doing so produces a
  * substantial performance improvement (~40% on amd64).
  */
 static inline double
-__kernel_log(double x)
+k_log1p(double f)
 {
-	double hfsq,f,s,z,R,w,t1,t2;
-	int32_t hx,i,j;
-	u_int32_t lx;
-
-	EXTRACT_WORDS(hx,lx,x);
+	double hfsq,s,z,R,w,t1,t2;
 
-	f = x-1.0;
-	if((0x000fffff&(2+hx))<3) {	/* -2**-20 <= f < 2**-20 */
-	    if(f==0.0) return 0.0;
-	    return f*f*(0.33333333333333333*f-0.5);
-	}
- 	s = f/(2.0+f); 
+ 	s = f/(2.0+f);
 	z = s*s;
-	hx &= 0x000fffff;
-	i = hx-0x6147a;
 	w = z*z;
-	j = 0x6b851-hx;
-	t1= w*(Lg2+w*(Lg4+w*Lg6)); 
-	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7))); 
-	i |= j;
+	t1= w*(Lg2+w*(Lg4+w*Lg6));
+	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
 	R = t2+t1;
-	if (i>0) {
-	    hfsq=0.5*f*f;
-	    return s*(hfsq+R) - hfsq;
-	} else {
-	    return s*(R-f);
-	}
+	hfsq=0.5*f*f;
+	return s*(hfsq+R);
 }
diff --git a/lib/msun/src/k_logf.h b/lib/msun/src/k_logf.h
index d9f0f3d..71c547e 100644
--- a/lib/msun/src/k_logf.h
+++ b/lib/msun/src/k_logf.h
@@ -13,7 +13,7 @@
 __FBSDID("$FreeBSD$");
 
 /*
- * float version of __kernel_log(x). See k_log.c for details.
+ * Float version of k_log.h.  See the latter for most comments.
  */
 
 static const float
@@ -24,32 +24,16 @@ Lg3 =      0x91e9ee.0p-25,	/* 0.28498786688 */
 Lg4 =      0xf89e26.0p-26;	/* 0.24279078841 */
 
 static inline float
-__kernel_logf(float x)
+k_log1pf(float f)
 {
-	float hfsq,f,s,z,R,w,t1,t2;
-	int32_t ix,i,j;
+	float hfsq,s,z,R,w,t1,t2;
 
-	GET_FLOAT_WORD(ix,x);
-
-	f = x-(float)1.0;
-	if((0x007fffff&(0x8000+ix))<0xc000) {	/* -2**-9 <= f < 2**-9 */
-	    if(f==0.0f) return 0.0f;
-	    return f*f*((float)0.33333333333333333*f-(float)0.5);
-	}
  	s = f/((float)2.0+f);
 	z = s*s;
-	ix &= 0x007fffff;
-	i = ix-(0x6147a<<3);
 	w = z*z;
-	j = (0x6b851<<3)-ix;
 	t1= w*(Lg2+w*Lg4);
 	t2= z*(Lg1+w*Lg3);
-	i |= j;
 	R = t2+t1;
-	if(i>0) {
-	    hfsq=(float)0.5*f*f;
-	    return s*(hfsq+R) - hfsq;
-	} else {
-	    return s*(R-f);
-	}
+	hfsq=(float)0.5*f*f;
+	return s*(hfsq+R);
 }
diff --git a/lib/msun/src/math.h b/lib/msun/src/math.h
index 8ad13ed..69b138e 100644
--- a/lib/msun/src/math.h
+++ b/lib/msun/src/math.h
@@ -68,14 +68,11 @@ extern const union __nan_un {
 #define	MATH_ERREXCEPT	2
 #define	math_errhandling	MATH_ERREXCEPT
 
-/* XXX We need a <machine/math.h>. */
-#if defined(__ia64__) || defined(__sparc64__)
-#define	FP_FAST_FMA	1
-#endif
+#define	FP_FAST_FMAF	1
 #ifdef __ia64__
+#define	FP_FAST_FMA	1
 #define	FP_FAST_FMAL	1
 #endif
-#define	FP_FAST_FMAF	1
 
 /* Symbolic constants to classify floating point numbers. */
 #define	FP_INFINITE	0x01
diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h
index d79f808..79280e3 100644
--- a/lib/msun/src/math_private.h
+++ b/lib/msun/src/math_private.h
@@ -58,6 +58,10 @@ typedef union
     u_int32_t msw;
     u_int32_t lsw;
   } parts;
+  struct
+  {
+    u_int64_t w;
+  } xparts;
 } ieee_double_shape_type;
 
 #endif
@@ -72,6 +76,10 @@ typedef union
     u_int32_t lsw;
     u_int32_t msw;
   } parts;
+  struct
+  {
+    u_int64_t w;
+  } xparts;
 } ieee_double_shape_type;
 
 #endif
@@ -86,6 +94,14 @@ do {								\
   (ix1) = ew_u.parts.lsw;					\
 } while (0)
 
+/* Get a 64-bit int from a double. */
+#define EXTRACT_WORD64(ix,d)					\
+do {								\
+  ieee_double_shape_type ew_u;					\
+  ew_u.value = (d);						\
+  (ix) = ew_u.xparts.w;						\
+} while (0)
+
 /* Get the more significant 32 bit int from a double.  */
 
 #define GET_HIGH_WORD(i,d)					\
@@ -114,6 +130,14 @@ do {								\
   (d) = iw_u.value;						\
 } while (0)
 
+/* Set a double from a 64-bit int. */
+#define INSERT_WORD64(d,ix)					\
+do {								\
+  ieee_double_shape_type iw_u;					\
+  iw_u.xparts.w = (ix);						\
+  (d) = iw_u.value;						\
+} while (0)
+
 /* Set the more significant 32 bits of a double from an int.  */
 
 #define SET_HIGH_WORD(d,v)					\
@@ -164,6 +188,25 @@ do {								\
   (d) = sf_u.value;						\
 } while (0)
 
+/* Get expsign as a 16 bit int from a long double.  */
+
+#define	GET_LDBL_EXPSIGN(i,d)					\
+do {								\
+  union IEEEl2bits ge_u;					\
+  ge_u.e = (d);							\
+  (i) = ge_u.xbits.expsign;					\
+} while (0)
+
+/* Set expsign of a long double from a 16 bit int.  */
+
+#define	SET_LDBL_EXPSIGN(d,v)					\
+do {								\
+  union IEEEl2bits se_u;					\
+  se_u.e = (d);							\
+  se_u.xbits.expsign = (v);					\
+  (d) = se_u.e;							\
+} while (0)
+
 #ifdef FLT_EVAL_METHOD
 /*
  * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
@@ -354,6 +397,10 @@ int	__ieee754_rem_pio2(double,double*);
 double	__kernel_sin(double,double,int);
 double	__kernel_cos(double,double);
 double	__kernel_tan(double,double,int);
+double	__ldexp_exp(double,int);
+#ifdef _COMPLEX_H
+double complex __ldexp_cexp(double complex,int);
+#endif
 
 /* float precision kernel functions */
 #ifdef INLINE_REM_PIO2F
@@ -372,6 +419,10 @@ float	__kernel_cosdf(double);
 __inline
 #endif
 float	__kernel_tandf(double,int);
+float	__ldexp_expf(float,int);
+#ifdef _COMPLEX_H
+float complex __ldexp_cexpf(float complex,int);
+#endif
 
 /* long double precision kernel functions */
 long double __kernel_sinl(long double, long double, int);
diff --git a/lib/msun/src/s_ccosh.c b/lib/msun/src/s_ccosh.c
new file mode 100644
index 0000000..9ea962b
--- /dev/null
+++ b/lib/msun/src/s_ccosh.c
@@ -0,0 +1,155 @@
+/*-
+ * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic cosine of a complex argument z = x + i y.
+ *
+ * cosh(z) = cosh(x+iy)
+ *         = cosh(x) cos(y) + i sinh(x) sin(y).
+ *
+ * Exceptional values are noted in the comments within the source code.
+ * These values and the return value were taken from n1124.pdf.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+static const double huge = 0x1p1023;
+
+double complex
+ccosh(double complex z)
+{
+	double x, y, h;
+	int32_t hx, hy, ix, iy, lx, ly;
+
+	x = creal(z);
+	y = cimag(z);
+
+	EXTRACT_WORDS(hx, lx, x);
+	EXTRACT_WORDS(hy, ly, y);
+
+	ix = 0x7fffffff & hx;	/* high word of |x| */
+	iy = 0x7fffffff & hy;	/* high word of |y| */
+
+	/* Handle the nearly-non-exceptional cases where x and y are finite. */
+	if (ix < 0x7ff00000 && iy < 0x7ff00000) {
+		if ((iy | ly) == 0)	/* y is +-0 */
+			return (cpack(cosh(x), x * y));
+		if (ix < 0x40360000)	/* |x| < 22: normal case */
+			return (cpack(cosh(x) * cos(y), sinh(x) * sin(y)));
+
+		/* |x| >= 22, so cosh(x) ~= |sinh(x)| ~= exp(|x|) / 2 */
+		if (ix < 0x40862e42) {
+			/* |x| < 710: exp(|x|) won't overflow */
+			h = exp(fabs(x)) * 0.5;
+			return (cpack(h * cos(y), copysign(h, x) * sin(y)));
+		} else if (ix < 0x4096bbaa) {
+			/* |x| < 1455: scale to avoid overflow */
+			z = __ldexp_cexp(cpack(fabs(x), y), -1);
+			return (cpack(creal(z), cimag(z) * copysign(1, x)));
+		} else {
+			/* |x| >= 1455: the result always overflows */
+			h = huge * x;
+			return (cpack(h * h * cos(y), h * sin(y)));
+		}
+	}
+
+	/*
+	 * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0.
+	 * The sign of 0 in the result is unspecified.  Choice = normally
+	 * the same as dNaN.  Raise the invalid floating-point exception.
+	 *
+	 * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0.
+	 * The sign of 0 in the result is unspecified.  Choice = normally
+	 * the same as d(NaN).
+	 */
+	if ((ix | lx) == 0 && iy >= 0x7ff00000)
+		return (cpack(y - y, copysign(0, x * (y - y))));
+
+	/*
+	 * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0.
+	 *
+	 * cosh(NaN +- I 0)   = d(NaN) + I sign(d(NaN, +-0))0.
+	 * The sign of 0 in the result is unspecified.
+	 */
+	if ((iy | ly) == 0 && ix >= 0x7ff00000) {
+		if (((hx & 0xfffff) | lx) == 0)	/* x is +-Inf */
+			return (cpack(x * x, copysign(0, x) * y));
+		return (cpack(x * x, copysign(0, (x + x) * y)));
+	}
+
+	/*
+	 * cosh(x +- I Inf) = dNaN + I dNaN.
+	 * Raise the invalid floating-point exception for finite nonzero x.
+	 *
+	 * cosh(x + I NaN) = d(NaN) + I d(NaN).
+	 * Optionally raises the invalid floating-point exception for finite
+	 * nonzero x.  Choice = don't raise (except for signaling NaNs).
+	 */
+	if (ix < 0x7ff00000 && iy >= 0x7ff00000)
+		return (cpack(y - y, x * (y - y)));
+
+	/*
+	 * cosh(+-Inf + I NaN)  = +Inf + I d(NaN).
+	 *
+	 * cosh(+-Inf +- I Inf) = +Inf + I dNaN.
+	 * The sign of Inf in the result is unspecified.  Choice = always +.
+	 * Raise the invalid floating-point exception.
+	 *
+	 * cosh(+-Inf + I y)   = +Inf cos(y) +- I Inf sin(y)
+	 */
+	if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) {
+		if (iy >= 0x7ff00000)
+			return (cpack(x * x, x * (y - y)));
+		return (cpack((x * x) * cos(y), x * sin(y)));
+	}
+
+	/*
+	 * cosh(NaN + I NaN)  = d(NaN) + I d(NaN).
+	 *
+	 * cosh(NaN +- I Inf) = d(NaN) + I d(NaN).
+	 * Optionally raises the invalid floating-point exception.
+	 * Choice = raise.
+	 *
+	 * cosh(NaN + I y)    = d(NaN) + I d(NaN).
+	 * Optionally raises the invalid floating-point exception for finite
+	 * nonzero y.  Choice = don't raise (except for signaling NaNs).
+	 */
+	return (cpack((x * x) * (y - y), (x + x) * (y - y)));
+}
+
+double complex
+ccos(double complex z)
+{
+	double complex iz = cpack(-cimag(z), creal(z));	/* I * z */
+
+	return (ccosh(iz));	/* ccos(z) = ccosh(I * z) */
+}
diff --git a/lib/msun/src/s_ccoshf.c b/lib/msun/src/s_ccoshf.c
new file mode 100644
index 0000000..1de9ad4
--- /dev/null
+++ b/lib/msun/src/s_ccoshf.c
@@ -0,0 +1,104 @@
+/*-
+ * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic cosine of a complex argument.  See s_ccosh.c for details.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+static const float huge = 0x1p127;
+
+float complex
+ccoshf(float complex z)
+{
+	float x, y, h;
+	int32_t hx, hy, ix, iy;
+
+	x = crealf(z);
+	y = cimagf(z);
+
+	GET_FLOAT_WORD(hx, x);
+	GET_FLOAT_WORD(hy, y);
+
+	ix = 0x7fffffff & hx;	/* bits of |x| */
+	iy = 0x7fffffff & hy;	/* bits of |y| */
+
+	if (ix < 0x7f800000 && iy < 0x7f800000) {	/* x and y finite */
+		if (iy == 0)	/* y is +-0 */
+			return (cpackf(coshf(x), x * y));
+		if (ix < 0x41100000)	/* |x| < 9: normal case */
+			return (cpackf(coshf(x) * cosf(y), sinhf(x) * sinf(y)));
+
+		/* |x| >= 9, so cosh(x) ~= |sinh(x)| ~= exp(|x|) / 2 */
+		if (ix < 0x42b17218) {
+			/* |x| < 88.7: expf(|x|) won't overflow */
+			h = expf(fabsf(x)) * 0.5f;
+			return (cpackf(h * cosf(y), copysignf(h, x) * sinf(y)));
+		} else if (ix < 0x4340b1e7) {
+			/* |x| < 192.7: scale to avoid overflow */
+			z = __ldexp_cexpf(cpackf(fabsf(x), y), -1);
+			return (cpackf(crealf(z), cimagf(z) * copysignf(1, x)));
+		} else {
+			/* |x| >= 192.7: the result always overflows */
+			h = huge * x;
+			return (cpackf(h * h * cosf(y), h * sinf(y)));
+		}
+	}
+
+	if (ix == 0 && iy >= 0x7f800000)	/* x = +-0, y = Inf or NaN */
+		return (cpackf(y - y, copysignf(0, x * (y - y))));
+
+	if (iy == 0 && ix >= 0x7f800000) {	/* y = +-0, x = Inf or NaN */
+		if ((hx & 0x7fffff) == 0)	/* x is +-Inf */
+			return (cpackf(x * x, copysignf(0, x) * y));
+		return (cpackf(x * x, copysignf(0, (x + x) * y)));
+	}
+
+	if (ix < 0x7f800000 && iy >= 0x7f800000)	/* y is Inf or NaN */
+		return (cpackf(y - y, x * (y - y)));
+
+	if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) {	/* x is +-Inf */
+		if (iy >= 0x7f800000)
+			return (cpackf(x * x, x * (y - y)));
+		return (cpackf((x * x) * cosf(y), x * sinf(y)));
+	}
+
+	return (cpackf((x * x) * (y - y), (x + x) * (y - y)));
+}
+
+float complex
+ccosf(float complex z)
+{
+	float complex iz = cpackf(-cimagf(z), crealf(z));	/* I * z */
+	return (ccoshf(iz));	/* ccosf(z) = ccoshf(I * z) */
+}
diff --git a/lib/msun/src/s_cexp.c b/lib/msun/src/s_cexp.c
index ecf0992..abe178f 100644
--- a/lib/msun/src/s_cexp.c
+++ b/lib/msun/src/s_cexp.c
@@ -34,18 +34,13 @@ __FBSDID("$FreeBSD$");
 
 static const uint32_t
 exp_ovfl  = 0x40862e42,			/* high bits of MAX_EXP * ln2 ~= 710 */
-cexp_ovfl = 0x4096b8e4,			/* (MAX_EXP - MIN_DENORM_EXP) * ln2 */
-k         = 1799;			/* constant for reduction */
-
-static const double
-kln2      =  1246.97177782734161156;	/* k * ln2 */
+cexp_ovfl = 0x4096b8e4;			/* (MAX_EXP - MIN_DENORM_EXP) * ln2 */
 
 double complex
 cexp(double complex z)
 {
 	double x, y, exp_x;
 	uint32_t hx, hy, lx, ly;
-	int scale;
 
 	x = creal(z);
 	y = cimag(z);
@@ -56,8 +51,12 @@ cexp(double complex z)
 	/* cexp(x + I 0) = exp(x) + I 0 */
 	if ((hy | ly) == 0)
 		return (cpack(exp(x), y));
+	EXTRACT_WORDS(hx, lx, x);
+	/* cexp(0 + I y) = cos(y) + I sin(y) */
+	if (((hx & 0x7fffffff) | lx) == 0)
+		return (cpack(cos(y), sin(y)));
+
 	if (hy >= 0x7ff00000) {
-		EXTRACT_WORDS(hx, lx, x);
 		if (lx != 0 || (hx & 0x7fffffff) != 0x7ff00000) {
 			/* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */
 			return (cpack(y - y, y - y));
@@ -70,21 +69,12 @@ cexp(double complex z)
 		}
 	}
 
-	GET_HIGH_WORD(hx, x);
 	if (hx >= exp_ovfl && hx <= cexp_ovfl) {
 		/*
 		 * x is between 709.7 and 1454.3, so we must scale to avoid
-		 * overflow in exp(x).  We use exp(x) = exp(x - kln2) * 2**k,
-		 * carefully chosen to minimize |exp(kln2) - 2**k|.  We also
-		 * scale the exponent of exp(x) to MANT_DIG to avoid loss of
-		 * accuracy due to underflow if sin(y) is tiny.
+		 * overflow in exp(x).
 		 */
-		exp_x = exp(x - kln2);
-		GET_HIGH_WORD(hx, exp_x);
-		SET_HIGH_WORD(exp_x, (hx & 0xfffff) | ((0x3ff + 52) << 20));
-		scale = (hx >> 20) - (0x3ff + 52) + k;
-		return (cpack(scalbn(cos(y) * exp_x, scale),
-			scalbn(sin(y) * exp_x, scale)));
+		return (__ldexp_cexp(z, 0));
 	} else {
 		/*
 		 * Cases covered here:
diff --git a/lib/msun/src/s_cexpf.c b/lib/msun/src/s_cexpf.c
index 4ea3931..0e30d08 100644
--- a/lib/msun/src/s_cexpf.c
+++ b/lib/msun/src/s_cexpf.c
@@ -34,18 +34,13 @@ __FBSDID("$FreeBSD$");
 
 static const uint32_t
 exp_ovfl  = 0x42b17218,		/* MAX_EXP * ln2 ~= 88.722839355 */
-cexp_ovfl = 0x43400074,		/* (MAX_EXP - MIN_DENORM_EXP) * ln2 */
-k         = 235;		/* constant for reduction */
-
-static const float
-kln2      =  162.88958740f;	/* k * ln2 */
+cexp_ovfl = 0x43400074;		/* (MAX_EXP - MIN_DENORM_EXP) * ln2 */
 
 float complex
 cexpf(float complex z)
 {
 	float x, y, exp_x;
 	uint32_t hx, hy;
-	int scale;
 
 	x = crealf(z);
 	y = cimagf(z);
@@ -57,6 +52,10 @@ cexpf(float complex z)
 	if (hy == 0)
 		return (cpackf(expf(x), y));
 	GET_FLOAT_WORD(hx, x);
+	/* cexp(0 + I y) = cos(y) + I sin(y) */
+	if ((hx & 0x7fffffff) == 0)
+		return (cpackf(cosf(y), sinf(y)));
+
 	if (hy >= 0x7f800000) {
 		if ((hx & 0x7fffffff) != 0x7f800000) {
 			/* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */
@@ -73,17 +72,9 @@ cexpf(float complex z)
 	if (hx >= exp_ovfl && hx <= cexp_ovfl) {
 		/*
 		 * x is between 88.7 and 192, so we must scale to avoid
-		 * overflow in expf(x).  We use exp(x) = exp(x - kln2) * 2**k,
-		 * carefully chosen to minimize |exp(kln2) - 2**k|.  We also
-		 * scale the exponent of exp(x) to MANT_DIG to avoid loss of
-		 * accuracy due to underflow if sin(y) is tiny.
+		 * overflow in expf(x).
 		 */
-		exp_x = expf(x - kln2);
-		GET_FLOAT_WORD(hx, exp_x);
-		SET_FLOAT_WORD(exp_x, (hx & 0x7fffff) | ((0x7f + 23) << 23));
-		scale = (hx >> 23) - (0x7f + 23) + k;
-		return (cpackf(scalbnf(cosf(y) * exp_x, scale),
-			scalbnf(sinf(y) * exp_x, scale)));
+		return (__ldexp_cexpf(z, 0));
 	} else {
 		/*
 		 * Cases covered here:
diff --git a/lib/msun/src/s_csinh.c b/lib/msun/src/s_csinh.c
new file mode 100644
index 0000000..c192f30
--- /dev/null
+++ b/lib/msun/src/s_csinh.c
@@ -0,0 +1,157 @@
+/*-
+ * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic sine of a complex argument z = x + i y.
+ *
+ * sinh(z) = sinh(x+iy)
+ *         = sinh(x) cos(y) + i cosh(x) sin(y).
+ *
+ * Exceptional values are noted in the comments within the source code.
+ * These values and the return value were taken from n1124.pdf.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+static const double huge = 0x1p1023;
+
+double complex
+csinh(double complex z)
+{
+	double x, y, h;
+	int32_t hx, hy, ix, iy, lx, ly;
+
+	x = creal(z);
+	y = cimag(z);
+
+	EXTRACT_WORDS(hx, lx, x);
+	EXTRACT_WORDS(hy, ly, y);
+
+	ix = 0x7fffffff & hx;	/* high word of |x| */
+	iy = 0x7fffffff & hy;	/* high word of |y| */
+
+	/* Handle the nearly-non-exceptional cases where x and y are finite. */
+	if (ix < 0x7ff00000 && iy < 0x7ff00000) {
+		if ((iy | ly) == 0)	/* y is +-0 */
+			return (cpack(sinh(x), y));
+		if (ix < 0x40360000)	/* |x| < 22: normal case */
+			return (cpack(sinh(x) * cos(y), cosh(x) * sin(y)));
+
+		/* |x| >= 22, so cosh(x) ~= |sinh(x)| ~= exp(|x|) / 2 */
+		if (ix < 0x40862e42) {
+			/* |x| < 710: exp(|x|) won't overflow */
+			h = exp(fabs(x)) * 0.5;
+			return (cpack(copysign(h, x) * cos(y), h * sin(y)));
+		} else if (ix < 0x4096bbaa) {
+			/* |x| < 1455: scale to avoid overflow */
+			z = __ldexp_cexp(cpack(fabs(x), y), -1);
+			return (cpack(creal(z) * copysign(1, x), cimag(z)));
+		} else {
+			/* |x| >= 1455: the result always overflows */
+			h = huge * x;
+			return (cpack(h * cos(y), h * h * sin(y)));
+		}
+	}
+
+	/*
+	 * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN.
+	 * The sign of 0 in the result is unspecified.  Choice = normally
+	 * the same as dNaN.  Raise the invalid floating-point exception.
+	 *
+	 * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN).
+	 * The sign of 0 in the result is unspecified.  Choice = normally
+	 * the same as d(NaN).
+	 */
+	if ((ix | lx) == 0 && iy >= 0x7ff00000)
+		return (cpack(copysign(0, x * (y - y)), y - y));
+
+	/*
+	 * sinh(+-Inf +- I 0) = +-Inf + I +-0.
+	 *
+	 * sinh(NaN +- I 0)   = d(NaN) + I +-0.
+	 */
+	if ((iy | ly) == 0 && ix >= 0x7ff00000) {
+		if (((hx & 0xfffff) | lx) == 0)	/* x is +-Inf */
+			return (cpack(x, y));
+		return (cpack(x, copysign(0, y)));
+	}
+
+	/*
+	 * sinh(x +- I Inf) = dNaN + I dNaN.
+	 * Raise the invalid floating-point exception for finite nonzero x.
+	 *
+	 * sinh(x + I NaN) = d(NaN) + I d(NaN).
+	 * Optionally raises the invalid floating-point exception for finite
+	 * nonzero x.  Choice = don't raise (except for signaling NaNs).
+	 */
+	if (ix < 0x7ff00000 && iy >= 0x7ff00000)
+		return (cpack(y - y, x * (y - y)));
+
+	/*
+	 * sinh(+-Inf + I NaN)  = +-Inf + I d(NaN).
+	 * The sign of Inf in the result is unspecified.  Choice = always +
+	 * (the real part is computed as x * x).
+	 *
+	 * sinh(+-Inf +- I Inf) = +Inf + I dNaN.
+	 * The sign of Inf in the result is unspecified.  Choice = always +.
+	 * Raise the invalid floating-point exception.
+	 *
+	 * sinh(+-Inf + I y)   = +-Inf cos(y) + I Inf sin(y)
+	 */
+	if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) {
+		if (iy >= 0x7ff00000)
+			return (cpack(x * x, x * (y - y)));
+		return (cpack(x * cos(y), INFINITY * sin(y)));
+	}
+
+	/*
+	 * sinh(NaN + I NaN)  = d(NaN) + I d(NaN).
+	 *
+	 * sinh(NaN +- I Inf) = d(NaN) + I d(NaN).
+	 * Optionally raises the invalid floating-point exception.
+	 * Choice = raise.
+	 *
+	 * sinh(NaN + I y)    = d(NaN) + I d(NaN).
+	 * Optionally raises the invalid floating-point exception for finite
+	 * nonzero y.  Choice = don't raise (except for signaling NaNs).
+	 */
+	return (cpack((x * x) * (y - y), (x + x) * (y - y)));
+}
+
+double complex
+csin(double complex z)
+{
+	double complex w;
+
+	w = csinh(cpack(-cimag(z), creal(z)));	/* w = csinh(I * z) */
+	return (cpack(cimag(w), -creal(w)));	/* csin(z) = -I * w */
+}
diff --git a/lib/msun/src/s_csinhf.c b/lib/msun/src/s_csinhf.c
new file mode 100644
index 0000000..c523125
--- /dev/null
+++ b/lib/msun/src/s_csinhf.c
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic sine of a complex argument z.  See s_csinh.c for details.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+static const float huge = 0x1p127;
+
+float complex
+csinhf(float complex z)
+{
+	float x, y, h;
+	int32_t hx, hy, ix, iy;
+
+	x = crealf(z);
+	y = cimagf(z);
+
+	GET_FLOAT_WORD(hx, x);
+	GET_FLOAT_WORD(hy, y);
+
+	ix = 0x7fffffff & hx;	/* bits of |x| */
+	iy = 0x7fffffff & hy;	/* bits of |y| */
+
+	if (ix < 0x7f800000 && iy < 0x7f800000) {	/* x and y finite */
+		if (iy == 0)	/* y is +-0 */
+			return (cpackf(sinhf(x), y));
+		if (ix < 0x41100000)	/* |x| < 9: normal case */
+			return (cpackf(sinhf(x) * cosf(y), coshf(x) * sinf(y)));
+
+		/* |x| >= 9, so cosh(x) ~= |sinh(x)| ~= exp(|x|) / 2 */
+		if (ix < 0x42b17218) {
+			/* |x| < 88.7: expf(|x|) won't overflow */
+			h = expf(fabsf(x)) * 0.5f;
+			return (cpackf(copysignf(h, x) * cosf(y), h * sinf(y)));
+		} else if (ix < 0x4340b1e7) {
+			/* |x| < 192.7: scale to avoid overflow */
+			z = __ldexp_cexpf(cpackf(fabsf(x), y), -1);
+			return (cpackf(crealf(z) * copysignf(1, x), cimagf(z)));
+		} else {
+			/* |x| >= 192.7: the result always overflows */
+			h = huge * x;
+			return (cpackf(h * cosf(y), h * h * sinf(y)));
+		}
+	}
+
+	if (ix == 0 && iy >= 0x7f800000)	/* x = +-0, y = Inf or NaN */
+		return (cpackf(copysignf(0, x * (y - y)), y - y));
+
+	if (iy == 0 && ix >= 0x7f800000) {	/* y = +-0, x = Inf or NaN */
+		if ((hx & 0x7fffff) == 0)	/* x is +-Inf */
+			return (cpackf(x, y));
+		return (cpackf(x, copysignf(0, y)));
+	}
+
+	if (ix < 0x7f800000 && iy >= 0x7f800000)	/* y is Inf or NaN */
+		return (cpackf(y - y, x * (y - y)));
+
+	if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) {	/* x is +-Inf */
+		if (iy >= 0x7f800000)
+			return (cpackf(x * x, x * (y - y)));
+		return (cpackf(x * cosf(y), INFINITY * sinf(y)));
+	}
+
+	return (cpackf((x * x) * (y - y), (x + x) * (y - y)));
+}
+
+float complex
+csinf(float complex z)
+{
+	float complex w = csinhf(cpackf(-cimagf(z), crealf(z)));
+
+	return (cpackf(cimagf(w), -crealf(w)));	/* -I * csinhf(I * z) */
+}
diff --git a/lib/msun/src/s_ctanh.c b/lib/msun/src/s_ctanh.c
new file mode 100644
index 0000000..d427e28
--- /dev/null
+++ b/lib/msun/src/s_ctanh.c
@@ -0,0 +1,144 @@
+/*-
+ * Copyright (c) 2011 David Schultz
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic tangent of a complex argument z = x + i y.
+ *
+ * The algorithm is from:
+ *
+ *   W. Kahan.  Branch Cuts for Complex Elementary Functions or Much
+ *   Ado About Nothing's Sign Bit.  In The State of the Art in
+ *   Numerical Analysis, pp. 165 ff.  Iserles and Powell, eds., 1987.
+ *
+ * Method:
+ *
+ *   Let t    = tan(y)
+ *       beta = 1/cos^2(y)
+ *       s    = sinh(x)
+ *       rho  = cosh(x)
+ *
+ *   We have:
+ *
+ *   tanh(z) = sinh(z) / cosh(z)
+ *
+ *             sinh(x) cos(y) + i cosh(x) sin(y)
+ *           = ---------------------------------
+ *             cosh(x) cos(y) + i sinh(x) sin(y)
+ *
+ *             cosh(x) sinh(x) / cos^2(y) + i tan(y)
+ *           = -------------------------------------
+ *                    1 + sinh^2(x) / cos^2(y)
+ *
+ *             beta rho s + i t
+ *           = ----------------
+ *               1 + beta s^2
+ *
+ * Modifications:
+ *
+ *   I omitted the original algorithm's handling of overflow in tan(y) after
+ *   verifying with nearpi.c that this can't happen in IEEE single or double
+ *   precision.  I also handle large x differently.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+double complex
+ctanh(double complex z)
+{
+	double x, y;
+	double t, beta, s, rho, denom;
+	uint32_t hx, ix, lx;
+
+	x = creal(z);
+	y = cimag(z);
+
+	EXTRACT_WORDS(hx, lx, x);
+	ix = hx & 0x7fffffff;	/* high word of |x| */
+
+	/*
+	 * ctanh(NaN + i 0) = NaN + i 0
+	 *
+	 * ctanh(NaN + i y) = NaN + i NaN		for y != 0
+	 *
+	 * The imaginary part has the sign of x*sin(2*y), but there's no
+	 * special effort to get this right.
+	 *
+	 * ctanh(+-Inf +- i Inf) = +-1 +- i 0
+	 *
+	 * ctanh(+-Inf + i y) = +-1 + i 0 sin(2y)	for y finite
+	 *
+	 * The sign of the imaginary part is unspecified.  This special
+	 * case is only needed to avoid a spurious invalid exception when
+	 * y is infinite.
+	 */
+	if (ix >= 0x7ff00000) {
+		if ((ix & 0xfffff) | lx)	/* x is NaN */
+			return (cpack(x, (y == 0 ? y : x * y)));
+		SET_HIGH_WORD(x, hx - 0x40000000);	/* x = copysign(1, x) */
+		return (cpack(x, copysign(0, isinf(y) ? y : sin(y) * cos(y))));
+	}
+
+	/*
+	 * ctanh(x + i NaN) = NaN + i NaN
+	 * ctanh(x +- i Inf) = NaN + i NaN
+	 */
+	if (!isfinite(y))
+		return (cpack(y - y, y - y));
+
+	/*
+	 * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2|x|), using the
+	 * approximation sinh^2(huge) ~= exp(2*huge) / 4.
+	 * We use a modified formula to avoid spurious overflow.
+	 */
+	if (ix >= 0x40360000) {	/* |x| >= 22 */
+		double exp_mx = exp(-fabs(x));
+		return (cpack(copysign(1, x),
+		    4 * sin(y) * cos(y) * exp_mx * exp_mx));
+	}
+
+	/* Kahan's algorithm */
+	t = tan(y);
+	beta = 1.0 + t * t;	/* = 1 / cos^2(y) */
+	s = sinh(x);
+	rho = sqrt(1 + s * s);	/* = cosh(x) */
+	denom = 1 + beta * s * s;
+	return (cpack((beta * rho * s) / denom, t / denom));
+}
+
+double complex
+ctan(double complex z)
+{
+	double complex w;
+
+	w = ctanh(cpack(-cimag(z), creal(z)));	/* w = ctanh(I * z) */
+	return (cpack(cimag(w), -creal(w)));	/* ctan(z) = -I * w */
+}
diff --git a/lib/msun/src/s_ctanhf.c b/lib/msun/src/s_ctanhf.c
new file mode 100644
index 0000000..4be28d8
--- /dev/null
+++ b/lib/msun/src/s_ctanhf.c
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 2011 David Schultz
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic tangent of a complex argument z.  See s_ctanh.c for details.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+float complex
+ctanhf(float complex z)
+{
+	float x, y;
+	float t, beta, s, rho, denom;
+	uint32_t hx, ix;
+
+	x = crealf(z);
+	y = cimagf(z);
+
+	GET_FLOAT_WORD(hx, x);
+	ix = hx & 0x7fffffff;	/* bits of |x| */
+
+	if (ix >= 0x7f800000) {		/* x is Inf or NaN */
+		if (ix & 0x7fffff)	/* x is NaN */
+			return (cpackf(x, (y == 0 ? y : x * y)));
+		SET_FLOAT_WORD(x, hx - 0x40000000);	/* x = copysignf(1, x) */
+		return (cpackf(x,
+		    copysignf(0, isinf(y) ? y : sinf(y) * cosf(y))));
+	}
+
+	if (!isfinite(y))	/* finite x, y is Inf or NaN: NaN + i NaN */
+		return (cpackf(y - y, y - y));
+
+	if (ix >= 0x41300000) {	/* |x| >= 11 */
+		float exp_mx = expf(-fabsf(x));
+		return (cpackf(copysignf(1, x),
+		    4 * sinf(y) * cosf(y) * exp_mx * exp_mx));
+	}
+
+	t = tanf(y);		/* Kahan's algorithm, as in s_ctanh.c */
+	beta = 1.0f + t * t;	/* = 1 / cos^2(y), kept in float */
+	s = sinhf(x);
+	rho = sqrtf(1 + s * s);	/* = cosh(x) */
+	denom = 1 + beta * s * s;
+	return (cpackf((beta * rho * s) / denom, t / denom));
+}
+
+float complex
+ctanf(float complex z)
+{
+	float complex w = ctanhf(cpackf(-cimagf(z), crealf(z)));
+
+	return (cpackf(cimagf(w), -crealf(w)));	/* -I * ctanhf(I * z) */
+}
+
diff --git a/lib/msun/src/s_expm1.c b/lib/msun/src/s_expm1.c
index 3de7bfb..5aa1917 100644
--- a/lib/msun/src/s_expm1.c
+++ b/lib/msun/src/s_expm1.c
@@ -108,6 +108,8 @@ __FBSDID("$FreeBSD$");
  * to produce the hexadecimal values shown.
  */
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 
@@ -135,7 +137,6 @@ expm1(double x)
 
 	GET_HIGH_WORD(hx,x);
 	xsb = hx&0x80000000;		/* sign bit of x */
-	if(xsb==0) y=x; else y= -x;	/* y = |x| */
 	hx &= 0x7fffffff;		/* high word of |x| */
 
     /* filter out huge and non-finite argument */
@@ -169,7 +170,7 @@ expm1(double x)
 		hi = x - t*ln2_hi;	/* t*ln2_hi is exact here */
 		lo = t*ln2_lo;
 	    }
-	    x  = hi - lo;
+	    STRICT_ASSIGN(double, x, hi - lo);
 	    c  = (hi-x)-lo;
 	}
 	else if(hx < 0x3c900000) {  	/* when |x|<2**-54, return x */
diff --git a/lib/msun/src/s_expm1f.c b/lib/msun/src/s_expm1f.c
index 483472c..fb37494 100644
--- a/lib/msun/src/s_expm1f.c
+++ b/lib/msun/src/s_expm1f.c
@@ -16,6 +16,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <float.h>
+
 #include "math.h"
 #include "math_private.h"
 
@@ -44,7 +46,6 @@ expm1f(float x)
 
 	GET_FLOAT_WORD(hx,x);
 	xsb = hx&0x80000000;		/* sign bit of x */
-	if(xsb==0) y=x; else y= -x;	/* y = |x| */
 	hx &= 0x7fffffff;		/* high word of |x| */
 
     /* filter out huge and non-finite argument */
@@ -75,7 +76,7 @@ expm1f(float x)
 		hi = x - t*ln2_hi;	/* t*ln2_hi is exact here */
 		lo = t*ln2_lo;
 	    }
-	    x  = hi - lo;
+	    STRICT_ASSIGN(float, x, hi - lo);
 	    c  = (hi-x)-lo;
 	}
 	else if(hx < 0x33000000) {  	/* when |x|<2**-25, return x */
diff --git a/lib/msun/src/s_fma.c b/lib/msun/src/s_fma.c
index ad1fc4a..dfbd13c 100644
--- a/lib/msun/src/s_fma.c
+++ b/lib/msun/src/s_fma.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,132 @@ __FBSDID("$FreeBSD$");
 #include <float.h>
 #include <math.h>
 
+#include "math_private.h"
+
+/*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a double.  We maintain the invariant that "hi" stores the 53 high-order
+ * bits of the result.
+ */
+struct dd {
+	double hi;	/* high-order part (see the invariant above) */
+	double lo;	/* low-order part; the value represented is hi + lo */
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd
+dd_add(double a, double b)
+{
+	struct dd ret;
+	double s;
+
+	ret.hi = a + b;		/* the sum, rounded to double */
+	s = ret.hi - a;		/* the part of b that made it into ret.hi */
+	ret.lo = (a - (ret.hi - s)) + (b - s);	/* what the rounding dropped */
+	return (ret);
+}
+
+/*
+ * Compute a+b, with a small tweak:  The least significant bit of the
+ * result is adjusted into a sticky bit summarizing all the bits that
+ * were lost to rounding.  This adjustment negates the effects of double
+ * rounding when the result is added to another number with a higher
+ * exponent.  Only the sign bits of sum.hi and sum.lo select whether the
+ * adjustment is +1 or -1 ulp.  For round and sticky bits, see, e.g.,
+ *
+ *     J. Coonen.  An Implementation Guide to a Proposed Standard for
+ *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
+ */
+static inline double
+add_adjusted(double a, double b)
+{
+	struct dd sum;
+	uint64_t hibits, lobits;
+
+	sum = dd_add(a, b);
+	if (sum.lo != 0) {			/* a+b was not exact */
+		EXTRACT_WORD64(hibits, sum.hi);
+		if ((hibits & 1) == 0) {	/* low bit not yet sticky */
+			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
+			EXTRACT_WORD64(lobits, sum.lo);
+			hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
+			INSERT_WORD64(sum.hi, hibits);
+		}
+	}
+	return (sum.hi);
+}
+
+/*
+ * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
+ * that the result will be subnormal, and care is taken to ensure that
+ * double rounding does not occur.
+ */
+static inline double
+add_and_denormalize(double a, double b, int scale)
+{
+	struct dd sum;
+	uint64_t hibits, lobits;
+	int bits_lost;
+
+	sum = dd_add(a, b);
+
+	/*
+	 * If we are losing at least two bits of accuracy to denormalization,
+	 * then the first lost bit becomes a round bit, and we adjust the
+	 * lowest bit of sum.hi to make it a sticky bit summarizing all the
+	 * bits in sum.lo. With the sticky bit adjusted, the hardware will
+	 * break any ties in the correct direction.
+	 *
+	 * If we are losing only one bit to denormalization, however, we must
+	 * break the ties manually.
+	 */
+	if (sum.lo != 0) {			/* a+b was not exact */
+		EXTRACT_WORD64(hibits, sum.hi);
+		bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
+		if ((bits_lost != 1) ^ (int)(hibits & 1)) {
+			/* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
+			EXTRACT_WORD64(lobits, sum.lo);
+			hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
+			INSERT_WORD64(sum.hi, hibits);
+		}
+	}
+	return (ldexp(sum.hi, scale));
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd
+dd_mul(double a, double b)
+{
+	static const double split = 0x1p27 + 1.0;
+	struct dd ret;
+	double ha, hb, la, lb, p, q;
+
+	p = a * split;		/* split a into a high part ha ... */
+	ha = a - p;
+	ha += p;
+	la = a - ha;		/* ... and the remainder la */
+
+	p = b * split;		/* split b into hb and lb the same way */
+	hb = b - p;
+	hb += p;
+	lb = b - hb;
+
+	p = ha * hb;			/* product of the high parts */
+	q = ha * lb + la * hb;		/* cross terms */
+
+	ret.hi = p + q;
+	ret.lo = p - ret.hi + q + la * lb;	/* what ret.hi left out */
+	return (ret);
+}
+
 /*
  * Fused multiply-add: Compute x * y + z with a single rounding error.
  *
@@ -48,14 +174,11 @@ __FBSDID("$FreeBSD$");
  * Hardware instructions should be used on architectures that support it,
  * since this implementation will likely be several times slower.
  */
-#if LDBL_MANT_DIG != 113
 double
 fma(double x, double y, double z)
 {
-	static const double split = 0x1p27 + 1.0;
-	double xs, ys, zs;
-	double c, cc, hx, hy, p, q, tx, ty;
-	double r, rr, s;
+	double xs, ys, zs, adj;
+	struct dd xy, r;
 	int oround;
 	int ex, ey, ez;
 	int spread;
@@ -85,41 +208,6 @@ fma(double x, double y, double z)
 	 * will overflow, so we handle these cases specially.  Rounding
 	 * modes other than FE_TONEAREST are painful.
 	 */
-	if (spread > DBL_MANT_DIG * 2) {
-		fenv_t env;
-		feraiseexcept(FE_INEXACT);
-		switch(oround) {
-		case FE_TONEAREST:
-			return (x * y);
-		case FE_TOWARDZERO:
-			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
-				return (x * y);
-			feholdexcept(&env);
-			r = x * y;
-			if (!fetestexcept(FE_INEXACT))
-				r = nextafter(r, 0);
-			feupdateenv(&env);
-			return (r);
-		case FE_DOWNWARD:
-			if (z > 0.0)
-				return (x * y);
-			feholdexcept(&env);
-			r = x * y;
-			if (!fetestexcept(FE_INEXACT))
-				r = nextafter(r, -INFINITY);
-			feupdateenv(&env);
-			return (r);
-		default:	/* FE_UPWARD */
-			if (z < 0.0)
-				return (x * y);
-			feholdexcept(&env);
-			r = x * y;
-			if (!fetestexcept(FE_INEXACT))
-				r = nextafter(r, INFINITY);
-			feupdateenv(&env);
-			return (r);
-		}
-	}
 	if (spread < -DBL_MANT_DIG) {
 		feraiseexcept(FE_INEXACT);
 		if (!isnormal(z))
@@ -144,63 +232,52 @@ fma(double x, double y, double z)
 				return (z);
 		}
 	}
+	if (spread <= DBL_MANT_DIG * 2)
+		zs = ldexp(zs, -spread);
+	else
+		zs = copysign(DBL_MIN, zs);
 
-	/*
-	 * Use Dekker's algorithm to perform the multiplication and
-	 * subsequent addition in twice the machine precision.
-	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
-	 */
 	fesetround(FE_TONEAREST);
 
-	p = xs * split;
-	hx = xs - p;
-	hx += p;
-	tx = xs - hx;
-
-	p = ys * split;
-	hy = ys - p;
-	hy += p;
-	ty = ys - hy;
-
-	p = hx * hy;
-	q = hx * ty + tx * hy;
-	c = p + q;
-	cc = p - c + q + tx * ty;
-
-	zs = ldexp(zs, -spread);
-	r = c + zs;
-	s = r - c;
-	rr = (c - (r - s)) + (zs - s) + cc;
+	/*
+	 * Basic approach for round-to-nearest:
+	 *
+	 *     (xy.hi, xy.lo) = x * y		(exact)
+	 *     (r.hi, r.lo)   = xy.hi + z	(exact)
+	 *     adj = xy.lo + r.lo		(inexact; low bit is sticky)
+	 *     result = r.hi + adj		(correctly rounded)
+	 */
+	xy = dd_mul(xs, ys);
+	r = dd_add(xy.hi, zs);
 
 	spread = ex + ey;
-	if (spread + ilogb(r) > -1023) {
+
+	if (r.hi == 0.0) {
+		/*
+		 * When the addends cancel to 0, ensure that the result has
+		 * the correct sign.
+		 */
 		fesetround(oround);
-		r = r + rr;
-	} else {
+		volatile double vzs = zs; /* XXX gcc CSE bug workaround */
+		return (xy.hi + vzs + ldexp(xy.lo, spread));
+	}
+
+	if (oround != FE_TONEAREST) {
 		/*
-		 * The result is subnormal, so we round before scaling to
-		 * avoid double rounding.
+		 * There is no need to worry about double rounding in directed
+		 * rounding modes.
 		 */
-		p = ldexp(copysign(0x1p-1022, r), -spread);
-		c = r + p;
-		s = c - r;
-		cc = (r - (c - s)) + (p - s) + rr;
 		fesetround(oround);
-		r = (c + cc) - p;
+		adj = r.lo + xy.lo;
+		return (ldexp(r.hi + adj, spread));
 	}
-	return (ldexp(r, spread));
-}
-#else	/* LDBL_MANT_DIG == 113 */
-/*
- * 113 bits of precision is more than twice the precision of a double,
- * so it is enough to represent the intermediate product exactly.
- */
-double
-fma(double x, double y, double z)
-{
-	return ((long double)x * y + z);
+
+	adj = add_adjusted(r.lo, xy.lo);
+	if (spread + ilogb(r.hi) > -1023)
+		return (ldexp(r.hi + adj, spread));
+	else
+		return (add_and_denormalize(r.hi, adj, spread));
 }
-#endif	/* LDBL_MANT_DIG != 113 */
 
 #if (LDBL_MANT_DIG == 53)
 __weak_reference(fma, fmal);
diff --git a/lib/msun/src/s_fmaf.c b/lib/msun/src/s_fmaf.c
index 7c699e5..3695823 100644
--- a/lib/msun/src/s_fmaf.c
+++ b/lib/msun/src/s_fmaf.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,23 +27,43 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <fenv.h>
+
 #include "math.h"
+#include "math_private.h"
 
 /*
  * Fused multiply-add: Compute x * y + z with a single rounding error.
  *
  * A double has more than twice as much precision than a float, so
- * direct double-precision arithmetic suffices.
- *
- * XXX We are relying on the compiler to convert from double to float
- *     using the current rounding mode and with the appropriate
- *     side-effects.  But on at least one platform (gcc 3.4.2/sparc64),
- *     this appears to be too much to ask for.  The precision
- *     reduction should be done manually.
+ * direct double-precision arithmetic suffices, except where double
+ * rounding occurs.
  */
 float
 fmaf(float x, float y, float z)
 {
+	double xy, result;
+	uint32_t hr, lr;
+
+	xy = (double)x * y;
+	result = xy + z;
+	EXTRACT_WORDS(hr, lr, result);
+	/* Common case: The double precision result is fine. */
+	if ((lr & 0x1fffffff) != 0x10000000 ||	/* not a halfway case */
+	    (hr & 0x7ff00000) == 0x7ff00000 ||	/* Inf or NaN */
+	    (result - xy == z && result - z == xy) || /* exact */
+	    fegetround() != FE_TONEAREST)	/* not round-to-nearest */
+		return (result);
 
-	return ((double)x * y + z);
+	/*
+	 * If result is inexact, and exactly halfway between two float values,
+	 * we need to adjust the low-order bit in the direction of the error.
+	 */
+	fesetround(FE_TOWARDZERO);
+	volatile double vxy = xy;  /* XXX work around gcc CSE bug */
+	double adjusted_result = vxy + z;
+	fesetround(FE_TONEAREST);
+	if (result == adjusted_result)
+		SET_LOW_WORD(adjusted_result, lr + 1);
+	return (adjusted_result);
 }
diff --git a/lib/msun/src/s_fmal.c b/lib/msun/src/s_fmal.c
index 4d5d114..c2a6913 100644
--- a/lib/msun/src/s_fmal.c
+++ b/lib/msun/src/s_fmal.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,128 @@ __FBSDID("$FreeBSD$");
 #include <float.h>
 #include <math.h>
 
+#include "fpmath.h"
+
+/*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a long double.  We maintain the invariant that "hi" stores the high-order
+ * bits of the result.
+ */
+struct dd {
+	long double hi;	/* high-order part (see the invariant above) */
+	long double lo;	/* low-order part; the value represented is hi + lo */
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd
+dd_add(long double a, long double b)
+{
+	struct dd ret;
+	long double s;
+
+	ret.hi = a + b;		/* the sum, rounded to long double */
+	s = ret.hi - a;		/* the part of b that made it into ret.hi */
+	ret.lo = (a - (ret.hi - s)) + (b - s);	/* what the rounding dropped */
+	return (ret);
+}
+
+/*
+ * Compute a+b, with a small tweak:  The least significant bit of the
+ * result is adjusted into a sticky bit summarizing all the bits that
+ * were lost to rounding.  This adjustment negates the effects of double
+ * rounding when the result is added to another number with a higher
+ * exponent.  For an explanation of round and sticky bits, see any reference
+ * on FPU design, e.g.,
+ *
+ *     J. Coonen.  An Implementation Guide to a Proposed Standard for
+ *     Floating-Point Arithmetic.  Computer, vol. 13, no. 1, Jan 1980.
+ */
+static inline long double
+add_adjusted(long double a, long double b)
+{
+	struct dd sum;
+	union IEEEl2bits u;
+
+	sum = dd_add(a, b);
+	if (sum.lo != 0) {			/* a+b was not exact */
+		u.e = sum.hi;
+		if ((u.bits.manl & 1) == 0)	/* low bit not yet sticky */
+			sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
+	}
+	return (sum.hi);
+}
+
+/*
+ * Compute ldexpl(a+b, scale) with a single rounding error. It is assumed
+ * that the result will be subnormal, and care is taken to ensure that
+ * double rounding does not occur.
+ */
+static inline long double
+add_and_denormalize(long double a, long double b, int scale)
+{
+	struct dd sum;
+	int bits_lost;
+	union IEEEl2bits u;
+
+	sum = dd_add(a, b);
+
+	/*
+	 * If we are losing at least two bits of accuracy to denormalization,
+	 * then the first lost bit becomes a round bit, and we adjust the
+	 * lowest bit of sum.hi to make it a sticky bit summarizing all the
+	 * bits in sum.lo. With the sticky bit adjusted, the hardware will
+	 * break any ties in the correct direction.
+	 *
+	 * If we are losing only one bit to denormalization, however, we must
+	 * break the ties manually.
+	 */
+	if (sum.lo != 0) {			/* a+b was not exact */
+		u.e = sum.hi;
+		bits_lost = -u.bits.exp - scale + 1;
+		if ((bits_lost != 1) ^ (int)(u.bits.manl & 1))
+			sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
+	}
+	return (ldexpl(sum.hi, scale));	/* ldexp() would narrow to double */
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd.  We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd
+dd_mul(long double a, long double b)
+{
+#if LDBL_MANT_DIG == 64
+	static const long double split = 0x1p32L + 1.0;
+#elif LDBL_MANT_DIG == 113
+	static const long double split = 0x1p57L + 1.0;
+#endif
+	struct dd ret;
+	long double ha, hb, la, lb, p, q;
+
+	p = a * split;		/* split a into a high part ha ... */
+	ha = a - p;
+	ha += p;
+	la = a - ha;		/* ... and the remainder la */
+
+	p = b * split;		/* split b into hb and lb the same way */
+	hb = b - p;
+	hb += p;
+	lb = b - hb;
+
+	p = ha * hb;			/* product of the high parts */
+	q = ha * lb + la * hb;		/* cross terms */
+
+	ret.hi = p + q;
+	ret.lo = p - ret.hi + q + la * lb;	/* what ret.hi left out */
+	return (ret);
+}
+
 /*
  * Fused multiply-add: Compute x * y + z with a single rounding error.
  *
@@ -43,14 +165,8 @@ __FBSDID("$FreeBSD$");
 long double
 fmal(long double x, long double y, long double z)
 {
-#if LDBL_MANT_DIG == 64
-	static const long double split = 0x1p32L + 1.0;
-#elif LDBL_MANT_DIG == 113
-	static const long double split = 0x1p57L + 1.0;
-#endif
-	long double xs, ys, zs;
-	long double c, cc, hx, hy, p, q, tx, ty;
-	long double r, rr, s;
+	long double xs, ys, zs, adj;
+	struct dd xy, r;
 	int oround;
 	int ex, ey, ez;
 	int spread;
@@ -80,41 +196,6 @@ fmal(long double x, long double y, long double z)
 	 * will overflow, so we handle these cases specially.  Rounding
 	 * modes other than FE_TONEAREST are painful.
 	 */
-	if (spread > LDBL_MANT_DIG * 2) {
-		fenv_t env;
-		feraiseexcept(FE_INEXACT);
-		switch(oround) {
-		case FE_TONEAREST:
-			return (x * y);
-		case FE_TOWARDZERO:
-			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
-				return (x * y);
-			feholdexcept(&env);
-			r = x * y;
-			if (!fetestexcept(FE_INEXACT))
-				r = nextafterl(r, 0);
-			feupdateenv(&env);
-			return (r);
-		case FE_DOWNWARD:
-			if (z > 0.0)
-				return (x * y);
-			feholdexcept(&env);
-			r = x * y;
-			if (!fetestexcept(FE_INEXACT))
-				r = nextafterl(r, -INFINITY);
-			feupdateenv(&env);
-			return (r);
-		default:	/* FE_UPWARD */
-			if (z < 0.0)
-				return (x * y);
-			feholdexcept(&env);
-			r = x * y;
-			if (!fetestexcept(FE_INEXACT))
-				r = nextafterl(r, INFINITY);
-			feupdateenv(&env);
-			return (r);
-		}
-	}
 	if (spread < -LDBL_MANT_DIG) {
 		feraiseexcept(FE_INEXACT);
 		if (!isnormal(z))
@@ -139,49 +220,49 @@ fmal(long double x, long double y, long double z)
 				return (z);
 		}
 	}
+	if (spread <= LDBL_MANT_DIG * 2)
+		zs = ldexpl(zs, -spread);
+	else
+		zs = copysignl(LDBL_MIN, zs);
 
-	/*
-	 * Use Dekker's algorithm to perform the multiplication and
-	 * subsequent addition in twice the machine precision.
-	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
-	 */
 	fesetround(FE_TONEAREST);
 
-	p = xs * split;
-	hx = xs - p;
-	hx += p;
-	tx = xs - hx;
-
-	p = ys * split;
-	hy = ys - p;
-	hy += p;
-	ty = ys - hy;
-
-	p = hx * hy;
-	q = hx * ty + tx * hy;
-	c = p + q;
-	cc = p - c + q + tx * ty;
-
-	zs = ldexpl(zs, -spread);
-	r = c + zs;
-	s = r - c;
-	rr = (c - (r - s)) + (zs - s) + cc;
+	/*
+	 * Basic approach for round-to-nearest:
+	 *
+	 *     (xy.hi, xy.lo) = x * y		(exact)
+	 *     (r.hi, r.lo)   = xy.hi + z	(exact)
+	 *     adj = xy.lo + r.lo		(inexact; low bit is sticky)
+	 *     result = r.hi + adj		(correctly rounded)
+	 */
+	xy = dd_mul(xs, ys);
+	r = dd_add(xy.hi, zs);
 
 	spread = ex + ey;
-	if (spread + ilogbl(r) > -16383) {
+
+	if (r.hi == 0.0) {
+		/*
+		 * When the addends cancel to 0, ensure that the result has
+		 * the correct sign.
+		 */
 		fesetround(oround);
-		r = r + rr;
-	} else {
+		volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
+		return (xy.hi + vzs + ldexpl(xy.lo, spread));
+	}
+
+	if (oround != FE_TONEAREST) {
 		/*
-		 * The result is subnormal, so we round before scaling to
-		 * avoid double rounding.
+		 * There is no need to worry about double rounding in directed
+		 * rounding modes.
 		 */
-		p = ldexpl(copysignl(0x1p-16382L, r), -spread);
-		c = r + p;
-		s = c - r;
-		cc = (r - (c - s)) + (p - s) + rr;
 		fesetround(oround);
-		r = (c + cc) - p;
+		adj = r.lo + xy.lo;
+		return (ldexpl(r.hi + adj, spread));
 	}
-	return (ldexpl(r, spread));
+
+	adj = add_adjusted(r.lo, xy.lo);
+	if (spread + ilogbl(r.hi) > -16383)
+		return (ldexpl(r.hi + adj, spread));
+	else
+		return (add_and_denormalize(r.hi, adj, spread));
 }