summaryrefslogtreecommitdiffstats
path: root/lib/msun/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/msun/src')
-rw-r--r--lib/msun/src/e_cosh.c10
-rw-r--r--lib/msun/src/e_coshf.c7
-rw-r--r--lib/msun/src/e_exp.c4
-rw-r--r--lib/msun/src/e_expf.c6
-rw-r--r--lib/msun/src/e_hypot.c2
-rw-r--r--lib/msun/src/e_hypotf.c2
-rw-r--r--lib/msun/src/e_hypotl.c15
-rw-r--r--lib/msun/src/e_lgamma_r.c1
-rw-r--r--lib/msun/src/e_lgammaf_r.c1
-rw-r--r--lib/msun/src/e_log10.c51
-rw-r--r--lib/msun/src/e_log10f.c39
-rw-r--r--lib/msun/src/e_log2.c76
-rw-r--r--lib/msun/src/e_log2f.c51
-rw-r--r--lib/msun/src/e_pow.c5
-rw-r--r--lib/msun/src/e_powf.c5
-rw-r--r--lib/msun/src/e_sinh.c11
-rw-r--r--lib/msun/src/e_sinhf.c9
-rw-r--r--lib/msun/src/k_exp.c108
-rw-r--r--lib/msun/src/k_expf.c87
-rw-r--r--lib/msun/src/k_log.h38
-rw-r--r--lib/msun/src/k_logf.h26
-rw-r--r--lib/msun/src/math.h7
-rw-r--r--lib/msun/src/math_private.h51
-rw-r--r--lib/msun/src/s_ccosh.c155
-rw-r--r--lib/msun/src/s_ccoshf.c104
-rw-r--r--lib/msun/src/s_cexp.c26
-rw-r--r--lib/msun/src/s_cexpf.c23
-rw-r--r--lib/msun/src/s_csinh.c157
-rw-r--r--lib/msun/src/s_csinhf.c105
-rw-r--r--lib/msun/src/s_ctanh.c144
-rw-r--r--lib/msun/src/s_ctanhf.c84
-rw-r--r--lib/msun/src/s_expm1.c5
-rw-r--r--lib/msun/src/s_expm1f.c5
-rw-r--r--lib/msun/src/s_fma.c251
-rw-r--r--lib/msun/src/s_fmaf.c38
-rw-r--r--lib/msun/src/s_fmal.c239
36 files changed, 1575 insertions, 373 deletions
diff --git a/lib/msun/src/e_cosh.c b/lib/msun/src/e_cosh.c
index 11e6590..a363695 100644
--- a/lib/msun/src/e_cosh.c
+++ b/lib/msun/src/e_cosh.c
@@ -45,7 +45,6 @@ __ieee754_cosh(double x)
{
double t,w;
int32_t ix;
- u_int32_t lx;
/* High word of |x|. */
GET_HIGH_WORD(ix,x);
@@ -72,13 +71,8 @@ __ieee754_cosh(double x)
if (ix < 0x40862E42) return half*__ieee754_exp(fabs(x));
/* |x| in [log(maxdouble), overflowthresold] */
- GET_LOW_WORD(lx,x);
- if (ix<0x408633CE ||
- ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) {
- w = __ieee754_exp(half*fabs(x));
- t = half*w;
- return t*w;
- }
+ if (ix<=0x408633CE)
+ return __ldexp_exp(fabs(x), -1);
/* |x| > overflowthresold, cosh(x) overflow */
return huge*huge;
diff --git a/lib/msun/src/e_coshf.c b/lib/msun/src/e_coshf.c
index 4a1d499..95a0d6e 100644
--- a/lib/msun/src/e_coshf.c
+++ b/lib/msun/src/e_coshf.c
@@ -51,11 +51,8 @@ __ieee754_coshf(float x)
if (ix < 0x42b17217) return half*__ieee754_expf(fabsf(x));
/* |x| in [log(maxfloat), overflowthresold] */
- if (ix<=0x42b2d4fc) {
- w = __ieee754_expf(half*fabsf(x));
- t = half*w;
- return t*w;
- }
+ if (ix<=0x42b2d4fc)
+ return __ldexp_expf(fabsf(x), -1);
/* |x| > overflowthresold, cosh(x) overflow */
return huge*huge;
diff --git a/lib/msun/src/e_exp.c b/lib/msun/src/e_exp.c
index 5b9a10c..b47aef5 100644
--- a/lib/msun/src/e_exp.c
+++ b/lib/msun/src/e_exp.c
@@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$");
* to produce the hexadecimal values shown.
*/
+#include <float.h>
+
#include "math.h"
#include "math_private.h"
@@ -133,7 +135,7 @@ __ieee754_exp(double x) /* default IEEE double exp */
hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */
lo = t*ln2LO[0];
}
- x = hi - lo;
+ STRICT_ASSIGN(double, x, hi - lo);
}
else if(hx < 0x3e300000) { /* when |x|<2**-28 */
if(huge+x>one) return one+x;/* trigger inexact */
diff --git a/lib/msun/src/e_expf.c b/lib/msun/src/e_expf.c
index 502e421..a479076 100644
--- a/lib/msun/src/e_expf.c
+++ b/lib/msun/src/e_expf.c
@@ -16,6 +16,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <float.h>
+
#include "math.h"
#include "math_private.h"
@@ -40,7 +42,7 @@ P2 = -2.7667332906e-3; /* -0xb55215.0p-32 */
static volatile float twom100 = 7.8886090522e-31; /* 2**-100=0x0d800000 */
float
-__ieee754_expf(float x) /* default IEEE double exp */
+__ieee754_expf(float x)
{
float y,hi=0.0,lo=0.0,c,t,twopk;
int32_t k=0,xsb;
@@ -70,7 +72,7 @@ __ieee754_expf(float x) /* default IEEE double exp */
hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */
lo = t*ln2LO[0];
}
- x = hi - lo;
+ STRICT_ASSIGN(float, x, hi - lo);
}
else if(hx < 0x39000000) { /* when |x|<2**-14 */
if(huge+x>one) return one+x;/* trigger inexact */
diff --git a/lib/msun/src/e_hypot.c b/lib/msun/src/e_hypot.c
index fb498c1..2398e98 100644
--- a/lib/msun/src/e_hypot.c
+++ b/lib/msun/src/e_hypot.c
@@ -54,7 +54,7 @@ __FBSDID("$FreeBSD$");
double
__ieee754_hypot(double x, double y)
{
- double a=x,b=y,t1,t2,y1,y2,w;
+ double a,b,t1,t2,y1,y2,w;
int32_t j,k,ha,hb;
GET_HIGH_WORD(ha,x);
diff --git a/lib/msun/src/e_hypotf.c b/lib/msun/src/e_hypotf.c
index c82c6e7..6d083e4 100644
--- a/lib/msun/src/e_hypotf.c
+++ b/lib/msun/src/e_hypotf.c
@@ -22,7 +22,7 @@ __FBSDID("$FreeBSD$");
float
__ieee754_hypotf(float x, float y)
{
- float a=x,b=y,t1,t2,y1,y2,w;
+ float a,b,t1,t2,y1,y2,w;
int32_t j,k,ha,hb;
GET_FLOAT_WORD(ha,x);
diff --git a/lib/msun/src/e_hypotl.c b/lib/msun/src/e_hypotl.c
index 0c899bf..7b5ab89 100644
--- a/lib/msun/src/e_hypotl.c
+++ b/lib/msun/src/e_hypotl.c
@@ -21,13 +21,6 @@ __FBSDID("$FreeBSD$");
#include "math.h"
#include "math_private.h"
-#define GET_LDBL_EXPSIGN(i, v) do { \
- union IEEEl2bits uv; \
- \
- uv.e = v; \
- i = uv.xbits.expsign; \
-} while (0)
-
#define GET_LDBL_MAN(h, l, v) do { \
union IEEEl2bits uv; \
\
@@ -36,14 +29,6 @@ __FBSDID("$FreeBSD$");
l = uv.bits.manl; \
} while (0)
-#define SET_LDBL_EXPSIGN(v, i) do { \
- union IEEEl2bits uv; \
- \
- uv.e = v; \
- uv.xbits.expsign = i; \
- v = uv.e; \
-} while (0)
-
#undef GET_HIGH_WORD
#define GET_HIGH_WORD(i, v) GET_LDBL_EXPSIGN(i, v)
#undef SET_HIGH_WORD
diff --git a/lib/msun/src/e_lgamma_r.c b/lib/msun/src/e_lgamma_r.c
index a587b8f..1cff592 100644
--- a/lib/msun/src/e_lgamma_r.c
+++ b/lib/msun/src/e_lgamma_r.c
@@ -269,7 +269,6 @@ __ieee754_lgamma_r(double x, int *signgamp)
}
else if(ix<0x40200000) { /* x < 8.0 */
i = (int)x;
- t = zero;
y = x-(double)i;
p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
diff --git a/lib/msun/src/e_lgammaf_r.c b/lib/msun/src/e_lgammaf_r.c
index 47c6ed0..e2d90ef 100644
--- a/lib/msun/src/e_lgammaf_r.c
+++ b/lib/msun/src/e_lgammaf_r.c
@@ -202,7 +202,6 @@ __ieee754_lgammaf_r(float x, int *signgamp)
}
else if(ix<0x41000000) { /* x < 8.0 */
i = (int)x;
- t = zero;
y = x-(float)i;
p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
diff --git a/lib/msun/src/e_log10.c b/lib/msun/src/e_log10.c
index 135f0dc..104d257 100644
--- a/lib/msun/src/e_log10.c
+++ b/lib/msun/src/e_log10.c
@@ -15,7 +15,11 @@
__FBSDID("$FreeBSD$");
/*
- * Return the base 10 logarithm of x. See k_log.c for details on the algorithm.
+ * Return the base 10 logarithm of x. See e_log.c and k_log.h for most
+ * comments.
+ *
+ * log10(x) = (f - 0.5*f*f + k_log1p(f)) / ln10 + k * log10(2)
+ * in not-quite-routine extra precision.
*/
#include "math.h"
@@ -34,31 +38,50 @@ static const double zero = 0.0;
double
__ieee754_log10(double x)
{
- double f,hi,lo,y,z;
+ double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2;
int32_t i,k,hx;
u_int32_t lx;
EXTRACT_WORDS(hx,lx,x);
- k=0;
- if (hx < 0x00100000) { /* x < 2**-1022 */
- if (((hx&0x7fffffff)|lx)==0)
- return -two54/zero; /* log(+-0)=-inf */
- if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
- k -= 54; x *= two54; /* subnormal number, scale up x */
+ k=0;
+ if (hx < 0x00100000) { /* x < 2**-1022 */
+ if (((hx&0x7fffffff)|lx)==0)
+ return -two54/zero; /* log(+-0)=-inf */
+ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
+ k -= 54; x *= two54; /* subnormal number, scale up x */
GET_HIGH_WORD(hx,x);
- }
+ }
if (hx >= 0x7ff00000) return x+x;
+ if (hx == 0x3ff00000 && lx == 0)
+ return zero; /* log(1) = +0 */
k += (hx>>20)-1023;
hx &= 0x000fffff;
i = (hx+0x95f64)&0x100000;
SET_HIGH_WORD(x,hx|(i^0x3ff00000)); /* normalize x or x/2 */
k += (i>>20);
y = (double)k;
- f = __kernel_log(x);
- hi = x = x - 1;
+ f = x - 1.0;
+ hfsq = 0.5*f*f;
+ r = k_log1p(f);
+
+ /* See e_log2.c for most details. */
+ hi = f - hfsq;
SET_LOW_WORD(hi,0);
- lo = x - hi;
- z = y*log10_2lo + (x+f)*ivln10lo + (lo+f)*ivln10hi + hi*ivln10hi;
- return z+y*log10_2hi;
+ lo = (f - hi) - hfsq + r;
+ val_hi = hi*ivln10hi;
+ y2 = y*log10_2hi;
+ val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;
+
+ /*
+ * Extra precision in for adding y*log10_2hi is not strictly needed
+ * since there is no very large cancellation near x = sqrt(2) or
+ * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
+ * with some parallelism and it reduces the error for many args.
+ */
+ w = y2 + val_hi;
+ val_lo += (y2 - w) + val_hi;
+ val_hi = w;
+
+ return val_lo + val_hi;
}
diff --git a/lib/msun/src/e_log10f.c b/lib/msun/src/e_log10f.c
index 940b831..c876594 100644
--- a/lib/msun/src/e_log10f.c
+++ b/lib/msun/src/e_log10f.c
@@ -13,7 +13,7 @@
__FBSDID("$FreeBSD$");
/*
- * Return the base 10 logarithm of x. See k_log.c for details on the algorithm.
+ * Float version of e_log10.c. See the latter for most comments.
*/
#include "math.h"
@@ -32,31 +32,40 @@ static const float zero = 0.0;
float
__ieee754_log10f(float x)
{
- float f,hi,lo,y,z;
+ float f,hfsq,hi,lo,r,y;
int32_t i,k,hx;
GET_FLOAT_WORD(hx,x);
- k=0;
- if (hx < 0x00800000) { /* x < 2**-126 */
- if ((hx&0x7fffffff)==0)
- return -two25/zero; /* log(+-0)=-inf */
- if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
- k -= 25; x *= two25; /* subnormal number, scale up x */
+ k=0;
+ if (hx < 0x00800000) { /* x < 2**-126 */
+ if ((hx&0x7fffffff)==0)
+ return -two25/zero; /* log(+-0)=-inf */
+ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
+ k -= 25; x *= two25; /* subnormal number, scale up x */
GET_FLOAT_WORD(hx,x);
- }
+ }
if (hx >= 0x7f800000) return x+x;
+ if (hx == 0x3f800000)
+ return zero; /* log(1) = +0 */
k += (hx>>23)-127;
hx &= 0x007fffff;
i = (hx+(0x4afb0d))&0x800000;
SET_FLOAT_WORD(x,hx|(i^0x3f800000)); /* normalize x or x/2 */
k += (i>>23);
y = (float)k;
- f = __kernel_logf(x);
- x = x - (float)1.0;
- GET_FLOAT_WORD(hx,x);
+ f = x - (float)1.0;
+ hfsq = (float)0.5*f*f;
+ r = k_log1pf(f);
+
+ /* See e_log2f.c and e_log2.c for details. */
+ if (sizeof(float_t) > sizeof(float))
+ return (r - hfsq + f) * ((float_t)ivln10lo + ivln10hi) +
+ y * ((float_t)log10_2lo + log10_2hi);
+ hi = f - hfsq;
+ GET_FLOAT_WORD(hx,hi);
SET_FLOAT_WORD(hi,hx&0xfffff000);
- lo = x - hi;
- z = y*log10_2lo + (x+f)*ivln10lo + (lo+f)*ivln10hi + hi*ivln10hi;
- return z+y*log10_2hi;
+ lo = (f - hi) - hfsq + r;
+ return y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi + hi*ivln10hi +
+ y*log10_2hi;
}
diff --git a/lib/msun/src/e_log2.c b/lib/msun/src/e_log2.c
index 6cf3dbc..1fc44a5 100644
--- a/lib/msun/src/e_log2.c
+++ b/lib/msun/src/e_log2.c
@@ -15,7 +15,13 @@
__FBSDID("$FreeBSD$");
/*
- * Return the base 2 logarithm of x. See k_log.c for details on the algorithm.
+ * Return the base 2 logarithm of x. See e_log.c and k_log.h for most
+ * comments.
+ *
+ * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
+ * then does the combining and scaling steps
+ * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
+ * in not-quite-routine extra precision.
*/
#include "math.h"
@@ -32,29 +38,73 @@ static const double zero = 0.0;
double
__ieee754_log2(double x)
{
- double f,hi,lo;
+ double f,hfsq,hi,lo,r,val_hi,val_lo,w,y;
int32_t i,k,hx;
u_int32_t lx;
EXTRACT_WORDS(hx,lx,x);
- k=0;
- if (hx < 0x00100000) { /* x < 2**-1022 */
- if (((hx&0x7fffffff)|lx)==0)
- return -two54/zero; /* log(+-0)=-inf */
- if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
- k -= 54; x *= two54; /* subnormal number, scale up x */
+ k=0;
+ if (hx < 0x00100000) { /* x < 2**-1022 */
+ if (((hx&0x7fffffff)|lx)==0)
+ return -two54/zero; /* log(+-0)=-inf */
+ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
+ k -= 54; x *= two54; /* subnormal number, scale up x */
GET_HIGH_WORD(hx,x);
- }
+ }
if (hx >= 0x7ff00000) return x+x;
+ if (hx == 0x3ff00000 && lx == 0)
+ return zero; /* log(1) = +0 */
k += (hx>>20)-1023;
hx &= 0x000fffff;
i = (hx+0x95f64)&0x100000;
SET_HIGH_WORD(x,hx|(i^0x3ff00000)); /* normalize x or x/2 */
k += (i>>20);
- f = __kernel_log(x);
- hi = x = x - 1;
+ y = (double)k;
+ f = x - 1.0;
+ hfsq = 0.5*f*f;
+ r = k_log1p(f);
+
+ /*
+ * f-hfsq must (for args near 1) be evaluated in extra precision
+ * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
+ * This is fairly efficient since f-hfsq only depends on f, so can
+ * be evaluated in parallel with R. Not combining hfsq with R also
+ * keeps R small (though not as small as a true `lo' term would be),
+ * so that extra precision is not needed for terms involving R.
+ *
+ * Compiler bugs involving extra precision used to break Dekker's
+ * theorem for spitting f-hfsq as hi+lo, unless double_t was used
+ * or the multi-precision calculations were avoided when double_t
+ * has extra precision. These problems are now automatically
+ * avoided as a side effect of the optimization of combining the
+ * Dekker splitting step with the clear-low-bits step.
+ *
+ * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
+ * precision to avoid a very large cancellation when x is very near
+ * these values. Unlike the above cancellations, this problem is
+ * specific to base 2. It is strange that adding +-1 is so much
+ * harder than adding +-ln2 or +-log10_2.
+ *
+ * This uses Dekker's theorem to normalize y+val_hi, so the
+ * compiler bugs are back in some configurations, sigh. And I
+ * don't want to used double_t to avoid them, since that gives a
+ * pessimization and the support for avoiding the pessimization
+ * is not yet available.
+ *
+ * The multi-precision calculations for the multiplications are
+ * routine.
+ */
+ hi = f - hfsq;
SET_LOW_WORD(hi,0);
- lo = x - hi;
- return (x+f)*ivln2lo + (lo+f)*ivln2hi + hi*ivln2hi + k;
+ lo = (f - hi) - hfsq + r;
+ val_hi = hi*ivln2hi;
+ val_lo = (lo+hi)*ivln2lo + lo*ivln2hi;
+
+ /* spadd(val_hi, val_lo, y), except for not using double_t: */
+ w = y + val_hi;
+ val_lo += (y - w) + val_hi;
+ val_hi = w;
+
+ return val_lo + val_hi;
}
diff --git a/lib/msun/src/e_log2f.c b/lib/msun/src/e_log2f.c
index bb308d3..7166346 100644
--- a/lib/msun/src/e_log2f.c
+++ b/lib/msun/src/e_log2f.c
@@ -13,7 +13,7 @@
__FBSDID("$FreeBSD$");
/*
- * Return the base 2 logarithm of x. See k_log.c for details on the algorithm.
+ * Float version of e_log2.c. See the latter for most comments.
*/
#include "math.h"
@@ -30,29 +30,52 @@ static const float zero = 0.0;
float
__ieee754_log2f(float x)
{
- float f,hi,lo;
+ float f,hfsq,hi,lo,r,y;
int32_t i,k,hx;
GET_FLOAT_WORD(hx,x);
- k=0;
- if (hx < 0x00800000) { /* x < 2**-126 */
- if ((hx&0x7fffffff)==0)
- return -two25/zero; /* log(+-0)=-inf */
- if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
- k -= 25; x *= two25; /* subnormal number, scale up x */
+ k=0;
+ if (hx < 0x00800000) { /* x < 2**-126 */
+ if ((hx&0x7fffffff)==0)
+ return -two25/zero; /* log(+-0)=-inf */
+ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */
+ k -= 25; x *= two25; /* subnormal number, scale up x */
GET_FLOAT_WORD(hx,x);
- }
+ }
if (hx >= 0x7f800000) return x+x;
+ if (hx == 0x3f800000)
+ return zero; /* log(1) = +0 */
k += (hx>>23)-127;
hx &= 0x007fffff;
i = (hx+(0x4afb0d))&0x800000;
SET_FLOAT_WORD(x,hx|(i^0x3f800000)); /* normalize x or x/2 */
k += (i>>23);
- f = __kernel_logf(x);
- x = x - (float)1.0;
- GET_FLOAT_WORD(hx,x);
+ y = (float)k;
+ f = x - (float)1.0;
+ hfsq = (float)0.5*f*f;
+ r = k_log1pf(f);
+
+ /*
+ * We no longer need to avoid falling into the multi-precision
+ * calculations due to compiler bugs breaking Dekker's theorem.
+ * Keep avoiding this as an optimization. See e_log2.c for more
+ * details (some details are here only because the optimization
+ * is not yet available in double precision).
+ *
+ * Another compiler bug turned up. With gcc on i386,
+ * (ivln2lo + ivln2hi) would be evaluated in float precision
+ * despite runtime evaluations using double precision. So we
+ * must cast one of its terms to float_t. This makes the whole
+ * expression have type float_t, so return is forced to waste
+ * time clobbering its extra precision.
+ */
+ if (sizeof(float_t) > sizeof(float))
+ return (r - hfsq + f) * ((float_t)ivln2lo + ivln2hi) + y;
+
+ hi = f - hfsq;
+ GET_FLOAT_WORD(hx,hi);
SET_FLOAT_WORD(hi,hx&0xfffff000);
- lo = x - hi;
- return (x+f)*ivln2lo + (lo+f)*ivln2hi + hi*ivln2hi + k;
+ lo = (f - hi) - hfsq + r;
+ return (lo+hi)*ivln2lo + lo*ivln2hi + hi*ivln2hi + y;
}
diff --git a/lib/msun/src/e_pow.c b/lib/msun/src/e_pow.c
index 4aa00d5..7607a4a 100644
--- a/lib/msun/src/e_pow.c
+++ b/lib/msun/src/e_pow.c
@@ -109,6 +109,9 @@ __ieee754_pow(double x, double y)
/* y==zero: x**0 = 1 */
if((iy|ly)==0) return one;
+ /* x==1: 1**y = 1, even if y is NaN */
+ if (hx==0x3ff00000 && lx == 0) return one;
+
/* y!=zero: result is NaN if either arg is NaN */
if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0)))
@@ -138,7 +141,7 @@ __ieee754_pow(double x, double y)
if(ly==0) {
if (iy==0x7ff00000) { /* y is +-inf */
if(((ix-0x3ff00000)|lx)==0)
- return y - y; /* inf**+-1 is NaN */
+ return one; /* (-1)**+-inf is NaN */
else if (ix >= 0x3ff00000)/* (|x|>1)**+-inf = inf,0 */
return (hy>=0)? y: zero;
else /* (|x|<1)**-,+inf = inf,0 */
diff --git a/lib/msun/src/e_powf.c b/lib/msun/src/e_powf.c
index 466ed72..5c46478 100644
--- a/lib/msun/src/e_powf.c
+++ b/lib/msun/src/e_powf.c
@@ -67,6 +67,9 @@ __ieee754_powf(float x, float y)
/* y==zero: x**0 = 1 */
if(iy==0) return one;
+ /* x==1: 1**y = 1, even if y is NaN */
+ if (hx==0x3f800000) return one;
+
/* y!=zero: result is NaN if either arg is NaN */
if(ix > 0x7f800000 ||
iy > 0x7f800000)
@@ -90,7 +93,7 @@ __ieee754_powf(float x, float y)
/* special value of y */
if (iy==0x7f800000) { /* y is +-inf */
if (ix==0x3f800000)
- return y - y; /* inf**+-1 is NaN */
+ return one; /* (-1)**+-inf is NaN */
else if (ix > 0x3f800000)/* (|x|>1)**+-inf = inf,0 */
return (hy>=0)? y: zero;
else /* (|x|<1)**-,+inf = inf,0 */
diff --git a/lib/msun/src/e_sinh.c b/lib/msun/src/e_sinh.c
index afb8e43..17442d0 100644
--- a/lib/msun/src/e_sinh.c
+++ b/lib/msun/src/e_sinh.c
@@ -40,9 +40,8 @@ static const double one = 1.0, shuge = 1.0e307;
double
__ieee754_sinh(double x)
{
- double t,w,h;
+ double t,h;
int32_t ix,jx;
- u_int32_t lx;
/* High word of |x|. */
GET_HIGH_WORD(jx,x);
@@ -66,12 +65,8 @@ __ieee754_sinh(double x)
if (ix < 0x40862E42) return h*__ieee754_exp(fabs(x));
/* |x| in [log(maxdouble), overflowthresold] */
- GET_LOW_WORD(lx,x);
- if (ix<0x408633CE || ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) {
- w = __ieee754_exp(0.5*fabs(x));
- t = h*w;
- return t*w;
- }
+ if (ix<=0x408633CE)
+ return h*2.0*__ldexp_exp(fabs(x), -1);
/* |x| > overflowthresold, sinh(x) overflow */
return x*shuge;
diff --git a/lib/msun/src/e_sinhf.c b/lib/msun/src/e_sinhf.c
index 0f96b2b..1be2dc3 100644
--- a/lib/msun/src/e_sinhf.c
+++ b/lib/msun/src/e_sinhf.c
@@ -24,7 +24,7 @@ static const float one = 1.0, shuge = 1.0e37;
float
__ieee754_sinhf(float x)
{
- float t,w,h;
+ float t,h;
int32_t ix,jx;
GET_FLOAT_WORD(jx,x);
@@ -48,11 +48,8 @@ __ieee754_sinhf(float x)
if (ix < 0x42b17217) return h*__ieee754_expf(fabsf(x));
/* |x| in [logf(maxfloat), overflowthresold] */
- if (ix<=0x42b2d4fc) {
- w = __ieee754_expf((float)0.5*fabsf(x));
- t = h*w;
- return t*w;
- }
+ if (ix<=0x42b2d4fc)
+ return h*2.0F*__ldexp_expf(fabsf(x), -1);
/* |x| > overflowthresold, sinh(x) overflow */
return x*shuge;
diff --git a/lib/msun/src/k_exp.c b/lib/msun/src/k_exp.c
new file mode 100644
index 0000000..f592f69
--- /dev/null
+++ b/lib/msun/src/k_exp.c
@@ -0,0 +1,108 @@
+/*-
+ * Copyright (c) 2011 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+
+#include "math.h"
+#include "math_private.h"
+
+static const uint32_t k = 1799; /* constant for reduction */
+static const double kln2 = 1246.97177782734161156; /* k * ln2 */
+
+/*
+ * Compute exp(x), scaled to avoid spurious overflow. An exponent is
+ * returned separately in 'expt'.
+ *
+ * Input: ln(DBL_MAX) <= x < ln(2 * DBL_MAX / DBL_MIN_DENORM) ~= 1454.91
+ * Output: 2**1023 <= y < 2**1024
+ */
+static double
+__frexp_exp(double x, int *expt)
+{
+ double exp_x;
+ uint32_t hx;
+
+ /*
+ * We use exp(x) = exp(x - kln2) * 2**k, carefully chosen to
+ * minimize |exp(kln2) - 2**k|. We also scale the exponent of
+ * exp_x to MAX_EXP so that the result can be multiplied by
+ * a tiny number without losing accuracy due to denormalization.
+ */
+ exp_x = exp(x - kln2);
+ GET_HIGH_WORD(hx, exp_x);
+ *expt = (hx >> 20) - (0x3ff + 1023) + k;
+ SET_HIGH_WORD(exp_x, (hx & 0xfffff) | ((0x3ff + 1023) << 20));
+ return (exp_x);
+}
+
+/*
+ * __ldexp_exp(x, expt) and __ldexp_cexp(x, expt) compute exp(x) * 2**expt.
+ * They are intended for large arguments (real part >= ln(DBL_MAX))
+ * where care is needed to avoid overflow.
+ *
+ * The present implementation is narrowly tailored for our hyperbolic and
+ * exponential functions. We assume expt is small (0 or -1), and the caller
+ * has filtered out very large x, for which overflow would be inevitable.
+ */
+
+double
+__ldexp_exp(double x, int expt)
+{
+ double exp_x, scale;
+ int ex_expt;
+
+ exp_x = __frexp_exp(x, &ex_expt);
+ expt += ex_expt;
+ INSERT_WORDS(scale, (0x3ff + expt) << 20, 0);
+ return (exp_x * scale);
+}
+
+double complex
+__ldexp_cexp(double complex z, int expt)
+{
+ double x, y, exp_x, scale1, scale2;
+ int ex_expt, half_expt;
+
+ x = creal(z);
+ y = cimag(z);
+ exp_x = __frexp_exp(x, &ex_expt);
+ expt += ex_expt;
+
+ /*
+ * Arrange so that scale1 * scale2 == 2**expt. We use this to
+ * compensate for scalbn being horrendously slow.
+ */
+ half_expt = expt / 2;
+ INSERT_WORDS(scale1, (0x3ff + half_expt) << 20, 0);
+ half_expt = expt - half_expt;
+ INSERT_WORDS(scale2, (0x3ff + half_expt) << 20, 0);
+
+ return (cpack(cos(y) * exp_x * scale1 * scale2,
+ sin(y) * exp_x * scale1 * scale2));
+}
diff --git a/lib/msun/src/k_expf.c b/lib/msun/src/k_expf.c
new file mode 100644
index 0000000..a860b9f
--- /dev/null
+++ b/lib/msun/src/k_expf.c
@@ -0,0 +1,87 @@
+/*-
+ * Copyright (c) 2011 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+
+#include "math.h"
+#include "math_private.h"
+
+static const uint32_t k = 235; /* constant for reduction */
+static const float kln2 = 162.88958740F; /* k * ln2 */
+
+/*
+ * See k_exp.c for details.
+ *
+ * Input: ln(FLT_MAX) <= x < ln(2 * FLT_MAX / FLT_MIN_DENORM) ~= 192.7
+ * Output: 2**127 <= y < 2**128
+ */
+static float
+__frexp_expf(float x, int *expt)
+{
+ double exp_x;
+ uint32_t hx;
+
+ exp_x = expf(x - kln2);
+ GET_FLOAT_WORD(hx, exp_x);
+ *expt = (hx >> 23) - (0x7f + 127) + k;
+ SET_FLOAT_WORD(exp_x, (hx & 0x7fffff) | ((0x7f + 127) << 23));
+ return (exp_x);
+}
+
+float
+__ldexp_expf(float x, int expt)
+{
+ float exp_x, scale;
+ int ex_expt;
+
+ exp_x = __frexp_expf(x, &ex_expt);
+ expt += ex_expt;
+ SET_FLOAT_WORD(scale, (0x7f + expt) << 23);
+ return (exp_x * scale);
+}
+
+float complex
+__ldexp_cexpf(float complex z, int expt)
+{
+ float x, y, exp_x, scale1, scale2;
+ int ex_expt, half_expt;
+
+ x = crealf(z);
+ y = cimagf(z);
+ exp_x = __frexp_expf(x, &ex_expt);
+ expt += ex_expt;
+
+ half_expt = expt / 2;
+ SET_FLOAT_WORD(scale1, (0x7f + half_expt) << 23);
+ half_expt = expt - half_expt;
+ SET_FLOAT_WORD(scale2, (0x7f + half_expt) << 23);
+
+ return (cpackf(cosf(y) * exp_x * scale1 * scale2,
+ sinf(y) * exp_x * scale1 * scale2));
+}
diff --git a/lib/msun/src/k_log.h b/lib/msun/src/k_log.h
index 206355c..aaff8bd 100644
--- a/lib/msun/src/k_log.h
+++ b/lib/msun/src/k_log.h
@@ -14,8 +14,9 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-/* __kernel_log(x)
- * Return log(x) - (x-1) for x in ~[sqrt(2)/2, sqrt(2)].
+/*
+ * k_log1p(f):
+ * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
*
* The following describes the overall strategy for computing
* logarithms in base e. The argument reduction and adding the final
@@ -80,37 +81,20 @@ Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
/*
- * We always inline __kernel_log(), since doing so produces a
+ * We always inline k_log1p(), since doing so produces a
* substantial performance improvement (~40% on amd64).
*/
static inline double
-__kernel_log(double x)
+k_log1p(double f)
{
- double hfsq,f,s,z,R,w,t1,t2;
- int32_t hx,i,j;
- u_int32_t lx;
-
- EXTRACT_WORDS(hx,lx,x);
+ double hfsq,s,z,R,w,t1,t2;
- f = x-1.0;
- if((0x000fffff&(2+hx))<3) { /* -2**-20 <= f < 2**-20 */
- if(f==0.0) return 0.0;
- return f*f*(0.33333333333333333*f-0.5);
- }
- s = f/(2.0+f);
+ s = f/(2.0+f);
z = s*s;
- hx &= 0x000fffff;
- i = hx-0x6147a;
w = z*z;
- j = 0x6b851-hx;
- t1= w*(Lg2+w*(Lg4+w*Lg6));
- t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
- i |= j;
+ t1= w*(Lg2+w*(Lg4+w*Lg6));
+ t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
R = t2+t1;
- if (i>0) {
- hfsq=0.5*f*f;
- return s*(hfsq+R) - hfsq;
- } else {
- return s*(R-f);
- }
+ hfsq=0.5*f*f;
+ return s*(hfsq+R);
}
diff --git a/lib/msun/src/k_logf.h b/lib/msun/src/k_logf.h
index d9f0f3d..71c547e 100644
--- a/lib/msun/src/k_logf.h
+++ b/lib/msun/src/k_logf.h
@@ -13,7 +13,7 @@
__FBSDID("$FreeBSD$");
/*
- * float version of __kernel_log(x). See k_log.c for details.
+ * Float version of k_log.h. See the latter for most comments.
*/
static const float
@@ -24,32 +24,16 @@ Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
static inline float
-__kernel_logf(float x)
+k_log1pf(float f)
{
- float hfsq,f,s,z,R,w,t1,t2;
- int32_t ix,i,j;
+ float hfsq,s,z,R,w,t1,t2;
- GET_FLOAT_WORD(ix,x);
-
- f = x-(float)1.0;
- if((0x007fffff&(0x8000+ix))<0xc000) { /* -2**-9 <= f < 2**-9 */
- if(f==0.0f) return 0.0f;
- return f*f*((float)0.33333333333333333*f-(float)0.5);
- }
s = f/((float)2.0+f);
z = s*s;
- ix &= 0x007fffff;
- i = ix-(0x6147a<<3);
w = z*z;
- j = (0x6b851<<3)-ix;
t1= w*(Lg2+w*Lg4);
t2= z*(Lg1+w*Lg3);
- i |= j;
R = t2+t1;
- if(i>0) {
- hfsq=(float)0.5*f*f;
- return s*(hfsq+R) - hfsq;
- } else {
- return s*(R-f);
- }
+ hfsq=(float)0.5*f*f;
+ return s*(hfsq+R);
}
diff --git a/lib/msun/src/math.h b/lib/msun/src/math.h
index 8ad13ed..69b138e 100644
--- a/lib/msun/src/math.h
+++ b/lib/msun/src/math.h
@@ -68,14 +68,11 @@ extern const union __nan_un {
#define MATH_ERREXCEPT 2
#define math_errhandling MATH_ERREXCEPT
-/* XXX We need a <machine/math.h>. */
-#if defined(__ia64__) || defined(__sparc64__)
-#define FP_FAST_FMA 1
-#endif
+#define FP_FAST_FMAF 1
#ifdef __ia64__
+#define FP_FAST_FMA 1
#define FP_FAST_FMAL 1
#endif
-#define FP_FAST_FMAF 1
/* Symbolic constants to classify floating point numbers. */
#define FP_INFINITE 0x01
diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h
index d79f808..79280e3 100644
--- a/lib/msun/src/math_private.h
+++ b/lib/msun/src/math_private.h
@@ -58,6 +58,10 @@ typedef union
u_int32_t msw;
u_int32_t lsw;
} parts;
+ struct
+ {
+ u_int64_t w;
+ } xparts;
} ieee_double_shape_type;
#endif
@@ -72,6 +76,10 @@ typedef union
u_int32_t lsw;
u_int32_t msw;
} parts;
+ struct
+ {
+ u_int64_t w;
+ } xparts;
} ieee_double_shape_type;
#endif
@@ -86,6 +94,14 @@ do { \
(ix1) = ew_u.parts.lsw; \
} while (0)
+/* Get a 64-bit int from a double. */
+#define EXTRACT_WORD64(ix,d) \
+do { \
+ ieee_double_shape_type ew_u; \
+ ew_u.value = (d); \
+ (ix) = ew_u.xparts.w; \
+} while (0)
+
/* Get the more significant 32 bit int from a double. */
#define GET_HIGH_WORD(i,d) \
@@ -114,6 +130,14 @@ do { \
(d) = iw_u.value; \
} while (0)
+/* Set a double from a 64-bit int. */
+#define INSERT_WORD64(d,ix) \
+do { \
+ ieee_double_shape_type iw_u; \
+ iw_u.xparts.w = (ix); \
+ (d) = iw_u.value; \
+} while (0)
+
/* Set the more significant 32 bits of a double from an int. */
#define SET_HIGH_WORD(d,v) \
@@ -164,6 +188,25 @@ do { \
(d) = sf_u.value; \
} while (0)
+/* Get expsign as a 16 bit int from a long double. */
+
+#define GET_LDBL_EXPSIGN(i,d) \
+do { \
+ union IEEEl2bits ge_u; \
+ ge_u.e = (d); \
+ (i) = ge_u.xbits.expsign; \
+} while (0)
+
+/* Set expsign of a long double from a 16 bit int. */
+
+#define SET_LDBL_EXPSIGN(d,v) \
+do { \
+ union IEEEl2bits se_u; \
+ se_u.e = (d); \
+ se_u.xbits.expsign = (v); \
+ (d) = se_u.e; \
+} while (0)
+
#ifdef FLT_EVAL_METHOD
/*
* Attempt to get strict C99 semantics for assignment with non-C99 compilers.
@@ -354,6 +397,10 @@ int __ieee754_rem_pio2(double,double*);
double __kernel_sin(double,double,int);
double __kernel_cos(double,double);
double __kernel_tan(double,double,int);
+double __ldexp_exp(double,int);
+#ifdef _COMPLEX_H
+double complex __ldexp_cexp(double complex,int);
+#endif
/* float precision kernel functions */
#ifdef INLINE_REM_PIO2F
@@ -372,6 +419,10 @@ float __kernel_cosdf(double);
__inline
#endif
float __kernel_tandf(double,int);
+float __ldexp_expf(float,int);
+#ifdef _COMPLEX_H
+float complex __ldexp_cexpf(float complex,int);
+#endif
/* long double precision kernel functions */
long double __kernel_sinl(long double, long double, int);
diff --git a/lib/msun/src/s_ccosh.c b/lib/msun/src/s_ccosh.c
new file mode 100644
index 0000000..9ea962b
--- /dev/null
+++ b/lib/msun/src/s_ccosh.c
@@ -0,0 +1,155 @@
+/*-
+ * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic cosine of a complex argument z = x + i y.
+ *
+ * cosh(z) = cosh(x+iy)
+ * = cosh(x) cos(y) + i sinh(x) sin(y).
+ *
+ * Exceptional values are noted in the comments within the source code.
+ * These values and the return value were taken from n1124.pdf.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+static const double huge = 0x1p1023;
+
+double complex
+ccosh(double complex z)
+{
+ double x, y, h;
+ int32_t hx, hy, ix, iy, lx, ly;
+
+ x = creal(z);
+ y = cimag(z);
+
+ EXTRACT_WORDS(hx, lx, x);
+ EXTRACT_WORDS(hy, ly, y);
+
+ ix = 0x7fffffff & hx;
+ iy = 0x7fffffff & hy;
+
+ /* Handle the nearly-non-exceptional cases where x and y are finite. */
+ if (ix < 0x7ff00000 && iy < 0x7ff00000) {
+ if ((iy | ly) == 0)
+ return (cpack(cosh(x), x * y));
+ if (ix < 0x40360000) /* small x: normal case */
+ return (cpack(cosh(x) * cos(y), sinh(x) * sin(y)));
+
+ /* |x| >= 22, so cosh(x) ~= exp(|x|) */
+ if (ix < 0x40862e42) {
+ /* x < 710: exp(|x|) won't overflow */
+ h = exp(fabs(x)) * 0.5;
+ return (cpack(h * cos(y), copysign(h, x) * sin(y)));
+ } else if (ix < 0x4096bbaa) {
+ /* x < 1455: scale to avoid overflow */
+ z = __ldexp_cexp(cpack(fabs(x), y), -1);
+ return (cpack(creal(z), cimag(z) * copysign(1, x)));
+ } else {
+ /* x >= 1455: the result always overflows */
+ h = huge * x;
+ return (cpack(h * h * cos(y), h * sin(y)));
+ }
+ }
+
+ /*
+ * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0.
+ * The sign of 0 in the result is unspecified. Choice = normally
+ * the same as dNaN. Raise the invalid floating-point exception.
+ *
+ * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0.
+ * The sign of 0 in the result is unspecified. Choice = normally
+ * the same as d(NaN).
+ */
+ if ((ix | lx) == 0 && iy >= 0x7ff00000)
+ return (cpack(y - y, copysign(0, x * (y - y))));
+
+ /*
+ * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0.
+ *
+ * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0.
+ * The sign of 0 in the result is unspecified.
+ */
+ if ((iy | ly) == 0 && ix >= 0x7ff00000) {
+ if (((hx & 0xfffff) | lx) == 0)
+ return (cpack(x * x, copysign(0, x) * y));
+ return (cpack(x * x, copysign(0, (x + x) * y)));
+ }
+
+ /*
+ * cosh(x +- I Inf) = dNaN + I dNaN.
+ * Raise the invalid floating-point exception for finite nonzero x.
+ *
+ * cosh(x + I NaN) = d(NaN) + I d(NaN).
+ * Optionally raises the invalid floating-point exception for finite
+ * nonzero x. Choice = don't raise (except for signaling NaNs).
+ */
+ if (ix < 0x7ff00000 && iy >= 0x7ff00000)
+ return (cpack(y - y, x * (y - y)));
+
+ /*
+ * cosh(+-Inf + I NaN) = +Inf + I d(NaN).
+ *
+ * cosh(+-Inf +- I Inf) = +Inf + I dNaN.
+ * The sign of Inf in the result is unspecified. Choice = always +.
+ * Raise the invalid floating-point exception.
+ *
+ * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y)
+ */
+ if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) {
+ if (iy >= 0x7ff00000)
+ return (cpack(x * x, x * (y - y)));
+ return (cpack((x * x) * cos(y), x * sin(y)));
+ }
+
+ /*
+ * cosh(NaN + I NaN) = d(NaN) + I d(NaN).
+ *
+ * cosh(NaN +- I Inf) = d(NaN) + I d(NaN).
+ * Optionally raises the invalid floating-point exception.
+ * Choice = raise.
+ *
+ * cosh(NaN + I y) = d(NaN) + I d(NaN).
+ * Optionally raises the invalid floating-point exception for finite
+ * nonzero y. Choice = don't raise (except for signaling NaNs).
+ */
+ return (cpack((x * x) * (y - y), (x + x) * (y - y)));
+}
+
+double complex
+ccos(double complex z)
+{
+
+ /* ccos(z) = ccosh(I * z) */
+ return (ccosh(cpack(-cimag(z), creal(z))));
+}
diff --git a/lib/msun/src/s_ccoshf.c b/lib/msun/src/s_ccoshf.c
new file mode 100644
index 0000000..1de9ad4
--- /dev/null
+++ b/lib/msun/src/s_ccoshf.c
@@ -0,0 +1,104 @@
+/*-
+ * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic cosine of a complex argument. See s_ccosh.c for details.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+static const float huge = 0x1p127;
+
+float complex
+ccoshf(float complex z)
+{
+ float x, y, h;
+ int32_t hx, hy, ix, iy;
+
+ x = crealf(z);
+ y = cimagf(z);
+
+ GET_FLOAT_WORD(hx, x);
+ GET_FLOAT_WORD(hy, y);
+
+ ix = 0x7fffffff & hx;
+ iy = 0x7fffffff & hy;
+
+ if (ix < 0x7f800000 && iy < 0x7f800000) {
+ if (iy == 0)
+ return (cpackf(coshf(x), x * y));
+ if (ix < 0x41100000) /* small x: normal case */
+ return (cpackf(coshf(x) * cosf(y), sinhf(x) * sinf(y)));
+
+ /* |x| >= 9, so cosh(x) ~= exp(|x|) */
+ if (ix < 0x42b17218) {
+ /* x < 88.7: expf(|x|) won't overflow */
+ h = expf(fabsf(x)) * 0.5f;
+ return (cpackf(h * cosf(y), copysignf(h, x) * sinf(y)));
+ } else if (ix < 0x4340b1e7) {
+ /* x < 192.7: scale to avoid overflow */
+ z = __ldexp_cexpf(cpackf(fabsf(x), y), -1);
+ return (cpackf(crealf(z), cimagf(z) * copysignf(1, x)));
+ } else {
+ /* x >= 192.7: the result always overflows */
+ h = huge * x;
+ return (cpackf(h * h * cosf(y), h * sinf(y)));
+ }
+ }
+
+ if (ix == 0 && iy >= 0x7f800000)
+ return (cpackf(y - y, copysignf(0, x * (y - y))));
+
+ if (iy == 0 && ix >= 0x7f800000) {
+ if ((hx & 0x7fffff) == 0)
+ return (cpackf(x * x, copysignf(0, x) * y));
+ return (cpackf(x * x, copysignf(0, (x + x) * y)));
+ }
+
+ if (ix < 0x7f800000 && iy >= 0x7f800000)
+ return (cpackf(y - y, x * (y - y)));
+
+ if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) {
+ if (iy >= 0x7f800000)
+ return (cpackf(x * x, x * (y - y)));
+ return (cpackf((x * x) * cosf(y), x * sinf(y)));
+ }
+
+ return (cpackf((x * x) * (y - y), (x + x) * (y - y)));
+}
+
+float complex
+ccosf(float complex z)
+{
+
+ return (ccoshf(cpackf(-cimagf(z), crealf(z))));
+}
diff --git a/lib/msun/src/s_cexp.c b/lib/msun/src/s_cexp.c
index ecf0992..abe178f 100644
--- a/lib/msun/src/s_cexp.c
+++ b/lib/msun/src/s_cexp.c
@@ -34,18 +34,13 @@ __FBSDID("$FreeBSD$");
static const uint32_t
exp_ovfl = 0x40862e42, /* high bits of MAX_EXP * ln2 ~= 710 */
-cexp_ovfl = 0x4096b8e4, /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */
-k = 1799; /* constant for reduction */
-
-static const double
-kln2 = 1246.97177782734161156; /* k * ln2 */
+cexp_ovfl = 0x4096b8e4; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */
double complex
cexp(double complex z)
{
double x, y, exp_x;
uint32_t hx, hy, lx, ly;
- int scale;
x = creal(z);
y = cimag(z);
@@ -56,8 +51,12 @@ cexp(double complex z)
/* cexp(x + I 0) = exp(x) + I 0 */
if ((hy | ly) == 0)
return (cpack(exp(x), y));
+ EXTRACT_WORDS(hx, lx, x);
+ /* cexp(0 + I y) = cos(y) + I sin(y) */
+ if (((hx & 0x7fffffff) | lx) == 0)
+ return (cpack(cos(y), sin(y)));
+
if (hy >= 0x7ff00000) {
- EXTRACT_WORDS(hx, lx, x);
if (lx != 0 || (hx & 0x7fffffff) != 0x7ff00000) {
/* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */
return (cpack(y - y, y - y));
@@ -70,21 +69,12 @@ cexp(double complex z)
}
}
- GET_HIGH_WORD(hx, x);
if (hx >= exp_ovfl && hx <= cexp_ovfl) {
/*
* x is between 709.7 and 1454.3, so we must scale to avoid
- * overflow in exp(x). We use exp(x) = exp(x - kln2) * 2**k,
- * carefully chosen to minimize |exp(kln2) - 2**k|. We also
- * scale the exponent of exp(x) to MANT_DIG to avoid loss of
- * accuracy due to underflow if sin(y) is tiny.
+ * overflow in exp(x).
*/
- exp_x = exp(x - kln2);
- GET_HIGH_WORD(hx, exp_x);
- SET_HIGH_WORD(exp_x, (hx & 0xfffff) | ((0x3ff + 52) << 20));
- scale = (hx >> 20) - (0x3ff + 52) + k;
- return (cpack(scalbn(cos(y) * exp_x, scale),
- scalbn(sin(y) * exp_x, scale)));
+ return (__ldexp_cexp(z, 0));
} else {
/*
* Cases covered here:
diff --git a/lib/msun/src/s_cexpf.c b/lib/msun/src/s_cexpf.c
index 4ea3931..0e30d08 100644
--- a/lib/msun/src/s_cexpf.c
+++ b/lib/msun/src/s_cexpf.c
@@ -34,18 +34,13 @@ __FBSDID("$FreeBSD$");
static const uint32_t
exp_ovfl = 0x42b17218, /* MAX_EXP * ln2 ~= 88.722839355 */
-cexp_ovfl = 0x43400074, /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */
-k = 235; /* constant for reduction */
-
-static const float
-kln2 = 162.88958740f; /* k * ln2 */
+cexp_ovfl = 0x43400074; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */
float complex
cexpf(float complex z)
{
float x, y, exp_x;
uint32_t hx, hy;
- int scale;
x = crealf(z);
y = cimagf(z);
@@ -57,6 +52,10 @@ cexpf(float complex z)
if (hy == 0)
return (cpackf(expf(x), y));
GET_FLOAT_WORD(hx, x);
+ /* cexp(0 + I y) = cos(y) + I sin(y) */
+ if ((hx & 0x7fffffff) == 0)
+ return (cpackf(cosf(y), sinf(y)));
+
if (hy >= 0x7f800000) {
if ((hx & 0x7fffffff) != 0x7f800000) {
/* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */
@@ -73,17 +72,9 @@ cexpf(float complex z)
if (hx >= exp_ovfl && hx <= cexp_ovfl) {
/*
* x is between 88.7 and 192, so we must scale to avoid
- * overflow in expf(x). We use exp(x) = exp(x - kln2) * 2**k,
- * carefully chosen to minimize |exp(kln2) - 2**k|. We also
- * scale the exponent of exp(x) to MANT_DIG to avoid loss of
- * accuracy due to underflow if sin(y) is tiny.
+ * overflow in expf(x).
*/
- exp_x = expf(x - kln2);
- GET_FLOAT_WORD(hx, exp_x);
- SET_FLOAT_WORD(exp_x, (hx & 0x7fffff) | ((0x7f + 23) << 23));
- scale = (hx >> 23) - (0x7f + 23) + k;
- return (cpackf(scalbnf(cosf(y) * exp_x, scale),
- scalbnf(sinf(y) * exp_x, scale)));
+ return (__ldexp_cexpf(z, 0));
} else {
/*
* Cases covered here:
diff --git a/lib/msun/src/s_csinh.c b/lib/msun/src/s_csinh.c
new file mode 100644
index 0000000..c192f30
--- /dev/null
+++ b/lib/msun/src/s_csinh.c
@@ -0,0 +1,157 @@
+/*-
+ * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic sine of a complex argument z = x + i y.
+ *
+ * sinh(z) = sinh(x+iy)
+ * = sinh(x) cos(y) + i cosh(x) sin(y).
+ *
+ * Exceptional values are noted in the comments within the source code.
+ * These values and the return value were taken from n1124.pdf.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+static const double huge = 0x1p1023;
+
+double complex
+csinh(double complex z)
+{
+ double x, y, h;
+ int32_t hx, hy, ix, iy, lx, ly;
+
+ x = creal(z);
+ y = cimag(z);
+
+ EXTRACT_WORDS(hx, lx, x);
+ EXTRACT_WORDS(hy, ly, y);
+
+ ix = 0x7fffffff & hx;
+ iy = 0x7fffffff & hy;
+
+ /* Handle the nearly-non-exceptional cases where x and y are finite. */
+ if (ix < 0x7ff00000 && iy < 0x7ff00000) {
+ if ((iy | ly) == 0)
+ return (cpack(sinh(x), y));
+ if (ix < 0x40360000) /* small x: normal case */
+ return (cpack(sinh(x) * cos(y), cosh(x) * sin(y)));
+
+ /* |x| >= 22, so cosh(x) ~= exp(|x|) */
+ if (ix < 0x40862e42) {
+ /* x < 710: exp(|x|) won't overflow */
+ h = exp(fabs(x)) * 0.5;
+ return (cpack(copysign(h, x) * cos(y), h * sin(y)));
+ } else if (ix < 0x4096bbaa) {
+ /* x < 1455: scale to avoid overflow */
+ z = __ldexp_cexp(cpack(fabs(x), y), -1);
+ return (cpack(creal(z) * copysign(1, x), cimag(z)));
+ } else {
+ /* x >= 1455: the result always overflows */
+ h = huge * x;
+ return (cpack(h * cos(y), h * h * sin(y)));
+ }
+ }
+
+ /*
+ * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN.
+ * The sign of 0 in the result is unspecified. Choice = normally
+ * the same as dNaN. Raise the invalid floating-point exception.
+ *
+ * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN).
+ * The sign of 0 in the result is unspecified. Choice = normally
+ * the same as d(NaN).
+ */
+ if ((ix | lx) == 0 && iy >= 0x7ff00000)
+ return (cpack(copysign(0, x * (y - y)), y - y));
+
+ /*
+ * sinh(+-Inf +- I 0) = +-Inf + I +-0.
+ *
+ * sinh(NaN +- I 0) = d(NaN) + I +-0.
+ */
+ if ((iy | ly) == 0 && ix >= 0x7ff00000) {
+ if (((hx & 0xfffff) | lx) == 0)
+ return (cpack(x, y));
+ return (cpack(x, copysign(0, y)));
+ }
+
+ /*
+ * sinh(x +- I Inf) = dNaN + I dNaN.
+ * Raise the invalid floating-point exception for finite nonzero x.
+ *
+ * sinh(x + I NaN) = d(NaN) + I d(NaN).
+ * Optionally raises the invalid floating-point exception for finite
+ * nonzero x. Choice = don't raise (except for signaling NaNs).
+ */
+ if (ix < 0x7ff00000 && iy >= 0x7ff00000)
+ return (cpack(y - y, x * (y - y)));
+
+ /*
+ * sinh(+-Inf + I NaN) = +-Inf + I d(NaN).
+ * The sign of Inf in the result is unspecified. Choice = normally
+ * the same as d(NaN).
+ *
+ * sinh(+-Inf +- I Inf) = +Inf + I dNaN.
+ * The sign of Inf in the result is unspecified. Choice = always +.
+ * Raise the invalid floating-point exception.
+ *
+ * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y)
+ */
+ if (ix >= 0x7ff00000 && ((hx & 0xfffff) | lx) == 0) {
+ if (iy >= 0x7ff00000)
+ return (cpack(x * x, x * (y - y)));
+ return (cpack(x * cos(y), INFINITY * sin(y)));
+ }
+
+ /*
+ * sinh(NaN + I NaN) = d(NaN) + I d(NaN).
+ *
+ * sinh(NaN +- I Inf) = d(NaN) + I d(NaN).
+ * Optionally raises the invalid floating-point exception.
+ * Choice = raise.
+ *
+ * sinh(NaN + I y) = d(NaN) + I d(NaN).
+ * Optionally raises the invalid floating-point exception for finite
+ * nonzero y. Choice = don't raise (except for signaling NaNs).
+ */
+ return (cpack((x * x) * (y - y), (x + x) * (y - y)));
+}
+
+double complex
+csin(double complex z)
+{
+
+ /* csin(z) = -I * csinh(I * z) */
+ z = csinh(cpack(-cimag(z), creal(z)));
+ return (cpack(cimag(z), -creal(z)));
+}
diff --git a/lib/msun/src/s_csinhf.c b/lib/msun/src/s_csinhf.c
new file mode 100644
index 0000000..c523125
--- /dev/null
+++ b/lib/msun/src/s_csinhf.c
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 2005 Bruce D. Evans and Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic sine of a complex argument z. See s_csinh.c for details.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+static const float huge = 0x1p127;
+
+float complex
+csinhf(float complex z)
+{
+ float x, y, h;
+ int32_t hx, hy, ix, iy;
+
+ x = crealf(z);
+ y = cimagf(z);
+
+ GET_FLOAT_WORD(hx, x);
+ GET_FLOAT_WORD(hy, y);
+
+ ix = 0x7fffffff & hx;
+ iy = 0x7fffffff & hy;
+
+ if (ix < 0x7f800000 && iy < 0x7f800000) {
+ if (iy == 0)
+ return (cpackf(sinhf(x), y));
+ if (ix < 0x41100000) /* small x: normal case */
+ return (cpackf(sinhf(x) * cosf(y), coshf(x) * sinf(y)));
+
+ /* |x| >= 9, so cosh(x) ~= exp(|x|) */
+ if (ix < 0x42b17218) {
+ /* x < 88.7: expf(|x|) won't overflow */
+ h = expf(fabsf(x)) * 0.5f;
+ return (cpackf(copysignf(h, x) * cosf(y), h * sinf(y)));
+ } else if (ix < 0x4340b1e7) {
+ /* x < 192.7: scale to avoid overflow */
+ z = __ldexp_cexpf(cpackf(fabsf(x), y), -1);
+ return (cpackf(crealf(z) * copysignf(1, x), cimagf(z)));
+ } else {
+ /* x >= 192.7: the result always overflows */
+ h = huge * x;
+ return (cpackf(h * cosf(y), h * h * sinf(y)));
+ }
+ }
+
+ if (ix == 0 && iy >= 0x7f800000)
+ return (cpackf(copysignf(0, x * (y - y)), y - y));
+
+ if (iy == 0 && ix >= 0x7f800000) {
+ if ((hx & 0x7fffff) == 0)
+ return (cpackf(x, y));
+ return (cpackf(x, copysignf(0, y)));
+ }
+
+ if (ix < 0x7f800000 && iy >= 0x7f800000)
+ return (cpackf(y - y, x * (y - y)));
+
+ if (ix >= 0x7f800000 && (hx & 0x7fffff) == 0) {
+ if (iy >= 0x7f800000)
+ return (cpackf(x * x, x * (y - y)));
+ return (cpackf(x * cosf(y), INFINITY * sinf(y)));
+ }
+
+ return (cpackf((x * x) * (y - y), (x + x) * (y - y)));
+}
+
+float complex
+csinf(float complex z)
+{
+
+ z = csinhf(cpackf(-cimagf(z), crealf(z)));
+ return (cpackf(cimagf(z), -crealf(z)));
+}
diff --git a/lib/msun/src/s_ctanh.c b/lib/msun/src/s_ctanh.c
new file mode 100644
index 0000000..d427e28
--- /dev/null
+++ b/lib/msun/src/s_ctanh.c
@@ -0,0 +1,144 @@
+/*-
+ * Copyright (c) 2011 David Schultz
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic tangent of a complex argument z = x + i y.
+ *
+ * The algorithm is from:
+ *
+ * W. Kahan. Branch Cuts for Complex Elementary Functions or Much
+ * Ado About Nothing's Sign Bit. In The State of the Art in
+ * Numerical Analysis, pp. 165 ff. Iserles and Powell, eds., 1987.
+ *
+ * Method:
+ *
+ * Let t = tan(x)
+ * beta = 1/cos^2(y)
+ * s = sinh(x)
+ * rho = cosh(x)
+ *
+ * We have:
+ *
+ * tanh(z) = sinh(z) / cosh(z)
+ *
+ * sinh(x) cos(y) + i cosh(x) sin(y)
+ * = ---------------------------------
+ * cosh(x) cos(y) + i sinh(x) sin(y)
+ *
+ * cosh(x) sinh(x) / cos^2(y) + i tan(y)
+ * = -------------------------------------
+ * 1 + sinh^2(x) / cos^2(y)
+ *
+ * beta rho s + i t
+ * = ----------------
+ * 1 + beta s^2
+ *
+ * Modifications:
+ *
+ * I omitted the original algorithm's handling of overflow in tan(x) after
+ * verifying with nearpi.c that this can't happen in IEEE single or double
+ * precision. I also handle large x differently.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+double complex
+ctanh(double complex z)
+{
+ double x, y;
+ double t, beta, s, rho, denom;
+ uint32_t hx, ix, lx;
+
+ x = creal(z);
+ y = cimag(z);
+
+ EXTRACT_WORDS(hx, lx, x);
+ ix = hx & 0x7fffffff;
+
+ /*
+ * ctanh(NaN + i 0) = NaN + i 0
+ *
+ * ctanh(NaN + i y) = NaN + i NaN for y != 0
+ *
+ * The imaginary part has the sign of x*sin(2*y), but there's no
+ * special effort to get this right.
+ *
+ * ctanh(+-Inf +- i Inf) = +-1 +- 0
+ *
+ * ctanh(+-Inf + i y) = +-1 + 0 sin(2y) for y finite
+ *
+ * The imaginary part of the sign is unspecified. This special
+ * case is only needed to avoid a spurious invalid exception when
+ * y is infinite.
+ */
+ if (ix >= 0x7ff00000) {
+ if ((ix & 0xfffff) | lx) /* x is NaN */
+ return (cpack(x, (y == 0 ? y : x * y)));
+ SET_HIGH_WORD(x, hx - 0x40000000); /* x = copysign(1, x) */
+ return (cpack(x, copysign(0, isinf(y) ? y : sin(y) * cos(y))));
+ }
+
+ /*
+ * ctanh(x + i NAN) = NaN + i NaN
+ * ctanh(x +- i Inf) = NaN + i NaN
+ */
+ if (!isfinite(y))
+ return (cpack(y - y, y - y));
+
+ /*
+ * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the
+ * approximation sinh^2(huge) ~= exp(2*huge) / 4.
+ * We use a modified formula to avoid spurious overflow.
+ */
+ if (ix >= 0x40360000) { /* x >= 22 */
+ double exp_mx = exp(-fabs(x));
+ return (cpack(copysign(1, x),
+ 4 * sin(y) * cos(y) * exp_mx * exp_mx));
+ }
+
+ /* Kahan's algorithm */
+ t = tan(y);
+ beta = 1.0 + t * t; /* = 1 / cos^2(y) */
+ s = sinh(x);
+ rho = sqrt(1 + s * s); /* = cosh(x) */
+ denom = 1 + beta * s * s;
+ return (cpack((beta * rho * s) / denom, t / denom));
+}
+
+double complex
+ctan(double complex z)
+{
+
+ /* ctan(z) = -I * ctanh(I * z) */
+ z = ctanh(cpack(-cimag(z), creal(z)));
+ return (cpack(cimag(z), -creal(z)));
+}
diff --git a/lib/msun/src/s_ctanhf.c b/lib/msun/src/s_ctanhf.c
new file mode 100644
index 0000000..4be28d8
--- /dev/null
+++ b/lib/msun/src/s_ctanhf.c
@@ -0,0 +1,84 @@
+/*-
+ * Copyright (c) 2011 David Schultz
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Hyperbolic tangent of a complex argument z. See s_ctanh.c for details.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <complex.h>
+#include <math.h>
+
+#include "math_private.h"
+
+float complex
+ctanhf(float complex z)
+{
+ float x, y;
+ float t, beta, s, rho, denom;
+ uint32_t hx, ix;
+
+ x = crealf(z);
+ y = cimagf(z);
+
+ GET_FLOAT_WORD(hx, x);
+ ix = hx & 0x7fffffff;
+
+ if (ix >= 0x7f800000) {
+ if (ix & 0x7fffff)
+ return (cpackf(x, (y == 0 ? y : x * y)));
+ SET_FLOAT_WORD(x, hx - 0x40000000);
+ return (cpackf(x,
+ copysignf(0, isinf(y) ? y : sinf(y) * cosf(y))));
+ }
+
+ if (!isfinite(y))
+ return (cpackf(y - y, y - y));
+
+ if (ix >= 0x41300000) { /* x >= 11 */
+ float exp_mx = expf(-fabsf(x));
+ return (cpackf(copysignf(1, x),
+ 4 * sinf(y) * cosf(y) * exp_mx * exp_mx));
+ }
+
+ t = tanf(y);
+ beta = 1.0 + t * t;
+ s = sinhf(x);
+ rho = sqrtf(1 + s * s);
+ denom = 1 + beta * s * s;
+ return (cpackf((beta * rho * s) / denom, t / denom));
+}
+
+float complex
+ctanf(float complex z)
+{
+
+ z = ctanhf(cpackf(-cimagf(z), crealf(z)));
+ return (cpackf(cimagf(z), -crealf(z)));
+}
+
diff --git a/lib/msun/src/s_expm1.c b/lib/msun/src/s_expm1.c
index 3de7bfb..5aa1917 100644
--- a/lib/msun/src/s_expm1.c
+++ b/lib/msun/src/s_expm1.c
@@ -108,6 +108,8 @@ __FBSDID("$FreeBSD$");
* to produce the hexadecimal values shown.
*/
+#include <float.h>
+
#include "math.h"
#include "math_private.h"
@@ -135,7 +137,6 @@ expm1(double x)
GET_HIGH_WORD(hx,x);
xsb = hx&0x80000000; /* sign bit of x */
- if(xsb==0) y=x; else y= -x; /* y = |x| */
hx &= 0x7fffffff; /* high word of |x| */
/* filter out huge and non-finite argument */
@@ -169,7 +170,7 @@ expm1(double x)
hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
lo = t*ln2_lo;
}
- x = hi - lo;
+ STRICT_ASSIGN(double, x, hi - lo);
c = (hi-x)-lo;
}
else if(hx < 0x3c900000) { /* when |x|<2**-54, return x */
diff --git a/lib/msun/src/s_expm1f.c b/lib/msun/src/s_expm1f.c
index 483472c..fb37494 100644
--- a/lib/msun/src/s_expm1f.c
+++ b/lib/msun/src/s_expm1f.c
@@ -16,6 +16,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <float.h>
+
#include "math.h"
#include "math_private.h"
@@ -44,7 +46,6 @@ expm1f(float x)
GET_FLOAT_WORD(hx,x);
xsb = hx&0x80000000; /* sign bit of x */
- if(xsb==0) y=x; else y= -x; /* y = |x| */
hx &= 0x7fffffff; /* high word of |x| */
/* filter out huge and non-finite argument */
@@ -75,7 +76,7 @@ expm1f(float x)
hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
lo = t*ln2_lo;
}
- x = hi - lo;
+ STRICT_ASSIGN(float, x, hi - lo);
c = (hi-x)-lo;
}
else if(hx < 0x33000000) { /* when |x|<2**-25, return x */
diff --git a/lib/msun/src/s_fma.c b/lib/msun/src/s_fma.c
index ad1fc4a..dfbd13c 100644
--- a/lib/msun/src/s_fma.c
+++ b/lib/msun/src/s_fma.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,132 @@ __FBSDID("$FreeBSD$");
#include <float.h>
#include <math.h>
+#include "math_private.h"
+
+/*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a double. We maintain the invariant that "hi" stores the 53 high-order
+ * bits of the result.
+ */
+struct dd {
+ double hi;
+ double lo;
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd. We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd
+dd_add(double a, double b)
+{
+ struct dd ret;
+ double s;
+
+ ret.hi = a + b;
+ s = ret.hi - a;
+ ret.lo = (a - (ret.hi - s)) + (b - s);
+ return (ret);
+}
+
+/*
+ * Compute a+b, with a small tweak: The least significant bit of the
+ * result is adjusted into a sticky bit summarizing all the bits that
+ * were lost to rounding. This adjustment negates the effects of double
+ * rounding when the result is added to another number with a higher
+ * exponent. For an explanation of round and sticky bits, see any reference
+ * on FPU design, e.g.,
+ *
+ * J. Coonen. An Implementation Guide to a Proposed Standard for
+ * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
+ */
+static inline double
+add_adjusted(double a, double b)
+{
+ struct dd sum;
+ uint64_t hibits, lobits;
+
+ sum = dd_add(a, b);
+ if (sum.lo != 0) {
+ EXTRACT_WORD64(hibits, sum.hi);
+ if ((hibits & 1) == 0) {
+ /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
+ EXTRACT_WORD64(lobits, sum.lo);
+ hibits += 1 - ((hibits ^ lobits) >> 62);
+ INSERT_WORD64(sum.hi, hibits);
+ }
+ }
+ return (sum.hi);
+}
+
+/*
+ * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
+ * that the result will be subnormal, and care is taken to ensure that
+ * double rounding does not occur.
+ */
+static inline double
+add_and_denormalize(double a, double b, int scale)
+{
+ struct dd sum;
+ uint64_t hibits, lobits;
+ int bits_lost;
+
+ sum = dd_add(a, b);
+
+ /*
+ * If we are losing at least two bits of accuracy to denormalization,
+ * then the first lost bit becomes a round bit, and we adjust the
+ * lowest bit of sum.hi to make it a sticky bit summarizing all the
+ * bits in sum.lo. With the sticky bit adjusted, the hardware will
+ * break any ties in the correct direction.
+ *
+ * If we are losing only one bit to denormalization, however, we must
+ * break the ties manually.
+ */
+ if (sum.lo != 0) {
+ EXTRACT_WORD64(hibits, sum.hi);
+ bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1;
+ if (bits_lost != 1 ^ (int)(hibits & 1)) {
+ /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */
+ EXTRACT_WORD64(lobits, sum.lo);
+ hibits += 1 - (((hibits ^ lobits) >> 62) & 2);
+ INSERT_WORD64(sum.hi, hibits);
+ }
+ }
+ return (ldexp(sum.hi, scale));
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd. We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd
+dd_mul(double a, double b)
+{
+ static const double split = 0x1p27 + 1.0;
+ struct dd ret;
+ double ha, hb, la, lb, p, q;
+
+ p = a * split;
+ ha = a - p;
+ ha += p;
+ la = a - ha;
+
+ p = b * split;
+ hb = b - p;
+ hb += p;
+ lb = b - hb;
+
+ p = ha * hb;
+ q = ha * lb + la * hb;
+
+ ret.hi = p + q;
+ ret.lo = p - ret.hi + q + la * lb;
+ return (ret);
+}
+
/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
@@ -48,14 +174,11 @@ __FBSDID("$FreeBSD$");
* Hardware instructions should be used on architectures that support it,
* since this implementation will likely be several times slower.
*/
-#if LDBL_MANT_DIG != 113
double
fma(double x, double y, double z)
{
- static const double split = 0x1p27 + 1.0;
- double xs, ys, zs;
- double c, cc, hx, hy, p, q, tx, ty;
- double r, rr, s;
+ double xs, ys, zs, adj;
+ struct dd xy, r;
int oround;
int ex, ey, ez;
int spread;
@@ -85,41 +208,6 @@ fma(double x, double y, double z)
* will overflow, so we handle these cases specially. Rounding
* modes other than FE_TONEAREST are painful.
*/
- if (spread > DBL_MANT_DIG * 2) {
- fenv_t env;
- feraiseexcept(FE_INEXACT);
- switch(oround) {
- case FE_TONEAREST:
- return (x * y);
- case FE_TOWARDZERO:
- if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
- return (x * y);
- feholdexcept(&env);
- r = x * y;
- if (!fetestexcept(FE_INEXACT))
- r = nextafter(r, 0);
- feupdateenv(&env);
- return (r);
- case FE_DOWNWARD:
- if (z > 0.0)
- return (x * y);
- feholdexcept(&env);
- r = x * y;
- if (!fetestexcept(FE_INEXACT))
- r = nextafter(r, -INFINITY);
- feupdateenv(&env);
- return (r);
- default: /* FE_UPWARD */
- if (z < 0.0)
- return (x * y);
- feholdexcept(&env);
- r = x * y;
- if (!fetestexcept(FE_INEXACT))
- r = nextafter(r, INFINITY);
- feupdateenv(&env);
- return (r);
- }
- }
if (spread < -DBL_MANT_DIG) {
feraiseexcept(FE_INEXACT);
if (!isnormal(z))
@@ -144,63 +232,52 @@ fma(double x, double y, double z)
return (z);
}
}
+ if (spread <= DBL_MANT_DIG * 2)
+ zs = ldexp(zs, -spread);
+ else
+ zs = copysign(DBL_MIN, zs);
- /*
- * Use Dekker's algorithm to perform the multiplication and
- * subsequent addition in twice the machine precision.
- * Arrange so that x * y = c + cc, and x * y + z = r + rr.
- */
fesetround(FE_TONEAREST);
- p = xs * split;
- hx = xs - p;
- hx += p;
- tx = xs - hx;
-
- p = ys * split;
- hy = ys - p;
- hy += p;
- ty = ys - hy;
-
- p = hx * hy;
- q = hx * ty + tx * hy;
- c = p + q;
- cc = p - c + q + tx * ty;
-
- zs = ldexp(zs, -spread);
- r = c + zs;
- s = r - c;
- rr = (c - (r - s)) + (zs - s) + cc;
+ /*
+ * Basic approach for round-to-nearest:
+ *
+ * (xy.hi, xy.lo) = x * y (exact)
+ * (r.hi, r.lo) = xy.hi + z (exact)
+ * adj = xy.lo + r.lo (inexact; low bit is sticky)
+ * result = r.hi + adj (correctly rounded)
+ */
+ xy = dd_mul(xs, ys);
+ r = dd_add(xy.hi, zs);
spread = ex + ey;
- if (spread + ilogb(r) > -1023) {
+
+ if (r.hi == 0.0) {
+ /*
+ * When the addends cancel to 0, ensure that the result has
+ * the correct sign.
+ */
fesetround(oround);
- r = r + rr;
- } else {
+ volatile double vzs = zs; /* XXX gcc CSE bug workaround */
+ return (xy.hi + vzs + ldexp(xy.lo, spread));
+ }
+
+ if (oround != FE_TONEAREST) {
/*
- * The result is subnormal, so we round before scaling to
- * avoid double rounding.
+ * There is no need to worry about double rounding in directed
+ * rounding modes.
*/
- p = ldexp(copysign(0x1p-1022, r), -spread);
- c = r + p;
- s = c - r;
- cc = (r - (c - s)) + (p - s) + rr;
fesetround(oround);
- r = (c + cc) - p;
+ adj = r.lo + xy.lo;
+ return (ldexp(r.hi + adj, spread));
}
- return (ldexp(r, spread));
-}
-#else /* LDBL_MANT_DIG == 113 */
-/*
- * 113 bits of precision is more than twice the precision of a double,
- * so it is enough to represent the intermediate product exactly.
- */
-double
-fma(double x, double y, double z)
-{
- return ((long double)x * y + z);
+
+ adj = add_adjusted(r.lo, xy.lo);
+ if (spread + ilogb(r.hi) > -1023)
+ return (ldexp(r.hi + adj, spread));
+ else
+ return (add_and_denormalize(r.hi, adj, spread));
}
-#endif /* LDBL_MANT_DIG != 113 */
#if (LDBL_MANT_DIG == 53)
__weak_reference(fma, fmal);
diff --git a/lib/msun/src/s_fmaf.c b/lib/msun/src/s_fmaf.c
index 7c699e5..3695823 100644
--- a/lib/msun/src/s_fmaf.c
+++ b/lib/msun/src/s_fmaf.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,23 +27,43 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <fenv.h>
+
#include "math.h"
+#include "math_private.h"
/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
* A double has more than twice as much precision than a float, so
- * direct double-precision arithmetic suffices.
- *
- * XXX We are relying on the compiler to convert from double to float
- * using the current rounding mode and with the appropriate
- * side-effects. But on at least one platform (gcc 3.4.2/sparc64),
- * this appears to be too much to ask for. The precision
- * reduction should be done manually.
+ * direct double-precision arithmetic suffices, except where double
+ * rounding occurs.
*/
float
fmaf(float x, float y, float z)
{
+ double xy, result;
+ uint32_t hr, lr;
+
+ xy = (double)x * y;
+ result = xy + z;
+ EXTRACT_WORDS(hr, lr, result);
+ /* Common case: The double precision result is fine. */
+ if ((lr & 0x1fffffff) != 0x10000000 || /* not a halfway case */
+ (hr & 0x7ff00000) == 0x7ff00000 || /* NaN */
+ result - xy == z || /* exact */
+ fegetround() != FE_TONEAREST) /* not round-to-nearest */
+ return (result);
- return ((double)x * y + z);
+ /*
+ * If result is inexact, and exactly halfway between two float values,
+ * we need to adjust the low-order bit in the direction of the error.
+ */
+ fesetround(FE_TOWARDZERO);
+ volatile double vxy = xy; /* XXX work around gcc CSE bug */
+ double adjusted_result = vxy + z;
+ fesetround(FE_TONEAREST);
+ if (result == adjusted_result)
+ SET_LOW_WORD(adjusted_result, lr + 1);
+ return (adjusted_result);
}
diff --git a/lib/msun/src/s_fmal.c b/lib/msun/src/s_fmal.c
index 4d5d114..c2a6913 100644
--- a/lib/msun/src/s_fmal.c
+++ b/lib/msun/src/s_fmal.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,128 @@ __FBSDID("$FreeBSD$");
#include <float.h>
#include <math.h>
+#include "fpmath.h"
+
+/*
+ * A struct dd represents a floating-point number with twice the precision
+ * of a long double. We maintain the invariant that "hi" stores the high-order
+ * bits of the result.
+ */
+struct dd {
+ long double hi;
+ long double lo;
+};
+
+/*
+ * Compute a+b exactly, returning the exact result in a struct dd. We assume
+ * that both a and b are finite, but make no assumptions about their relative
+ * magnitudes.
+ */
+static inline struct dd
+dd_add(long double a, long double b)
+{
+ struct dd ret;
+ long double s;
+
+ ret.hi = a + b;
+ s = ret.hi - a;
+ ret.lo = (a - (ret.hi - s)) + (b - s);
+ return (ret);
+}
+
+/*
+ * Compute a+b, with a small tweak: The least significant bit of the
+ * result is adjusted into a sticky bit summarizing all the bits that
+ * were lost to rounding. This adjustment negates the effects of double
+ * rounding when the result is added to another number with a higher
+ * exponent. For an explanation of round and sticky bits, see any reference
+ * on FPU design, e.g.,
+ *
+ * J. Coonen. An Implementation Guide to a Proposed Standard for
+ * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980.
+ */
+static inline long double
+add_adjusted(long double a, long double b)
+{
+ struct dd sum;
+ union IEEEl2bits u;
+
+ sum = dd_add(a, b);
+ if (sum.lo != 0) {
+ u.e = sum.hi;
+ if ((u.bits.manl & 1) == 0)
+ sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
+ }
+ return (sum.hi);
+}
+
+/*
+ * Compute ldexp(a+b, scale) with a single rounding error. It is assumed
+ * that the result will be subnormal, and care is taken to ensure that
+ * double rounding does not occur.
+ */
+static inline long double
+add_and_denormalize(long double a, long double b, int scale)
+{
+ struct dd sum;
+ int bits_lost;
+ union IEEEl2bits u;
+
+ sum = dd_add(a, b);
+
+ /*
+ * If we are losing at least two bits of accuracy to denormalization,
+ * then the first lost bit becomes a round bit, and we adjust the
+ * lowest bit of sum.hi to make it a sticky bit summarizing all the
+ * bits in sum.lo. With the sticky bit adjusted, the hardware will
+ * break any ties in the correct direction.
+ *
+ * If we are losing only one bit to denormalization, however, we must
+ * break the ties manually.
+ */
+ if (sum.lo != 0) {
+ u.e = sum.hi;
+ bits_lost = -u.bits.exp - scale + 1;
+ if (bits_lost != 1 ^ (int)(u.bits.manl & 1))
+ sum.hi = nextafterl(sum.hi, INFINITY * sum.lo);
+ }
+ return (ldexp(sum.hi, scale));
+}
+
+/*
+ * Compute a*b exactly, returning the exact result in a struct dd. We assume
+ * that both a and b are normalized, so no underflow or overflow will occur.
+ * The current rounding mode must be round-to-nearest.
+ */
+static inline struct dd
+dd_mul(long double a, long double b)
+{
+#if LDBL_MANT_DIG == 64
+ static const long double split = 0x1p32L + 1.0;
+#elif LDBL_MANT_DIG == 113
+ static const long double split = 0x1p57L + 1.0;
+#endif
+ struct dd ret;
+ long double ha, hb, la, lb, p, q;
+
+ p = a * split;
+ ha = a - p;
+ ha += p;
+ la = a - ha;
+
+ p = b * split;
+ hb = b - p;
+ hb += p;
+ lb = b - hb;
+
+ p = ha * hb;
+ q = ha * lb + la * hb;
+
+ ret.hi = p + q;
+ ret.lo = p - ret.hi + q + la * lb;
+ return (ret);
+}
+
/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
@@ -43,14 +165,8 @@ __FBSDID("$FreeBSD$");
long double
fmal(long double x, long double y, long double z)
{
-#if LDBL_MANT_DIG == 64
- static const long double split = 0x1p32L + 1.0;
-#elif LDBL_MANT_DIG == 113
- static const long double split = 0x1p57L + 1.0;
-#endif
- long double xs, ys, zs;
- long double c, cc, hx, hy, p, q, tx, ty;
- long double r, rr, s;
+ long double xs, ys, zs, adj;
+ struct dd xy, r;
int oround;
int ex, ey, ez;
int spread;
@@ -80,41 +196,6 @@ fmal(long double x, long double y, long double z)
* will overflow, so we handle these cases specially. Rounding
* modes other than FE_TONEAREST are painful.
*/
- if (spread > LDBL_MANT_DIG * 2) {
- fenv_t env;
- feraiseexcept(FE_INEXACT);
- switch(oround) {
- case FE_TONEAREST:
- return (x * y);
- case FE_TOWARDZERO:
- if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
- return (x * y);
- feholdexcept(&env);
- r = x * y;
- if (!fetestexcept(FE_INEXACT))
- r = nextafterl(r, 0);
- feupdateenv(&env);
- return (r);
- case FE_DOWNWARD:
- if (z > 0.0)
- return (x * y);
- feholdexcept(&env);
- r = x * y;
- if (!fetestexcept(FE_INEXACT))
- r = nextafterl(r, -INFINITY);
- feupdateenv(&env);
- return (r);
- default: /* FE_UPWARD */
- if (z < 0.0)
- return (x * y);
- feholdexcept(&env);
- r = x * y;
- if (!fetestexcept(FE_INEXACT))
- r = nextafterl(r, INFINITY);
- feupdateenv(&env);
- return (r);
- }
- }
if (spread < -LDBL_MANT_DIG) {
feraiseexcept(FE_INEXACT);
if (!isnormal(z))
@@ -139,49 +220,49 @@ fmal(long double x, long double y, long double z)
return (z);
}
}
+ if (spread <= LDBL_MANT_DIG * 2)
+ zs = ldexpl(zs, -spread);
+ else
+ zs = copysignl(LDBL_MIN, zs);
- /*
- * Use Dekker's algorithm to perform the multiplication and
- * subsequent addition in twice the machine precision.
- * Arrange so that x * y = c + cc, and x * y + z = r + rr.
- */
fesetround(FE_TONEAREST);
- p = xs * split;
- hx = xs - p;
- hx += p;
- tx = xs - hx;
-
- p = ys * split;
- hy = ys - p;
- hy += p;
- ty = ys - hy;
-
- p = hx * hy;
- q = hx * ty + tx * hy;
- c = p + q;
- cc = p - c + q + tx * ty;
-
- zs = ldexpl(zs, -spread);
- r = c + zs;
- s = r - c;
- rr = (c - (r - s)) + (zs - s) + cc;
+ /*
+ * Basic approach for round-to-nearest:
+ *
+ * (xy.hi, xy.lo) = x * y (exact)
+ * (r.hi, r.lo) = xy.hi + z (exact)
+ * adj = xy.lo + r.lo (inexact; low bit is sticky)
+ * result = r.hi + adj (correctly rounded)
+ */
+ xy = dd_mul(xs, ys);
+ r = dd_add(xy.hi, zs);
spread = ex + ey;
- if (spread + ilogbl(r) > -16383) {
+
+ if (r.hi == 0.0) {
+ /*
+ * When the addends cancel to 0, ensure that the result has
+ * the correct sign.
+ */
fesetround(oround);
- r = r + rr;
- } else {
+ volatile long double vzs = zs; /* XXX gcc CSE bug workaround */
+ return (xy.hi + vzs + ldexpl(xy.lo, spread));
+ }
+
+ if (oround != FE_TONEAREST) {
/*
- * The result is subnormal, so we round before scaling to
- * avoid double rounding.
+ * There is no need to worry about double rounding in directed
+ * rounding modes.
*/
- p = ldexpl(copysignl(0x1p-16382L, r), -spread);
- c = r + p;
- s = c - r;
- cc = (r - (c - s)) + (p - s) + rr;
fesetround(oround);
- r = (c + cc) - p;
+ adj = r.lo + xy.lo;
+ return (ldexpl(r.hi + adj, spread));
}
- return (ldexpl(r, spread));
+
+ adj = add_adjusted(r.lo, xy.lo);
+ if (spread + ilogbl(r.hi) > -16383)
+ return (ldexpl(r.hi + adj, spread));
+ else
+ return (add_and_denormalize(r.hi, adj, spread));
}
OpenPOWER on IntegriCloud