diff options
author | sjg <sjg@FreeBSD.org> | 2013-09-05 20:18:59 +0000 |
---|---|---|
committer | sjg <sjg@FreeBSD.org> | 2013-09-05 20:18:59 +0000 |
commit | 62bb1062226d3ce6a2350808256a25508978352d (patch) | |
tree | 22b131dceb13c3df96da594fbaadb693504797c7 /lib/msun | |
parent | 72ab90509b3a51ab361bf710338f2ef44a4e360d (diff) | |
parent | 04932445481c2cb89ff69a83b961bdef3d64757e (diff) | |
download | FreeBSD-src-62bb1062226d3ce6a2350808256a25508978352d.zip FreeBSD-src-62bb1062226d3ce6a2350808256a25508978352d.tar.gz |
Merge from head
Diffstat (limited to 'lib/msun')
48 files changed, 4227 insertions, 652 deletions
diff --git a/lib/msun/Makefile b/lib/msun/Makefile index a50dded..8b64321 100644 --- a/lib/msun/Makefile +++ b/lib/msun/Makefile @@ -21,6 +21,10 @@ ARCH_SUBDIR= ${MACHINE_CPUARCH} .include "${ARCH_SUBDIR}/Makefile.inc" .PATH: ${.CURDIR}/${ARCH_SUBDIR} +.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" +.PATH: ${.CURDIR}/x86 +CFLAGS+= -I${.CURDIR}/x86 +.endif # long double format .if ${LDBL_PREC} == 64 @@ -92,18 +96,19 @@ SYMBOL_MAPS= ${SYM_MAPS} COMMON_SRCS+= s_copysignl.c s_fabsl.c s_llrintl.c s_lrintl.c s_modfl.c .if ${LDBL_PREC} != 53 # If long double != double use these; otherwise, we alias the double versions. -COMMON_SRCS+= e_acosl.c e_asinl.c e_atan2l.c e_fmodl.c \ - e_hypotl.c e_remainderl.c e_sqrtl.c \ +COMMON_SRCS+= e_acoshl.c e_acosl.c e_asinl.c e_atan2l.c e_atanhl.c \ + e_fmodl.c e_hypotl.c e_remainderl.c e_sqrtl.c \ invtrig.c k_cosl.c k_sinl.c k_tanl.c \ - s_atanl.c s_cbrtl.c s_ceill.c s_cosl.c s_cprojl.c \ + s_asinhl.c s_atanl.c s_cbrtl.c s_ceill.c s_cosl.c s_cprojl.c \ s_csqrtl.c s_exp2l.c s_expl.c s_floorl.c s_fmal.c \ - s_frexpl.c s_logbl.c s_nanl.c s_nextafterl.c s_nexttoward.c \ - s_remquol.c s_rintl.c s_scalbnl.c \ + s_frexpl.c s_logbl.c s_logl.c s_nanl.c s_nextafterl.c \ + s_nexttoward.c s_remquol.c s_rintl.c s_scalbnl.c \ s_sinl.c s_tanl.c s_truncl.c w_cabsl.c .endif # C99 complex functions -COMMON_SRCS+= s_ccosh.c s_ccoshf.c s_cexp.c s_cexpf.c \ +COMMON_SRCS+= catrig.c catrigf.c \ + s_ccosh.c s_ccoshf.c s_cexp.c s_cexpf.c \ s_cimag.c s_cimagf.c s_cimagl.c \ s_conj.c s_conjf.c s_conjl.c \ s_cproj.c s_cprojf.c s_creal.c s_crealf.c s_creall.c \ @@ -119,18 +124,12 @@ COMMON_SRCS:= ${COMMON_SRCS:N${i:R}.c} .endfor .endif -# Some files need certain gcc built-in functions to be disabled, since gcc's -# model of the functions bogusly assumes -fno-trapping-math. -XRINT_CFLAGS= -fno-builtin-rint -fno-builtin-rintf -fno-builtin-rintl -CFLAGS+= ${XRINT_CFLAGS} -XRINT_CFLAGS:= ${.IMPSRC:M*/s_nearbyint.c:C/^.+$/${XRINT_CFLAGS}/:C/^$//} - SRCS= ${COMMON_SRCS} ${ARCH_SRCS} INCS+= fenv.h math.h MAN= acos.3 acosh.3 asin.3 asinh.3 atan.3 atan2.3 atanh.3 \ - ceil.3 ccos.3 ccosh.3 cexp.3 \ + ceil.3 cacos.3 ccos.3 ccosh.3 cexp.3 \ cimag.3 copysign.3 cos.3 cosh.3 csqrt.3 erf.3 exp.3 fabs.3 fdim.3 \ feclearexcept.3 feenableexcept.3 fegetenv.3 \ fegetround.3 fenv.3 floor.3 \ @@ -141,13 +140,16 @@ MAN= acos.3 acosh.3 asin.3 asinh.3 atan.3 atan2.3 atanh.3 \ complex.3 MLINKS+=acos.3 acosf.3 acos.3 acosl.3 -MLINKS+=acosh.3 acoshf.3 +MLINKS+=acosh.3 acoshf.3 acosh.3 acoshl.3 MLINKS+=asin.3 asinf.3 asin.3 asinl.3 -MLINKS+=asinh.3 asinhf.3 +MLINKS+=asinh.3 asinhf.3 asinh.3 asinhl.3 MLINKS+=atan.3 atanf.3 atan.3 atanl.3 -MLINKS+=atanh.3 atanhf.3 +MLINKS+=atanh.3 atanhf.3 atanh.3 atanhl.3 MLINKS+=atan2.3 atan2f.3 atan2.3 atan2l.3 \ atan2.3 carg.3 atan2.3 cargf.3 atan2.3 cargl.3 +MLINKS+=cacos.3 cacosf.3 cacos.3 cacosh.3 cacos.3 cacoshf.3 \ + cacos.3 casin.3 cacos.3 casinf.3 cacos.3 casinh.3 cacos.3 casinhf.3 \ + cacos.3 catan.3 cacos.3 catanf.3 cacos.3 catanh.3 cacos.3 catanhf.3 MLINKS+=ccos.3 ccosf.3 ccos.3 csin.3 ccos.3 csinf.3 ccos.3 ctan.3 ccos.3 ctanf.3 MLINKS+=ccosh.3 ccoshf.3 ccosh.3 csinh.3 ccosh.3 csinhf.3 \ ccosh.3 ctanh.3 ccosh.3 ctanhf.3 @@ -162,8 +164,8 @@ MLINKS+=cos.3 cosf.3 cos.3 cosl.3 MLINKS+=cosh.3 coshf.3 MLINKS+=csqrt.3 csqrtf.3 csqrt.3 csqrtl.3 MLINKS+=erf.3 erfc.3 erf.3 erff.3 erf.3 erfcf.3 -MLINKS+=exp.3 expm1.3 exp.3 expm1f.3 exp.3 pow.3 exp.3 powf.3 \ - exp.3 exp2.3 exp.3 exp2f.3 exp.3 exp2l.3 exp.3 expf.3 +MLINKS+=exp.3 expm1.3 exp.3 expm1f.3 exp.3 expm1l.3 exp.3 pow.3 exp.3 powf.3 \ + exp.3 exp2.3 exp.3 exp2f.3 exp.3 exp2l.3 exp.3 expf.3 exp.3 expl.3 MLINKS+=fabs.3 fabsf.3 fabs.3 fabsl.3 MLINKS+=fdim.3 fdimf.3 fdim.3 fdiml.3 MLINKS+=feclearexcept.3 fegetexceptflag.3 feclearexcept.3 feraiseexcept.3 \ @@ -187,7 +189,10 @@ MLINKS+=j0.3 j1.3 j0.3 jn.3 j0.3 y0.3 j0.3 y1.3 j0.3 y1f.3 j0.3 yn.3 MLINKS+=j0.3 j0f.3 j0.3 j1f.3 j0.3 jnf.3 j0.3 y0f.3 j0.3 ynf.3 MLINKS+=lgamma.3 gamma.3 lgamma.3 gammaf.3 lgamma.3 lgammaf.3 \ lgamma.3 tgamma.3 lgamma.3 tgammaf.3 -MLINKS+=log.3 log10.3 log.3 log10f.3 log.3 log1p.3 log.3 log1pf.3 log.3 logf.3 log.3 log2.3 log.3 log2f.3 +MLINKS+=log.3 log10.3 log.3 log10f.3 log.3 log10l.3 \ + log.3 log1p.3 log.3 log1pf.3 log.3 log1pl.3 \ + log.3 logf.3 log.3 logl.3 \ + log.3 log2.3 log.3 log2f.3 log.3 log2l.3 MLINKS+=lrint.3 llrint.3 lrint.3 llrintf.3 lrint.3 llrintl.3 \ lrint.3 lrintf.3 lrint.3 lrintl.3 MLINKS+=lround.3 llround.3 lround.3 llroundf.3 lround.3 llroundl.3 \ diff --git a/lib/msun/Symbol.map b/lib/msun/Symbol.map index 76f1bfb..f244af4 100644 --- a/lib/msun/Symbol.map +++ b/lib/msun/Symbol.map @@ -237,6 +237,21 @@ FBSD_1.3 { fegetround; fesetround; fesetenv; + acoshl; + asinhl; + atanhl; + cacos; + cacosf; + cacosh; + cacoshf; + casin; + casinf; + casinh; + casinhf; + catan; + catanf; + catanh; + catanhf; csin; csinf; csinh; @@ -250,4 +265,9 @@ FBSD_1.3 { ctanh; ctanhf; expl; + expm1l; + log10l; + log1pl; + log2l; + logl; }; diff --git a/lib/msun/amd64/fenv.h b/lib/msun/amd64/fenv.h deleted file mode 100644 index b7c9873..0000000 --- a/lib/msun/amd64/fenv.h +++ /dev/null @@ -1,222 +0,0 @@ -/*- - * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _FENV_H_ -#define _FENV_H_ - -#include <sys/cdefs.h> -#include <sys/_types.h> - -#ifndef __fenv_static -#define __fenv_static static -#endif - -typedef struct { - struct { - __uint32_t __control; - __uint32_t __status; - __uint32_t __tag; - char __other[16]; - } __x87; - __uint32_t __mxcsr; -} fenv_t; - -typedef __uint16_t fexcept_t; - -/* Exception flags */ -#define FE_INVALID 0x01 -#define FE_DENORMAL 0x02 -#define FE_DIVBYZERO 0x04 -#define FE_OVERFLOW 0x08 -#define FE_UNDERFLOW 0x10 -#define FE_INEXACT 0x20 -#define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_DENORMAL | FE_INEXACT | \ - FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW) - -/* Rounding modes */ -#define FE_TONEAREST 0x0000 -#define FE_DOWNWARD 0x0400 -#define FE_UPWARD 0x0800 -#define FE_TOWARDZERO 0x0c00 -#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \ - FE_UPWARD | FE_TOWARDZERO) - -/* - * As compared to the x87 control word, the SSE unit's control word - * has the rounding control bits offset by 3 and the exception mask - * bits offset by 7. - */ -#define _SSE_ROUND_SHIFT 3 -#define _SSE_EMASK_SHIFT 7 - -__BEGIN_DECLS - -/* Default floating-point environment */ -extern const fenv_t __fe_dfl_env; -#define FE_DFL_ENV (&__fe_dfl_env) - -#define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw)) -#define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env)) -#define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \ - : "st", "st(1)", "st(2)", "st(3)", "st(4)", \ - "st(5)", "st(6)", "st(7)") -#define __fnclex() __asm __volatile("fnclex") -#define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env))) -#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw))) -#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw))) -#define __fwait() __asm __volatile("fwait") -#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr)) -#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr))) - -__fenv_static inline int -feclearexcept(int __excepts) -{ - fenv_t __env; - - if (__excepts == FE_ALL_EXCEPT) { - __fnclex(); - } else { - __fnstenv(&__env.__x87); - __env.__x87.__status &= ~__excepts; - __fldenv(__env.__x87); - } - __stmxcsr(&__env.__mxcsr); - __env.__mxcsr &= ~__excepts; - __ldmxcsr(__env.__mxcsr); - return (0); -} - -__fenv_static inline int -fegetexceptflag(fexcept_t *__flagp, int __excepts) -{ - __uint32_t __mxcsr; - __uint16_t __status; - - __stmxcsr(&__mxcsr); - __fnstsw(&__status); - *__flagp = (__mxcsr | __status) & __excepts; - return (0); -} - -int fesetexceptflag(const fexcept_t *__flagp, int __excepts); -int feraiseexcept(int __excepts); - -__fenv_static inline int -fetestexcept(int __excepts) -{ - __uint32_t __mxcsr; - __uint16_t __status; - - __stmxcsr(&__mxcsr); - __fnstsw(&__status); - return ((__status | __mxcsr) & __excepts); -} - -__fenv_static inline int -fegetround(void) -{ - __uint16_t __control; - - /* - * We assume that the x87 and the SSE unit agree on the - * rounding mode. Reading the control word on the x87 turns - * out to be about 5 times faster than reading it on the SSE - * unit on an Opteron 244. - */ - __fnstcw(&__control); - return (__control & _ROUND_MASK); -} - -__fenv_static inline int -fesetround(int __round) -{ - __uint32_t __mxcsr; - __uint16_t __control; - - if (__round & ~_ROUND_MASK) - return (-1); - - __fnstcw(&__control); - __control &= ~_ROUND_MASK; - __control |= __round; - __fldcw(__control); - - __stmxcsr(&__mxcsr); - __mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT); - __mxcsr |= __round << _SSE_ROUND_SHIFT; - __ldmxcsr(__mxcsr); - - return (0); -} - -int fegetenv(fenv_t *__envp); -int feholdexcept(fenv_t *__envp); - -__fenv_static inline int -fesetenv(const fenv_t *__envp) -{ - - /* - * XXX Using fldenvx() instead of fldenv() tells the compiler that this - * instruction clobbers the i387 register stack. This happens because - * we restore the tag word from the saved environment. Normally, this - * would happen anyway and we wouldn't care, because the ABI allows - * function calls to clobber the i387 regs. However, fesetenv() is - * inlined, so we need to be more careful. - */ - __fldenvx(__envp->__x87); - __ldmxcsr(__envp->__mxcsr); - return (0); -} - -int feupdateenv(const fenv_t *__envp); - -#if __BSD_VISIBLE - -int feenableexcept(int __mask); -int fedisableexcept(int __mask); - -/* We currently provide no external definition of fegetexcept(). */ -static inline int -fegetexcept(void) -{ - __uint16_t __control; - - /* - * We assume that the masks for the x87 and the SSE unit are - * the same. - */ - __fnstcw(&__control); - return (~__control & FE_ALL_EXCEPT); -} - -#endif /* __BSD_VISIBLE */ - -__END_DECLS - -#endif /* !_FENV_H_ */ diff --git a/lib/msun/ld128/s_exp2l.c b/lib/msun/ld128/s_exp2l.c index 31178e4..5afa37e 100644 --- a/lib/msun/ld128/s_exp2l.c +++ b/lib/msun/ld128/s_exp2l.c @@ -39,14 +39,11 @@ __FBSDID("$FreeBSD$"); #define BIAS (LDBL_MAX_EXP - 1) #define EXPMASK (BIAS + LDBL_MAX_EXP) -#if 0 /* XXX Prevent gcc from erroneously constant folding this. */ -static const long double twom10000 = 0x1p-10000L; -#else -static volatile long double twom10000 = 0x1p-10000L; -#endif +static volatile long double + huge = 0x1p10000L, + twom10000 = 0x1p-10000L; static const long double - huge = 0x1p10000L, P1 = 0x1.62e42fefa39ef35793c7673007e6p-1L, P2 = 0x1.ebfbdff82c58ea86f16b06ec9736p-3L, P3 = 0x1.c6b08d704a0bf8b33a762bad3459p-5L, diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c index 5052c3a..176c932 100644 --- a/lib/msun/ld128/s_expl.c +++ b/lib/msun/ld128/s_expl.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2012 Steven G. Kargl + * Copyright (c) 2009-2013 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -22,6 +22,8 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Optimized by Bruce D. Evans. */ #include <sys/cdefs.h> @@ -38,35 +40,61 @@ __FBSDID("$FreeBSD$"); #include "math_private.h" #define INTERVALS 128 +#define LOG2_INTERVALS 7 #define BIAS (LDBL_MAX_EXP - 1) +static const long double +huge = 0x1p10000L, +twom10000 = 0x1p-10000L; +/* XXX Prevent gcc from erroneously constant folding this: */ static volatile const long double tiny = 0x1p-10000L; static const long double -INV_L = 1.84664965233787316142070359168242182e+02L, -L1 = 5.41521234812457272982212595914567508e-03L, -L2 = -1.02536706388947310094527932552595546e-29L, -huge = 0x1p10000L, +/* log(2**16384 - 0.5) rounded towards zero: */ +/* log(2**16384 - 0.5 + 1) rounded towards zero for expm1l() is the same: */ o_threshold = 11356.523406294143949491931077970763428L, -twom10000 = 0x1p-10000L, +/* log(2**(-16381-64-1)) rounded towards zero: */ u_threshold = -11433.462743336297878837243843452621503L; +static const double +/* + * ln2/INTERVALS = L1+L2 (hi+lo decomposition for multiplication). L1 must + * have at least 22 (= log2(|LDBL_MIN_EXP-extras|) + log2(INTERVALS)) lowest + * bits zero so that multiplication of it by n is exact. + */ +INV_L = 1.8466496523378731e+2, /* 0x171547652b82fe.0p-45 */ +L2 = -1.0253670638894731e-29; /* -0x1.9ff0342542fc3p-97 */ static const long double -P2 = 5.00000000000000000000000000000000000e-1L, -P3 = 1.66666666666666666666666666666666972e-1L, -P4 = 4.16666666666666666666666666653708268e-2L, -P5 = 8.33333333333333333333333315069867254e-3L, -P6 = 1.38888888888888888888996596213795377e-3L, -P7 = 1.98412698412698412718821436278644414e-4L, -P8 = 2.48015873015869681884882576649543128e-5L, -P9 = 2.75573192240103867817876199544468806e-6L, -P10 = 2.75573236172670046201884000197885520e-7L, -P11 = 2.50517544183909126492878226167697856e-8L; +/* 0x1.62e42fefa39ef35793c768000000p-8 */ +L1 = 5.41521234812457272982212595914567508e-3L; + +static const long double +/* + * Domain [-0.002708, 0.002708], range ~[-2.4021e-38, 2.4234e-38]: + * |exp(x) - p(x)| < 2**-124.9 + * (0.002708 is ln2/(2*INTERVALS) rounded up a little). + */ +A2 = 0.5, +A3 = 1.66666666666666666666666666651085500e-1L, +A4 = 4.16666666666666666666666666425885320e-2L, +A5 = 8.33333333333333333334522877160175842e-3L, +A6 = 1.38888888888888888889971139751596836e-3L; + +static const double +A7 = 1.9841269841269471e-4, +A8 = 2.4801587301585284e-5, +A9 = 2.7557324277411234e-6, +A10 = 2.7557333722375072e-7; static const struct { + /* + * hi must be rounded to at most 106 bits so that multiplication + * by r1 in expm1l() is exact, but it is rounded to 88 bits due to + * historical accidents. + */ long double hi; long double lo; -} s[INTERVALS] = { +} tbl[INTERVALS] = { 0x1p0L, 0x0p0L, 0x1.0163da9fb33356d84a66aep0L, 0x3.36dcdfa4003ec04c360be2404078p-92L, 0x1.02c9a3e778060ee6f7cacap0L, 0x4.f7a29bde93d70a2cabc5cb89ba10p-92L, @@ -201,9 +229,10 @@ long double expl(long double x) { union IEEEl2bits u, v; - long double fn, r, r1, r2, q, t, twopk, twopkp10000; + long double q, r, r1, t, twopk, twopkp10000; + double dr, fn, r2; int k, n, n2; - uint32_t hx, ix; + uint16_t hx, ix; /* Filter out exceptional cases. */ u.e = x; @@ -211,31 +240,39 @@ expl(long double x) ix = hx & 0x7fff; if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ if (ix == BIAS + LDBL_MAX_EXP) { - if (hx & 0x8000 && u.xbits.manh == 0 && - u.xbits.manl == 0) - return (0.0L); /* x is -Inf */ - return (x + x); /* x is +Inf or NaN */ + if (hx & 0x8000) /* x is -Inf or -NaN */ + return (-1 / x); + return (x + x); /* x is +Inf or +NaN */ } if (x > o_threshold) return (huge * huge); if (x < u_threshold) return (tiny * tiny); - } else if (ix < BIAS - 115) { /* |x| < 0x1p-115 */ - if (huge + x > 1.0L) /* trigger inexact iff x != 0 */ - return (1.0L + x); + } else if (ix < BIAS - 114) { /* |x| < 0x1p-114 */ + return (1 + x); /* 1 with inexact iff x != 0 */ } - /* Reduce x to (k*ln2 + midpoint[n2] + r1 + r2). */ - fn = x * INV_L + 0x1.8p112 - 0x1.8p112; - n = (int)fn; + ENTERI(); + + /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + /* XXX assume no extra precision for the additions, as for trig fns. */ + /* XXX this set of comments is now quadruplicated. */ + fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52; +#if defined(HAVE_EFFICIENT_IRINT) + n = irint(fn); +#else + n = (int)fn; +#endif n2 = (unsigned)n % INTERVALS; - k = (n - n2) / INTERVALS; + k = n >> LOG2_INTERVALS; r1 = x - fn * L1; - r2 = -fn * L2; + r2 = fn * -L2; + r = r1 + r2; /* Prepare scale factors. */ - v.xbits.manh = 0; - v.xbits.manl = 0; + /* XXX sparc64 multiplication is so slow that scalbnl() is faster. */ + v.e = 1; if (k >= LDBL_MIN_EXP) { v.xbits.expsign = BIAS + k; twopk = v.e; @@ -244,18 +281,214 @@ expl(long double x) twopkp10000 = v.e; } - r = r1 + r2; - q = r * r * (P2 + r * (P3 + r * (P4 + r * (P5 + r * (P6 + r * (P7 + - r * (P8 + r * (P9 + r * (P10 + r * P11))))))))); - t = s[n2].lo + s[n2].hi; - t = s[n2].hi + (s[n2].lo + t * (r2 + q + r1)); + /* Evaluate expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */ + dr = r; + q = r2 + r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 + + dr * (A7 + dr * (A8 + dr * (A9 + dr * A10)))))))); + t = tbl[n2].lo + tbl[n2].hi; + t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi; /* Scale by 2**k. */ if (k >= LDBL_MIN_EXP) { if (k == LDBL_MAX_EXP) - return (t * 2.0L * 0x1p16383L); - return (t * twopk); + RETURNI(t * 2 * 0x1p16383L); + RETURNI(t * twopk); } else { - return (t * twopkp10000 * twom10000); + RETURNI(t * twopkp10000 * twom10000); } } + +/* + * Our T1 and T2 are chosen to be approximately the points where method + * A and method B have the same accuracy. Tang's T1 and T2 are the + * points where method A's accuracy changes by a full bit. For Tang, + * this drop in accuracy makes method A immediately less accurate than + * method B, but our larger INTERVALS makes method A 2 bits more + * accurate so it remains the most accurate method significantly + * closer to the origin despite losing the full bit in our extended + * range for it. + * + * Split the interval [T1, T2] into two intervals [T1, T3] and [T3, T2]. + * Setting T3 to 0 would require the |x| < 0x1p-113 condition to appear + * in both subintervals, so set T3 = 2**-5, which places the condition + * into the [T1, T3] interval. + */ +static const double +T1 = -0.1659, /* ~-30.625/128 * log(2) */ +T2 = 0.1659, /* ~30.625/128 * log(2) */ +T3 = 0.03125; + +/* + * Domain [-0.1659, 0.03125], range ~[2.9134e-44, 1.8404e-37]: + * |(exp(x)-1-x-x**2/2)/x - p(x)| < 2**-122.03 + */ +static const long double +C3 = 1.66666666666666666666666666666666667e-1L, +C4 = 4.16666666666666666666666666666666645e-2L, +C5 = 8.33333333333333333333333333333371638e-3L, +C6 = 1.38888888888888888888888888891188658e-3L, +C7 = 1.98412698412698412698412697235950394e-4L, +C8 = 2.48015873015873015873015112487849040e-5L, +C9 = 2.75573192239858906525606685484412005e-6L, +C10 = 2.75573192239858906612966093057020362e-7L, +C11 = 2.50521083854417203619031960151253944e-8L, +C12 = 2.08767569878679576457272282566520649e-9L, +C13 = 1.60590438367252471783548748824255707e-10L; + +static const double +C14 = 1.1470745580491932e-11, /* 0x1.93974a81dae30p-37 */ +C15 = 7.6471620181090468e-13, /* 0x1.ae7f3820adab1p-41 */ +C16 = 4.7793721460260450e-14, /* 0x1.ae7cd18a18eacp-45 */ +C17 = 2.8074757356658877e-15, /* 0x1.949992a1937d9p-49 */ +C18 = 1.4760610323699476e-16; /* 0x1.545b43aabfbcdp-53 */ + +/* + * Domain [0.03125, 0.1659], range ~[-2.7676e-37, -1.0367e-38]: + * |(exp(x)-1-x-x**2/2)/x - p(x)| < 2**-121.44 + */ +static const long double +D3 = 1.66666666666666666666666666666682245e-1L, +D4 = 4.16666666666666666666666666634228324e-2L, +D5 = 8.33333333333333333333333364022244481e-3L, +D6 = 1.38888888888888888888887138722762072e-3L, +D7 = 1.98412698412698412699085805424661471e-4L, +D8 = 2.48015873015873015687993712101479612e-5L, +D9 = 2.75573192239858944101036288338208042e-6L, +D10 = 2.75573192239853161148064676533754048e-7L, +D11 = 2.50521083855084570046480450935267433e-8L, +D12 = 2.08767569819738524488686318024854942e-9L, +D13 = 1.60590442297008495301927448122499313e-10L; + +static const double +D14 = 1.1470726176204336e-11, /* 0x1.93971dc395d9ep-37 */ +D15 = 7.6478532249581686e-13, /* 0x1.ae892e3D16fcep-41 */ +D16 = 4.7628892832607741e-14, /* 0x1.ad00Dfe41feccp-45 */ +D17 = 3.0524857220358650e-15; /* 0x1.D7e8d886Df921p-49 */ + +long double +expm1l(long double x) +{ + union IEEEl2bits u, v; + long double hx2_hi, hx2_lo, q, r, r1, t, twomk, twopk, x_hi; + long double x_lo, x2; + double dr, dx, fn, r2; + int k, n, n2; + uint16_t hx, ix; + + /* Filter out exceptional cases. */ + u.e = x; + hx = u.xbits.expsign; + ix = hx & 0x7fff; + if (ix >= BIAS + 7) { /* |x| >= 128 or x is NaN */ + if (ix == BIAS + LDBL_MAX_EXP) { + if (hx & 0x8000) /* x is -Inf or -NaN */ + return (-1 / x - 1); + return (x + x); /* x is +Inf or +NaN */ + } + if (x > o_threshold) + return (huge * huge); + /* + * expm1l() never underflows, but it must avoid + * unrepresentable large negative exponents. We used a + * much smaller threshold for large |x| above than in + * expl() so as to handle not so large negative exponents + * in the same way as large ones here. + */ + if (hx & 0x8000) /* x <= -128 */ + return (tiny - 1); /* good for x < -114ln2 - eps */ + } + + ENTERI(); + + if (T1 < x && x < T2) { + x2 = x * x; + dx = x; + + if (x < T3) { + if (ix < BIAS - 113) { /* |x| < 0x1p-113 */ + /* x (rounded) with inexact if x != 0: */ + RETURNI(x == 0 ? x : + (0x1p200 * x + fabsl(x)) * 0x1p-200); + } + q = x * x2 * C3 + x2 * x2 * (C4 + x * (C5 + x * (C6 + + x * (C7 + x * (C8 + x * (C9 + x * (C10 + + x * (C11 + x * (C12 + x * (C13 + + dx * (C14 + dx * (C15 + dx * (C16 + + dx * (C17 + dx * C18)))))))))))))); + } else { + q = x * x2 * D3 + x2 * x2 * (D4 + x * (D5 + x * (D6 + + x * (D7 + x * (D8 + x * (D9 + x * (D10 + + x * (D11 + x * (D12 + x * (D13 + + dx * (D14 + dx * (D15 + dx * (D16 + + dx * D17))))))))))))); + } + + x_hi = (float)x; + x_lo = x - x_hi; + hx2_hi = x_hi * x_hi / 2; + hx2_lo = x_lo * (x + x_hi) / 2; + if (ix >= BIAS - 7) + RETURNI(hx2_lo + x_lo + q + (hx2_hi + x_hi)); + else + RETURNI(hx2_lo + q + hx2_hi + x); + } + + /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52; +#if defined(HAVE_EFFICIENT_IRINT) + n = irint(fn); +#else + n = (int)fn; +#endif + n2 = (unsigned)n % INTERVALS; + k = n >> LOG2_INTERVALS; + r1 = x - fn * L1; + r2 = fn * -L2; + r = r1 + r2; + + /* Prepare scale factor. */ + v.e = 1; + v.xbits.expsign = BIAS + k; + twopk = v.e; + + /* + * Evaluate lower terms of + * expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). + */ + dr = r; + q = r2 + r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 + + dr * (A7 + dr * (A8 + dr * (A9 + dr * A10)))))))); + + t = tbl[n2].lo + tbl[n2].hi; + + if (k == 0) { + t = tbl[n2].lo * (r1 + 1) + t * q + tbl[n2].hi * r1 + + (tbl[n2].hi - 1); + RETURNI(t); + } + if (k == -1) { + t = tbl[n2].lo * (r1 + 1) + t * q + tbl[n2].hi * r1 + + (tbl[n2].hi - 2); + RETURNI(t / 2); + } + if (k < -7) { + t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi; + RETURNI(t * twopk - 1); + } + if (k > 2 * LDBL_MANT_DIG - 1) { + t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi; + if (k == LDBL_MAX_EXP) + RETURNI(t * 2 * 0x1p16383L - 1); + RETURNI(t * twopk - 1); + } + + v.xbits.expsign = BIAS - k; + twomk = v.e; + + if (k > LDBL_MANT_DIG - 1) + t = tbl[n2].lo - twomk + t * (q + r1) + tbl[n2].hi; + else + t = tbl[n2].lo + t * (q + r1) + (tbl[n2].hi - twomk); + RETURNI(t * twopk); +} diff --git a/lib/msun/ld128/s_logl.c b/lib/msun/ld128/s_logl.c new file mode 100644 index 0000000..391d623 --- /dev/null +++ b/lib/msun/ld128/s_logl.c @@ -0,0 +1,737 @@ +/*- + * Copyright (c) 2007-2013 Bruce D. Evans + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/** + * Implementation of the natural logarithm of x for 128-bit format. + * + * First decompose x into its base 2 representation: + * + * log(x) = log(X * 2**k), where X is in [1, 2) + * = log(X) + k * log(2). + * + * Let X = X_i + e, where X_i is the center of one of the intervals + * [-1.0/256, 1.0/256), [1.0/256, 3.0/256), .... [2.0-1.0/256, 2.0+1.0/256) + * and X is in this interval. Then + * + * log(X) = log(X_i + e) + * = log(X_i * (1 + e / X_i)) + * = log(X_i) + log(1 + e / X_i). + * + * The values log(X_i) are tabulated below. Let d = e / X_i and use + * + * log(1 + d) = p(d) + * + * where p(d) = d - 0.5*d*d + ... is a special minimax polynomial of + * suitably high degree. + * + * To get sufficiently small roundoff errors, k * log(2), log(X_i), and + * sometimes (if |k| is not large) the first term in p(d) must be evaluated + * and added up in extra precision. Extra precision is not needed for the + * rest of p(d). In the worst case when k = 0 and log(X_i) is 0, the final + * error is controlled mainly by the error in the second term in p(d). The + * error in this term itself is at most 0.5 ulps from the d*d operation in + * it. The error in this term relative to the first term is thus at most + * 0.5 * |-0.5| * |d| < 1.0/1024 ulps. We aim for an accumulated error of + * at most twice this at the point of the final rounding step. Thus the + * final error should be at most 0.5 + 1.0/512 = 0.5020 ulps. Exhaustive + * testing of a float variant of this function showed a maximum final error + * of 0.5008 ulps. Non-exhaustive testing of a double variant of this + * function showed a maximum final error of 0.5078 ulps (near 1+1.0/256). + * + * We made the maximum of |d| (and thus the total relative error and the + * degree of p(d)) small by using a large number of intervals. Using + * centers of intervals instead of endpoints reduces this maximum by a + * factor of 2 for a given number of intervals. p(d) is special only + * in beginning with the Taylor coefficients 0 + 1*d, which tends to happen + * naturally. The most accurate minimax polynomial of a given degree might + * be different, but then we wouldn't want it since we would have to do + * extra work to avoid roundoff error (especially for P0*d instead of d). + */ + +#ifdef DEBUG +#include <assert.h> +#include <fenv.h> +#endif + +#include "fpmath.h" +#include "math.h" +#ifndef NO_STRUCT_RETURN +#define STRUCT_RETURN +#endif +#include "math_private.h" + +#if !defined(NO_UTAB) && !defined(NO_UTABL) +#define USE_UTAB +#endif + +/* + * Domain [-0.005280, 0.004838], range ~[-1.1577e-37, 1.1582e-37]: + * |log(1 + d)/d - p(d)| < 2**-122.7 + */ +static const long double +P2 = -0.5L, +P3 = 3.33333333333333333333333333333233795e-1L, /* 0x15555555555555555555555554d42.0p-114L */ +P4 = -2.49999999999999999999999999941139296e-1L, /* -0x1ffffffffffffffffffffffdab14e.0p-115L */ +P5 = 2.00000000000000000000000085468039943e-1L, /* 0x19999999999999999999a6d3567f4.0p-115L */ +P6 = -1.66666666666666666666696142372698408e-1L, /* -0x15555555555555555567267a58e13.0p-115L */ +P7 = 1.42857142857142857119522943477166120e-1L, /* 0x1249249249249248ed79a0ae434de.0p-115L */ +P8 = -1.24999999999999994863289015033581301e-1L; /* -0x1fffffffffffffa13e91765e46140.0p-116L */ +/* Double precision gives ~ 53 + log2(P9 * max(|d|)**8) ~= 120 bits. */ +static const double +P9 = 1.1111111111111401e-1, /* 0x1c71c71c71c7ed.0p-56 */ +P10 = -1.0000000000040135e-1, /* -0x199999999a0a92.0p-56 */ +P11 = 9.0909090728136258e-2, /* 0x1745d173962111.0p-56 */ +P12 = -8.3333318851855284e-2, /* -0x1555551722c7a3.0p-56 */ +P13 = 7.6928634666404178e-2, /* 0x13b1985204a4ae.0p-56 */ +P14 = -7.1626810078462499e-2; /* -0x12562276cdc5d0.0p-56 */ + +static volatile const double zero = 0; + +#define INTERVALS 128 +#define LOG2_INTERVALS 7 +#define TSIZE (INTERVALS + 1) +#define G(i) (T[(i)].G) +#define F_hi(i) (T[(i)].F_hi) +#define F_lo(i) (T[(i)].F_lo) +#define ln2_hi F_hi(TSIZE - 1) +#define ln2_lo F_lo(TSIZE - 1) +#define E(i) (U[(i)].E) +#define H(i) (U[(i)].H) + +static const struct { + float G; /* 1/(1 + i/128) rounded to 8/9 bits */ + float F_hi; /* log(1 / G_i) rounded (see below) */ + /* The compiler will insert 8 bytes of padding here. */ + long double F_lo; /* next 113 bits for log(1 / G_i) */ +} T[TSIZE] = { + /* + * ln2_hi and each F_hi(i) are rounded to a number of bits that + * makes F_hi(i) + dk*ln2_hi exact for all i and all dk. + * + * The last entry (for X just below 2) is used to define ln2_hi + * and ln2_lo, to ensure that F_hi(i) and F_lo(i) cancel exactly + * with dk*ln2_hi and dk*ln2_lo, respectively, when dk = -1. + * This is needed for accuracy when x is just below 1. (To avoid + * special cases, such x are "reduced" strangely to X just below + * 2 and dk = -1, and then the exact cancellation is needed + * because any the error from any non-exactness would be too + * large). + * + * The relevant range of dk is [-16445, 16383]. The maximum number + * of bits in F_hi(i) that works is very dependent on i but has + * a minimum of 93. We only need about 12 bits in F_hi(i) for + * it to provide enough extra precision. + * + * We round F_hi(i) to 24 bits so that it can have type float, + * mainly to minimize the size of the table. Using all 24 bits + * in a float for it automatically satisfies the above constraints. + */ + 0x800000.0p-23, 0, 0, + 0xfe0000.0p-24, 0x8080ac.0p-30, -0x14ee431dae6674afa0c4bfe16e8fd.0p-144L, + 0xfc0000.0p-24, 0x8102b3.0p-29, -0x1db29ee2d83717be918e1119642ab.0p-144L, + 0xfa0000.0p-24, 0xc24929.0p-29, 0x1191957d173697cf302cc9476f561.0p-143L, + 0xf80000.0p-24, 0x820aec.0p-28, 0x13ce8888e02e78eba9b1113bc1c18.0p-142L, + 0xf60000.0p-24, 0xa33577.0p-28, -0x17a4382ce6eb7bfa509bec8da5f22.0p-142L, + 0xf48000.0p-24, 0xbc42cb.0p-28, -0x172a21161a107674986dcdca6709c.0p-143L, + 0xf30000.0p-24, 0xd57797.0p-28, -0x1e09de07cb958897a3ea46e84abb3.0p-142L, + 0xf10000.0p-24, 0xf7518e.0p-28, 0x1ae1eec1b036c484993c549c4bf40.0p-151L, + 0xef0000.0p-24, 0x8cb9df.0p-27, -0x1d7355325d560d9e9ab3d6ebab580.0p-141L, + 0xed8000.0p-24, 0x999ec0.0p-27, -0x1f9f02d256d5037108f4ec21e48cd.0p-142L, + 0xec0000.0p-24, 0xa6988b.0p-27, -0x16fc0a9d12c17a70f7a684c596b12.0p-143L, + 0xea0000.0p-24, 0xb80698.0p-27, 0x15d581c1e8da99ded322fb08b8462.0p-141L, + 0xe80000.0p-24, 0xc99af3.0p-27, -0x1535b3ba8f150ae09996d7bb4653e.0p-143L, + 0xe70000.0p-24, 0xd273b2.0p-27, 0x163786f5251aefe0ded34c8318f52.0p-145L, + 0xe50000.0p-24, 0xe442c0.0p-27, 0x1bc4b2368e32d56699c1799a244d4.0p-144L, + 0xe38000.0p-24, 0xf1b83f.0p-27, 0x1c6090f684e6766abceccab1d7174.0p-141L, + 0xe20000.0p-24, 0xff448a.0p-27, -0x1890aa69ac9f4215f93936b709efb.0p-142L, + 0xe08000.0p-24, 0x8673f6.0p-26, 0x1b9985194b6affd511b534b72a28e.0p-140L, + 0xdf0000.0p-24, 0x8d515c.0p-26, -0x1dc08d61c6ef1d9b2ef7e68680598.0p-143L, + 0xdd8000.0p-24, 0x943a9e.0p-26, -0x1f72a2dac729b3f46662238a9425a.0p-142L, + 0xdc0000.0p-24, 0x9b2fe6.0p-26, -0x1fd4dfd3a0afb9691aed4d5e3df94.0p-140L, + 0xda8000.0p-24, 0xa2315d.0p-26, -0x11b26121629c46c186384993e1c93.0p-142L, + 0xd90000.0p-24, 0xa93f2f.0p-26, 0x1286d633e8e5697dc6a402a56fce1.0p-141L, + 0xd78000.0p-24, 0xb05988.0p-26, 0x16128eba9367707ebfa540e45350c.0p-144L, + 0xd60000.0p-24, 0xb78094.0p-26, 0x16ead577390d31ef0f4c9d43f79b2.0p-140L, + 0xd50000.0p-24, 0xbc4c6c.0p-26, 0x151131ccf7c7b75e7d900b521c48d.0p-141L, + 0xd38000.0p-24, 0xc3890a.0p-26, -0x115e2cd714bd06508aeb00d2ae3e9.0p-140L, + 0xd20000.0p-24, 0xcad2d7.0p-26, -0x1847f406ebd3af80485c2f409633c.0p-142L, + 0xd10000.0p-24, 0xcfb620.0p-26, 0x1c2259904d686581799fbce0b5f19.0p-141L, + 0xcf8000.0p-24, 0xd71653.0p-26, 0x1ece57a8d5ae54f550444ecf8b995.0p-140L, + 0xce0000.0p-24, 0xde843a.0p-26, -0x1f109d4bc4595412b5d2517aaac13.0p-141L, + 0xcd0000.0p-24, 0xe37fde.0p-26, 0x1bc03dc271a74d3a85b5b43c0e727.0p-141L, + 0xcb8000.0p-24, 0xeb050c.0p-26, -0x1bf2badc0df841a71b79dd5645b46.0p-145L, + 0xca0000.0p-24, 0xf29878.0p-26, -0x18efededd89fbe0bcfbe6d6db9f66.0p-147L, + 0xc90000.0p-24, 0xf7ad6f.0p-26, 0x1373ff977baa6911c7bafcb4d84fb.0p-141L, + 0xc80000.0p-24, 0xfcc8e3.0p-26, 0x196766f2fb328337cc050c6d83b22.0p-140L, + 0xc68000.0p-24, 0x823f30.0p-25, 0x19bd076f7c434e5fcf1a212e2a91e.0p-139L, + 0xc58000.0p-24, 0x84d52c.0p-25, -0x1a327257af0f465e5ecab5f2a6f81.0p-139L, + 0xc40000.0p-24, 0x88bc74.0p-25, 0x113f23def19c5a0fe396f40f1dda9.0p-141L, + 0xc30000.0p-24, 0x8b5ae6.0p-25, 0x1759f6e6b37de945a049a962e66c6.0p-139L, + 0xc20000.0p-24, 0x8dfccb.0p-25, 0x1ad35ca6ed5147bdb6ddcaf59c425.0p-141L, + 0xc10000.0p-24, 0x90a22b.0p-25, 0x1a1d71a87deba46bae9827221dc98.0p-139L, + 0xbf8000.0p-24, 0x94a0d8.0p-25, -0x139e5210c2b730e28aba001a9b5e0.0p-140L, + 0xbe8000.0p-24, 0x974f16.0p-25, -0x18f6ebcff3ed72e23e13431adc4a5.0p-141L, + 0xbd8000.0p-24, 0x9a00f1.0p-25, -0x1aa268be39aab7148e8d80caa10b7.0p-139L, + 0xbc8000.0p-24, 0x9cb672.0p-25, -0x14c8815839c5663663d15faed7771.0p-139L, + 0xbb0000.0p-24, 0xa0cda1.0p-25, 0x1eaf46390dbb2438273918db7df5c.0p-141L, + 0xba0000.0p-24, 0xa38c6e.0p-25, 0x138e20d831f698298adddd7f32686.0p-141L, + 0xb90000.0p-24, 0xa64f05.0p-25, -0x1e8d3c41123615b147a5d47bc208f.0p-142L, + 0xb80000.0p-24, 0xa91570.0p-25, 0x1ce28f5f3840b263acb4351104631.0p-140L, + 0xb70000.0p-24, 0xabdfbb.0p-25, -0x186e5c0a42423457e22d8c650b355.0p-139L, + 0xb60000.0p-24, 0xaeadef.0p-25, -0x14d41a0b2a08a465dc513b13f567d.0p-143L, + 0xb50000.0p-24, 0xb18018.0p-25, 0x16755892770633947ffe651e7352f.0p-139L, + 0xb40000.0p-24, 0xb45642.0p-25, -0x16395ebe59b15228bfe8798d10ff0.0p-142L, + 0xb30000.0p-24, 0xb73077.0p-25, 0x1abc65c8595f088b61a335f5b688c.0p-140L, + 0xb20000.0p-24, 0xba0ec4.0p-25, -0x1273089d3dad88e7d353e9967d548.0p-139L, + 0xb10000.0p-24, 0xbcf133.0p-25, 0x10f9f67b1f4bbf45de06ecebfaf6d.0p-139L, + 0xb00000.0p-24, 0xbfd7d2.0p-25, -0x109fab904864092b34edda19a831e.0p-140L, + 0xaf0000.0p-24, 0xc2c2ac.0p-25, -0x1124680aa43333221d8a9b475a6ba.0p-139L, + 0xae8000.0p-24, 0xc439b3.0p-25, -0x1f360cc4710fbfe24b633f4e8d84d.0p-140L, + 0xad8000.0p-24, 0xc72afd.0p-25, -0x132d91f21d89c89c45003fc5d7807.0p-140L, + 0xac8000.0p-24, 0xca20a2.0p-25, -0x16bf9b4d1f8da8002f2449e174504.0p-139L, + 0xab8000.0p-24, 0xcd1aae.0p-25, 0x19deb5ce6a6a8717d5626e16acc7d.0p-141L, + 0xaa8000.0p-24, 0xd0192f.0p-25, 0x1a29fb48f7d3ca87dabf351aa41f4.0p-139L, + 0xaa0000.0p-24, 0xd19a20.0p-25, 0x1127d3c6457f9d79f51dcc73014c9.0p-141L, + 0xa90000.0p-24, 0xd49f6a.0p-25, -0x1ba930e486a0ac42d1bf9199188e7.0p-141L, + 0xa80000.0p-24, 0xd7a94b.0p-25, -0x1b6e645f31549dd1160bcc45c7e2c.0p-139L, + 0xa70000.0p-24, 0xdab7d0.0p-25, 0x1118a425494b610665377f15625b6.0p-140L, + 0xa68000.0p-24, 0xdc40d5.0p-25, 0x1966f24d29d3a2d1b2176010478be.0p-140L, + 0xa58000.0p-24, 0xdf566d.0p-25, -0x1d8e52eb2248f0c95dd83626d7333.0p-142L, + 0xa48000.0p-24, 0xe270ce.0p-25, -0x1ee370f96e6b67ccb006a5b9890ea.0p-140L, + 0xa40000.0p-24, 0xe3ffce.0p-25, 0x1d155324911f56db28da4d629d00a.0p-140L, + 0xa30000.0p-24, 0xe72179.0p-25, -0x1fe6e2f2f867d8f4d60c713346641.0p-140L, + 0xa20000.0p-24, 0xea4812.0p-25, 0x1b7be9add7f4d3b3d406b6cbf3ce5.0p-140L, + 0xa18000.0p-24, 0xebdd3d.0p-25, 0x1b3cfb3f7511dd73692609040ccc2.0p-139L, + 0xa08000.0p-24, 0xef0b5b.0p-25, -0x1220de1f7301901b8ad85c25afd09.0p-139L, + 0xa00000.0p-24, 0xf0a451.0p-25, -0x176364c9ac81cc8a4dfb804de6867.0p-140L, + 0x9f0000.0p-24, 0xf3da16.0p-25, 0x1eed6b9aafac8d42f78d3e65d3727.0p-141L, + 0x9e8000.0p-24, 0xf576e9.0p-25, 0x1d593218675af269647b783d88999.0p-139L, + 0x9d8000.0p-24, 0xf8b47c.0p-25, -0x13e8eb7da053e063714615f7cc91d.0p-144L, + 0x9d0000.0p-24, 0xfa553f.0p-25, 0x1c063259bcade02951686d5373aec.0p-139L, + 0x9c0000.0p-24, 0xfd9ac5.0p-25, 0x1ef491085fa3c1649349630531502.0p-139L, + 0x9b8000.0p-24, 0xff3f8c.0p-25, 0x1d607a7c2b8c5320619fb9433d841.0p-139L, + 0x9a8000.0p-24, 0x814697.0p-24, -0x12ad3817004f3f0bdff99f932b273.0p-138L, + 0x9a0000.0p-24, 0x821b06.0p-24, -0x189fc53117f9e54e78103a2bc1767.0p-141L, + 0x990000.0p-24, 0x83c5f8.0p-24, 0x14cf15a048907b7d7f47ddb45c5a3.0p-139L, + 0x988000.0p-24, 0x849c7d.0p-24, 0x1cbb1d35fb82873b04a9af1dd692c.0p-138L, + 0x978000.0p-24, 0x864ba6.0p-24, 0x1128639b814f9b9770d8cb6573540.0p-138L, + 0x970000.0p-24, 0x87244c.0p-24, 0x184733853300f002e836dfd47bd41.0p-139L, + 0x968000.0p-24, 0x87fdaa.0p-24, 0x109d23aef77dd5cd7cc94306fb3ff.0p-140L, + 0x958000.0p-24, 0x89b293.0p-24, -0x1a81ef367a59de2b41eeebd550702.0p-138L, + 0x950000.0p-24, 0x8a8e20.0p-24, -0x121ad3dbb2f45275c917a30df4ac9.0p-138L, + 0x948000.0p-24, 0x8b6a6a.0p-24, -0x1cfb981628af71a89df4e6df2e93b.0p-139L, + 0x938000.0p-24, 0x8d253a.0p-24, -0x1d21730ea76cfdec367828734cae5.0p-139L, + 0x930000.0p-24, 0x8e03c2.0p-24, 0x135cc00e566f76b87333891e0dec4.0p-138L, + 0x928000.0p-24, 0x8ee30d.0p-24, -0x10fcb5df257a263e3bf446c6e3f69.0p-140L, + 0x918000.0p-24, 0x90a3ee.0p-24, -0x16e171b15433d723a4c7380a448d8.0p-139L, + 0x910000.0p-24, 0x918587.0p-24, -0x1d050da07f3236f330972da2a7a87.0p-139L, + 0x908000.0p-24, 0x9267e7.0p-24, 0x1be03669a5268d21148c6002becd3.0p-139L, + 0x8f8000.0p-24, 0x942f04.0p-24, 0x10b28e0e26c336af90e00533323ba.0p-139L, + 0x8f0000.0p-24, 0x9513c3.0p-24, 0x1a1d820da57cf2f105a89060046aa.0p-138L, + 0x8e8000.0p-24, 0x95f950.0p-24, -0x19ef8f13ae3cf162409d8ea99d4c0.0p-139L, + 0x8e0000.0p-24, 0x96dfab.0p-24, -0x109e417a6e507b9dc10dac743ad7a.0p-138L, + 0x8d0000.0p-24, 0x98aed2.0p-24, 0x10d01a2c5b0e97c4990b23d9ac1f5.0p-139L, + 0x8c8000.0p-24, 0x9997a2.0p-24, -0x1d6a50d4b61ea74540bdd2aa99a42.0p-138L, + 0x8c0000.0p-24, 0x9a8145.0p-24, 0x1b3b190b83f9527e6aba8f2d783c1.0p-138L, + 0x8b8000.0p-24, 0x9b6bbf.0p-24, 0x13a69fad7e7abe7ba81c664c107e0.0p-138L, + 0x8b0000.0p-24, 0x9c5711.0p-24, -0x11cd12316f576aad348ae79867223.0p-138L, + 0x8a8000.0p-24, 0x9d433b.0p-24, 0x1c95c444b807a246726b304ccae56.0p-139L, + 0x898000.0p-24, 0x9f1e22.0p-24, -0x1b9c224ea698c2f9b47466d6123fe.0p-139L, + 0x890000.0p-24, 0xa00ce1.0p-24, 0x125ca93186cf0f38b4619a2483399.0p-141L, + 0x888000.0p-24, 0xa0fc80.0p-24, -0x1ee38a7bc228b3597043be78eaf49.0p-139L, + 0x880000.0p-24, 0xa1ed00.0p-24, -0x1a0db876613d204147dc69a07a649.0p-138L, + 0x878000.0p-24, 0xa2de62.0p-24, 0x193224e8516c008d3602a7b41c6e8.0p-139L, + 0x870000.0p-24, 0xa3d0a9.0p-24, 0x1fa28b4d2541aca7d5844606b2421.0p-139L, + 0x868000.0p-24, 0xa4c3d6.0p-24, 0x1c1b5760fb4571acbcfb03f16daf4.0p-138L, + 0x858000.0p-24, 0xa6acea.0p-24, 0x1fed5d0f65949c0a345ad743ae1ae.0p-140L, + 0x850000.0p-24, 0xa7a2d4.0p-24, 0x1ad270c9d749362382a7688479e24.0p-140L, + 0x848000.0p-24, 0xa899ab.0p-24, 0x199ff15ce532661ea9643a3a2d378.0p-139L, + 0x840000.0p-24, 0xa99171.0p-24, 0x1a19e15ccc45d257530a682b80490.0p-139L, + 0x838000.0p-24, 0xaa8a28.0p-24, -0x121a14ec532b35ba3e1f868fd0b5e.0p-140L, + 0x830000.0p-24, 0xab83d1.0p-24, 0x1aee319980bff3303dd481779df69.0p-139L, + 0x828000.0p-24, 0xac7e6f.0p-24, -0x18ffd9e3900345a85d2d86161742e.0p-140L, + 0x820000.0p-24, 0xad7a03.0p-24, -0x1e4db102ce29f79b026b64b42caa1.0p-140L, + 0x818000.0p-24, 0xae768f.0p-24, 0x17c35c55a04a82ab19f77652d977a.0p-141L, + 0x810000.0p-24, 0xaf7415.0p-24, 0x1448324047019b48d7b98c1cf7234.0p-138L, + 0x808000.0p-24, 0xb07298.0p-24, -0x1750ee3915a197e9c7359dd94152f.0p-138L, + 0x800000.0p-24, 0xb17218.0p-24, -0x105c610ca86c3898cff81a12a17e2.0p-141L, +}; + +#ifdef USE_UTAB +static const struct { + float H; /* 1 + i/INTERVALS (exact) */ + float E; /* H(i) * G(i) - 1 (exact) */ +} U[TSIZE] = { + 0x800000.0p-23, 0, + 0x810000.0p-23, -0x800000.0p-37, + 0x820000.0p-23, -0x800000.0p-35, + 0x830000.0p-23, -0x900000.0p-34, + 0x840000.0p-23, -0x800000.0p-33, + 0x850000.0p-23, -0xc80000.0p-33, + 0x860000.0p-23, -0xa00000.0p-36, + 0x870000.0p-23, 0x940000.0p-33, + 0x880000.0p-23, 0x800000.0p-35, + 0x890000.0p-23, -0xc80000.0p-34, + 0x8a0000.0p-23, 0xe00000.0p-36, + 0x8b0000.0p-23, 0x900000.0p-33, + 0x8c0000.0p-23, -0x800000.0p-35, + 0x8d0000.0p-23, -0xe00000.0p-33, + 0x8e0000.0p-23, 0x880000.0p-33, + 0x8f0000.0p-23, -0xa80000.0p-34, + 0x900000.0p-23, -0x800000.0p-35, + 0x910000.0p-23, 0x800000.0p-37, + 0x920000.0p-23, 0x900000.0p-35, + 0x930000.0p-23, 0xd00000.0p-35, + 0x940000.0p-23, 0xe00000.0p-35, + 0x950000.0p-23, 0xc00000.0p-35, + 0x960000.0p-23, 0xe00000.0p-36, + 0x970000.0p-23, -0x800000.0p-38, + 0x980000.0p-23, -0xc00000.0p-35, + 0x990000.0p-23, -0xd00000.0p-34, + 0x9a0000.0p-23, 0x880000.0p-33, + 0x9b0000.0p-23, 0xe80000.0p-35, + 0x9c0000.0p-23, -0x800000.0p-35, + 0x9d0000.0p-23, 0xb40000.0p-33, + 0x9e0000.0p-23, 0x880000.0p-34, + 0x9f0000.0p-23, -0xe00000.0p-35, + 0xa00000.0p-23, 0x800000.0p-33, + 0xa10000.0p-23, -0x900000.0p-36, + 0xa20000.0p-23, -0xb00000.0p-33, + 0xa30000.0p-23, -0xa00000.0p-36, + 0xa40000.0p-23, 0x800000.0p-33, + 0xa50000.0p-23, -0xf80000.0p-35, + 0xa60000.0p-23, 0x880000.0p-34, + 0xa70000.0p-23, -0x900000.0p-33, + 0xa80000.0p-23, -0x800000.0p-35, + 0xa90000.0p-23, 0x900000.0p-34, + 0xaa0000.0p-23, 0xa80000.0p-33, + 0xab0000.0p-23, -0xac0000.0p-34, + 0xac0000.0p-23, -0x800000.0p-37, + 0xad0000.0p-23, 0xf80000.0p-35, + 0xae0000.0p-23, 0xf80000.0p-34, + 0xaf0000.0p-23, -0xac0000.0p-33, + 0xb00000.0p-23, -0x800000.0p-33, + 0xb10000.0p-23, -0xb80000.0p-34, + 0xb20000.0p-23, -0x800000.0p-34, + 0xb30000.0p-23, -0xb00000.0p-35, + 0xb40000.0p-23, -0x800000.0p-35, + 0xb50000.0p-23, -0xe00000.0p-36, + 0xb60000.0p-23, -0x800000.0p-35, + 0xb70000.0p-23, -0xb00000.0p-35, + 0xb80000.0p-23, -0x800000.0p-34, + 0xb90000.0p-23, -0xb80000.0p-34, + 0xba0000.0p-23, -0x800000.0p-33, + 0xbb0000.0p-23, -0xac0000.0p-33, + 0xbc0000.0p-23, 0x980000.0p-33, + 0xbd0000.0p-23, 0xbc0000.0p-34, + 0xbe0000.0p-23, 0xe00000.0p-36, + 0xbf0000.0p-23, -0xb80000.0p-35, + 0xc00000.0p-23, -0x800000.0p-33, + 0xc10000.0p-23, 0xa80000.0p-33, + 0xc20000.0p-23, 0x900000.0p-34, + 0xc30000.0p-23, -0x800000.0p-35, + 0xc40000.0p-23, -0x900000.0p-33, + 0xc50000.0p-23, 0x820000.0p-33, + 0xc60000.0p-23, 0x800000.0p-38, + 0xc70000.0p-23, -0x820000.0p-33, + 0xc80000.0p-23, 0x800000.0p-33, + 0xc90000.0p-23, -0xa00000.0p-36, + 0xca0000.0p-23, -0xb00000.0p-33, + 0xcb0000.0p-23, 0x840000.0p-34, + 0xcc0000.0p-23, -0xd00000.0p-34, + 0xcd0000.0p-23, 0x800000.0p-33, + 0xce0000.0p-23, -0xe00000.0p-35, + 0xcf0000.0p-23, 0xa60000.0p-33, + 0xd00000.0p-23, -0x800000.0p-35, + 0xd10000.0p-23, 0xb40000.0p-33, + 0xd20000.0p-23, -0x800000.0p-35, + 0xd30000.0p-23, 0xaa0000.0p-33, + 0xd40000.0p-23, -0xe00000.0p-35, + 0xd50000.0p-23, 0x880000.0p-33, + 0xd60000.0p-23, -0xd00000.0p-34, + 0xd70000.0p-23, 0x9c0000.0p-34, + 0xd80000.0p-23, -0xb00000.0p-33, + 0xd90000.0p-23, -0x800000.0p-38, + 0xda0000.0p-23, 0xa40000.0p-33, + 0xdb0000.0p-23, -0xdc0000.0p-34, + 0xdc0000.0p-23, 0xc00000.0p-35, + 0xdd0000.0p-23, 0xca0000.0p-33, + 0xde0000.0p-23, -0xb80000.0p-34, + 0xdf0000.0p-23, 0xd00000.0p-35, + 0xe00000.0p-23, 0xc00000.0p-33, + 0xe10000.0p-23, -0xf40000.0p-34, + 0xe20000.0p-23, 0x800000.0p-37, + 0xe30000.0p-23, 0x860000.0p-33, + 0xe40000.0p-23, -0xc80000.0p-33, + 0xe50000.0p-23, -0xa80000.0p-34, + 0xe60000.0p-23, 0xe00000.0p-36, + 0xe70000.0p-23, 0x880000.0p-33, + 0xe80000.0p-23, -0xe00000.0p-33, + 0xe90000.0p-23, -0xfc0000.0p-34, + 0xea0000.0p-23, -0x800000.0p-35, + 0xeb0000.0p-23, 0xe80000.0p-35, + 0xec0000.0p-23, 0x900000.0p-33, + 0xed0000.0p-23, 0xe20000.0p-33, + 0xee0000.0p-23, -0xac0000.0p-33, + 0xef0000.0p-23, -0xc80000.0p-34, + 0xf00000.0p-23, -0x800000.0p-35, + 0xf10000.0p-23, 0x800000.0p-35, + 0xf20000.0p-23, 0xb80000.0p-34, + 0xf30000.0p-23, 0x940000.0p-33, + 0xf40000.0p-23, 0xc80000.0p-33, + 0xf50000.0p-23, -0xf20000.0p-33, + 0xf60000.0p-23, -0xc80000.0p-33, + 0xf70000.0p-23, -0xa20000.0p-33, + 0xf80000.0p-23, -0x800000.0p-33, + 0xf90000.0p-23, -0xc40000.0p-34, + 0xfa0000.0p-23, -0x900000.0p-34, + 0xfb0000.0p-23, -0xc80000.0p-35, + 0xfc0000.0p-23, -0x800000.0p-35, + 0xfd0000.0p-23, -0x900000.0p-36, + 0xfe0000.0p-23, -0x800000.0p-37, + 0xff0000.0p-23, -0x800000.0p-39, + 0x800000.0p-22, 0, +}; +#endif /* USE_UTAB */ + +#ifdef STRUCT_RETURN +#define RETURN1(rp, v) do { \ + (rp)->hi = (v); \ + (rp)->lo_set = 0; \ + return; \ +} while (0) + +#define RETURN2(rp, h, l) do { \ + (rp)->hi = (h); \ + (rp)->lo = (l); \ + (rp)->lo_set = 1; \ + return; \ +} while (0) + +struct ld { + long double hi; + long double lo; + int lo_set; +}; +#else +#define RETURN1(rp, v) RETURNF(v) +#define RETURN2(rp, h, l) RETURNI((h) + (l)) +#endif + +#ifdef STRUCT_RETURN +static inline __always_inline void +k_logl(long double x, struct ld *rp) +#else +long double +logl(long double x) +#endif +{ + long double d, val_hi, val_lo; + double dd, dk; + uint64_t lx, llx; + int i, k; + uint16_t hx; + + EXTRACT_LDBL128_WORDS(hx, lx, llx, x); + k = -16383; +#if 0 /* Hard to do efficiently. Don't do it until we support all modes. */ + if (x == 1) + RETURN1(rp, 0); /* log(1) = +0 in all rounding modes */ +#endif + if (hx == 0 || hx >= 0x8000) { /* zero, negative or subnormal? */ + if (((hx & 0x7fff) | lx | llx) == 0) + RETURN1(rp, -1 / zero); /* log(+-0) = -Inf */ + if (hx != 0) + /* log(neg or NaN) = qNaN: */ + RETURN1(rp, (x - x) / zero); + x *= 0x1.0p113; /* subnormal; scale up x */ + EXTRACT_LDBL128_WORDS(hx, lx, llx, x); + k = -16383 - 113; + } else if (hx >= 0x7fff) + RETURN1(rp, x + x); /* log(Inf or NaN) = Inf or qNaN */ +#ifndef STRUCT_RETURN + ENTERI(); +#endif + k += hx; + dk = k; + + /* Scale x to be in [1, 2). */ + SET_LDBL_EXPSIGN(x, 0x3fff); + + /* 0 <= i <= INTERVALS: */ +#define L2I (49 - LOG2_INTERVALS) + i = (lx + (1LL << (L2I - 2))) >> (L2I - 1); + + /* + * -0.005280 < d < 0.004838. In particular, the infinite- + * precision |d| is <= 2**-7. Rounding of G(i) to 8 bits + * ensures that d is representable without extra precision for + * this bound on |d| (since when this calculation is expressed + * as x*G(i)-1, the multiplication needs as many extra bits as + * G(i) has and the subtraction cancels 8 bits). But for + * most i (107 cases out of 129), the infinite-precision |d| + * is <= 2**-8. G(i) is rounded to 9 bits for such i to give + * better accuracy (this works by improving the bound on |d|, + * which in turn allows rounding to 9 bits in more cases). + * This is only important when the original x is near 1 -- it + * lets us avoid using a special method to give the desired + * accuracy for such x. + */ + if (0) + d = x * G(i) - 1; + else { +#ifdef USE_UTAB + d = (x - H(i)) * G(i) + E(i); +#else + long double x_hi; + double x_lo; + + /* + * Split x into x_hi + x_lo to calculate x*G(i)-1 exactly. + * G(i) has at most 9 bits, so the splitting point is not + * critical. + */ + INSERT_LDBL128_WORDS(x_hi, 0x3fff, lx, + llx & 0xffffffffff000000ULL); + x_lo = x - x_hi; + d = x_hi * G(i) - 1 + x_lo * G(i); +#endif + } + + /* + * Our algorithm depends on exact cancellation of F_lo(i) and + * F_hi(i) with dk*ln_2_lo and dk*ln2_hi when k is -1 and i is + * at the end of the table. This and other technical complications + * make it difficult to avoid the double scaling in (dk*ln2) * + * log(base) for base != e without losing more accuracy and/or + * efficiency than is gained. + */ + /* + * Use double precision operations wherever possible, since long + * double operations are emulated and are very slow on the only + * known machines that support ld128 (sparc64). Also, don't try + * to improve parallelism by increasing the number of operations, + * since any parallelism on such machines is needed for the + * emulation. Horner's method is good for this, and is also good + * for accuracy. Horner's method doesn't handle the `lo' term + * well, either for efficiency or accuracy. However, for accuracy + * we evaluate d * d * P2 separately to take advantage of + * by P2 being exact, and this gives a good place to sum the 'lo' + * term too. + */ + dd = (double)d; + val_lo = d * d * d * (P3 + + d * (P4 + d * (P5 + d * (P6 + d * (P7 + d * (P8 + + dd * (P9 + dd * (P10 + dd * (P11 + dd * (P12 + dd * (P13 + + dd * P14))))))))))) + (F_lo(i) + dk * ln2_lo) + d * d * P2; + val_hi = d; +#ifdef DEBUG + if (fetestexcept(FE_UNDERFLOW)) + breakpoint(); +#endif + + _3sumF(val_hi, val_lo, F_hi(i) + dk * ln2_hi); + RETURN2(rp, val_hi, val_lo); +} + +long double +log1pl(long double x) +{ + long double d, d_hi, f_lo, val_hi, val_lo; + long double f_hi, twopminusk; + double d_lo, dd, dk; + uint64_t lx, llx; + int i, k; + int16_t ax, hx; + + DOPRINT_START(&x); + EXTRACT_LDBL128_WORDS(hx, lx, llx, x); + if (hx < 0x3fff) { /* x < 1, or x neg NaN */ + ax = hx & 0x7fff; + if (ax >= 0x3fff) { /* x <= -1, or x neg NaN */ + if (ax == 0x3fff && (lx | llx) == 0) + RETURNP(-1 / zero); /* log1p(-1) = -Inf */ + /* log1p(x < 1, or x NaN) = qNaN: */ + RETURNP((x - x) / (x - x)); + } + if (ax <= 0x3f8d) { /* |x| < 2**-113 */ + if ((int)x == 0) + RETURNP(x); /* x with inexact if x != 0 */ + } + f_hi = 1; + f_lo = x; + } else if (hx >= 0x7fff) { /* x +Inf or non-neg NaN */ + RETURNP(x + x); /* log1p(Inf or NaN) = Inf or qNaN */ + } else if (hx < 0x40e1) { /* 1 <= x < 2**226 */ + f_hi = x; + f_lo = 1; + } else { /* 2**226 <= x < +Inf */ + f_hi = x; + f_lo = 0; /* avoid underflow of the P3 term */ + } + ENTERI(); + x = f_hi + f_lo; + f_lo = (f_hi - x) + f_lo; + + EXTRACT_LDBL128_WORDS(hx, lx, llx, x); + k = -16383; + + k += hx; + dk = k; + + SET_LDBL_EXPSIGN(x, 0x3fff); + twopminusk = 1; + SET_LDBL_EXPSIGN(twopminusk, 0x7ffe - (hx & 0x7fff)); + f_lo *= twopminusk; + + i = (lx + (1LL << (L2I - 2))) >> (L2I - 1); + + /* + * x*G(i)-1 (with a reduced x) can be represented exactly, as + * above, but now we need to evaluate the polynomial on d = + * (x+f_lo)*G(i)-1 and extra precision is needed for that. + * Since x+x_lo is a hi+lo decomposition and subtracting 1 + * doesn't lose too many bits, an inexact calculation for + * f_lo*G(i) is good enough. + */ + if (0) + d_hi = x * G(i) - 1; + else { +#ifdef USE_UTAB + d_hi = (x - H(i)) * G(i) + E(i); +#else + long double x_hi; + double x_lo; + + INSERT_LDBL128_WORDS(x_hi, 0x3fff, lx, + llx & 0xffffffffff000000ULL); + x_lo = x - x_hi; + d_hi = x_hi * G(i) - 1 + x_lo * G(i); +#endif + } + d_lo = f_lo * G(i); + + /* + * This is _2sumF(d_hi, d_lo) inlined. The condition + * (d_hi == 0 || |d_hi| >= |d_lo|) for using _2sumF() is not + * always satisifed, so it is not clear that this works, but + * it works in practice. It works even if it gives a wrong + * normalized d_lo, since |d_lo| > |d_hi| implies that i is + * nonzero and d is tiny, so the F(i) term dominates d_lo. + * In float precision: + * (By exhaustive testing, the worst case is d_hi = 0x1.bp-25. + * And if d is only a little tinier than that, we would have + * another underflow problem for the P3 term; this is also ruled + * out by exhaustive testing.) + */ + d = d_hi + d_lo; + d_lo = d_hi - d + d_lo; + d_hi = d; + + dd = (double)d; + val_lo = d * d * d * (P3 + + d * (P4 + d * (P5 + d * (P6 + d * (P7 + d * (P8 + + dd * (P9 + dd * (P10 + dd * (P11 + dd * (P12 + dd * (P13 + + dd * P14))))))))))) + (F_lo(i) + dk * ln2_lo + d_lo) + d * d * P2; + val_hi = d_hi; +#ifdef DEBUG + if (fetestexcept(FE_UNDERFLOW)) + breakpoint(); +#endif + + _3sumF(val_hi, val_lo, F_hi(i) + dk * ln2_hi); + RETURN2PI(val_hi, val_lo); +} + +#ifdef STRUCT_RETURN + +long double +logl(long double x) +{ + struct ld r; + + ENTERI(); + DOPRINT_START(&x); + k_logl(x, &r); + RETURNSPI(&r); +} + +/* + * 29+113 bit decompositions. The bits are distributed so that the products + * of the hi terms are exact in double precision. The types are chosen so + * that the products of the hi terms are done in at least double precision, + * without any explicit conversions. More natural choices would require a + * slow long double precision multiplication. + */ +static const double +invln10_hi = 4.3429448176175356e-1, /* 0x1bcb7b15000000.0p-54 */ +invln2_hi = 1.4426950402557850e0; /* 0x17154765000000.0p-52 */ +static const long double +invln10_lo = 1.41498268538580090791605082294397000e-10L, /* 0x137287195355baaafad33dc323ee3.0p-145L */ +invln2_lo = 6.33178418956604368501892137426645911e-10L; /* 0x15c17f0bbbe87fed0691d3e88eb57.0p-143L */ + +long double +log10l(long double x) +{ + struct ld r; + long double lo; + float hi; + + ENTERI(); + DOPRINT_START(&x); + k_logl(x, &r); + if (!r.lo_set) + RETURNPI(r.hi); + _2sumF(r.hi, r.lo); + hi = r.hi; + lo = r.lo + (r.hi - hi); + RETURN2PI(invln10_hi * hi, + (invln10_lo + invln10_hi) * lo + invln10_lo * hi); +} + +long double +log2l(long double x) +{ + struct ld r; + long double lo; + float hi; + + ENTERI(); + DOPRINT_START(&x); + k_logl(x, &r); + if (!r.lo_set) + RETURNPI(r.hi); + _2sumF(r.hi, r.lo); + hi = r.hi; + lo = r.lo + (r.hi - hi); + RETURN2PI(invln2_hi * hi, + (invln2_lo + invln2_hi) * lo + invln2_lo * hi); +} + +#endif /* STRUCT_RETURN */ diff --git a/lib/msun/ld80/s_exp2l.c b/lib/msun/ld80/s_exp2l.c index 14dfc1d..8730f13 100644 --- a/lib/msun/ld80/s_exp2l.c +++ b/lib/msun/ld80/s_exp2l.c @@ -36,28 +36,31 @@ __FBSDID("$FreeBSD$"); #include "fpmath.h" #include "math.h" +#include "math_private.h" #define TBLBITS 7 #define TBLSIZE (1 << TBLBITS) #define BIAS (LDBL_MAX_EXP - 1) -#define EXPMASK (BIAS + LDBL_MAX_EXP) -static const long double huge = 0x1p10000L; -#if 0 /* XXX Prevent gcc from erroneously constant folding this. */ -static const long double twom10000 = 0x1p-10000L; -#else -static volatile long double twom10000 = 0x1p-10000L; -#endif +static volatile long double + huge = 0x1p10000L, + twom10000 = 0x1p-10000L; + +static const union IEEEl2bits +P1 = LD80C(0xb17217f7d1cf79ac, -1, 6.93147180559945309429e-1L); static const double - redux = 0x1.8p63 / TBLSIZE, - P1 = 0x1.62e42fefa39efp-1, - P2 = 0x1.ebfbdff82c58fp-3, - P3 = 0x1.c6b08d7049fap-5, - P4 = 0x1.3b2ab6fba4da5p-7, - P5 = 0x1.5d8804780a736p-10, - P6 = 0x1.430918835e33dp-13; +redux = 0x1.8p63 / TBLSIZE, +/* + * Domain [-0.00390625, 0.00390625], range ~[-1.7079e-23, 1.7079e-23] + * |exp(x) - p(x)| < 2**-75.6 + */ +P2 = 2.4022650695910072e-1, /* 0x1ebfbdff82c58f.0p-55 */ +P3 = 5.5504108664816879e-2, /* 0x1c6b08d7049e1a.0p-57 */ +P4 = 9.6181291055695180e-3, /* 0x13b2ab6fa8321a.0p-59 */ +P5 = 1.3333563089183052e-3, /* 0x15d8806f67f251.0p-62 */ +P6 = 1.5413361552277414e-4; /* 0x1433ddacff3441.0p-65 */ static const double tbl[TBLSIZE * 2] = { 0x1.6a09e667f3bcdp-1, -0x1.bdd3413b2648p-55, @@ -190,8 +193,8 @@ static const double tbl[TBLSIZE * 2] = { 0x1.68155d44ca973p+0, 0x1.038ae44f74p-57, }; -/* - * exp2l(x): compute the base 2 exponential of x +/** + * Compute the base 2 exponential of x for Intel 80-bit format. * * Accuracy: Peak error < 0.511 ulp. * @@ -207,7 +210,7 @@ static const double tbl[TBLSIZE * 2] = { * with |z| <= 2**-(TBLBITS+1). * * We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a - * degree-6 minimax polynomial with maximum error under 2**-69. + * degree-6 minimax polynomial with maximum error under 2**-75.6. * The table entries each have 104 bits of accuracy, encoded as * a pair of double precision values. */ @@ -222,30 +225,22 @@ exp2l(long double x) /* Filter out exceptional cases. */ u.e = x; hx = u.xbits.expsign; - ix = hx & EXPMASK; + ix = hx & 0x7fff; if (ix >= BIAS + 14) { /* |x| >= 16384 or x is NaN */ if (ix == BIAS + LDBL_MAX_EXP) { - if (u.xbits.man != 1ULL << 63 || (hx & 0x8000) == 0) - return (x + x); /* x is +Inf or NaN */ - else - return (0.0); /* x is -Inf */ + if (hx & 0x8000 && u.xbits.man == 1ULL << 63) + return (0.0L); /* x is -Inf */ + return (x + x); /* x is +Inf, NaN or unsupported */ } if (x >= 16384) return (huge * huge); /* overflow */ if (x <= -16446) return (twom10000 * twom10000); /* underflow */ - } else if (ix <= BIAS - 66) { /* |x| < 0x1p-66 */ - return (1.0 + x); + } else if (ix <= BIAS - 66) { /* |x| < 0x1p-65 (includes pseudos) */ + return (1.0L + x); /* 1 with inexact */ } -#ifdef __i386__ - /* - * The default precision on i386 is 53 bits, so long doubles are - * broken. Call exp2() to get an accurate (double precision) result. - */ - if (fpgetprec() != FP_PE) - return (exp2(x)); -#endif + ENTERI(); /* * Reduce x, computing z, i0, and k. The low bits of x + redux @@ -269,26 +264,25 @@ exp2l(long double x) z = x - u.e; v.xbits.man = 1ULL << 63; if (k >= LDBL_MIN_EXP) { - v.xbits.expsign = LDBL_MAX_EXP - 1 + k; + v.xbits.expsign = BIAS + k; twopk = v.e; } else { - v.xbits.expsign = LDBL_MAX_EXP - 1 + k + 10000; + v.xbits.expsign = BIAS + k + 10000; twopkp10000 = v.e; } /* Compute r = exp2l(y) = exp2lt[i0] * p(z). */ long double t_hi = tbl[i0]; long double t_lo = tbl[i0 + 1]; - /* XXX This gives > 1 ulp errors outside of FE_TONEAREST mode */ - r = t_lo + (t_hi + t_lo) * z * (P1 + z * (P2 + z * (P3 + z * (P4 + r = t_lo + (t_hi + t_lo) * z * (P1.e + z * (P2 + z * (P3 + z * (P4 + z * (P5 + z * P6))))) + t_hi; /* Scale by 2**k. */ if (k >= LDBL_MIN_EXP) { if (k == LDBL_MAX_EXP) - return (r * 2.0 * 0x1p16383L); - return (r * twopk); + RETURNI(r * 2.0 * 0x1p16383L); + RETURNI(r * twopk); } else { - return (r * twopkp10000 * twom10000); + RETURNI(r * twopkp10000 * twom10000); } } diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c index af63668..ec748d3 100644 --- a/lib/msun/ld80/s_expl.c +++ b/lib/msun/ld80/s_expl.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2012 Steven G. Kargl + * Copyright (c) 2009-2013 Steven G. Kargl * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,7 +29,7 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -/*- +/** * Compute the exponential of x for Intel 80-bit format. This is based on: * * PTP Tang, "Table-driven implementation of the exponential function @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include "math_private.h" #define INTERVALS 128 +#define LOG2_INTERVALS 7 #define BIAS (LDBL_MAX_EXP - 1) static const long double @@ -60,9 +61,12 @@ static volatile const long double tiny = 0x1p-10000L; static const union IEEEl2bits /* log(2**16384 - 0.5) rounded towards zero: */ -o_threshold = LD80C(0xb17217f7d1cf79ab, 13, 11356.5234062941439488L), +/* log(2**16384 - 0.5 + 1) rounded towards zero for expm1l() is the same: */ +o_thresholdu = LD80C(0xb17217f7d1cf79ab, 13, 11356.5234062941439488L), +#define o_threshold (o_thresholdu.e) /* log(2**(-16381-64-1)) rounded towards zero: */ -u_threshold = LD80C(0xb21dfe7f09e2baa9, 13, -11399.4985314888605581L); +u_thresholdu = LD80C(0xb21dfe7f09e2baa9, 13, -11399.4985314888605581L); +#define u_threshold (u_thresholdu.e) static const double /* @@ -78,11 +82,11 @@ L2 = -3.2819649005320973e-13, /* -0x1718432a1b0e26.0p-94 */ * |exp(x) - p(x)| < 2**-77.2 * (0.002708 is ln2/(2*INTERVALS) rounded up a little). */ -P2 = 0.5, -P3 = 1.6666666666666119e-1, /* 0x15555555555490.0p-55 */ -P4 = 4.1666666666665887e-2, /* 0x155555555554e5.0p-57 */ -P5 = 8.3333354987869413e-3, /* 0x1111115b789919.0p-59 */ -P6 = 1.3888891738560272e-3; /* 0x16c16c651633ae.0p-62 */ +A2 = 0.5, +A3 = 1.6666666666666119e-1, /* 0x15555555555490.0p-55 */ +A4 = 4.1666666666665887e-2, /* 0x155555555554e5.0p-57 */ +A5 = 8.3333354987869413e-3, /* 0x1111115b789919.0p-59 */ +A6 = 1.3888891738560272e-3; /* 0x16c16c651633ae.0p-62 */ /* * 2^(i/INTERVALS) for i in [0,INTERVALS] is represented by two values where @@ -96,8 +100,7 @@ P6 = 1.3888891738560272e-3; /* 0x16c16c651633ae.0p-62 */ static const struct { double hi; double lo; -/* XXX should rename 's'. */ -} s[INTERVALS] = { +} tbl[INTERVALS] = { 0x1p+0, 0x0p+0, 0x1.0163da9fb3335p+0, 0x1.b61299ab8cdb7p-54, 0x1.02c9a3e778060p+0, 0x1.dcdef95949ef4p-53, @@ -232,7 +235,8 @@ long double expl(long double x) { union IEEEl2bits u, v; - long double fn, q, r, r1, r2, t, t23, t45, twopk, twopkp10000, z; + long double fn, q, r, r1, r2, t, twopk, twopkp10000; + long double z; int k, n, n2; uint16_t hx, ix; @@ -242,40 +246,39 @@ expl(long double x) ix = hx & 0x7fff; if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ if (ix == BIAS + LDBL_MAX_EXP) { - if (hx & 0x8000 && u.xbits.man == 1ULL << 63) - return (0.0L); /* x is -Inf */ - return (x + x); /* x is +Inf, NaN or unsupported */ + if (hx & 0x8000) /* x is -Inf, -NaN or unsupported */ + return (-1 / x); + return (x + x); /* x is +Inf, +NaN or unsupported */ } - if (x > o_threshold.e) + if (x > o_threshold) return (huge * huge); - if (x < u_threshold.e) + if (x < u_threshold) return (tiny * tiny); - } else if (ix < BIAS - 66) { /* |x| < 0x1p-66 */ - /* includes pseudo-denormals */ - if (huge + x > 1.0L) /* trigger inexact iff x != 0 */ - return (1.0L + x); + } else if (ix < BIAS - 65) { /* |x| < 0x1p-65 (includes pseudos) */ + return (1 + x); /* 1 with inexact iff x != 0 */ } ENTERI(); - /* Reduce x to (k*ln2 + midpoint[n2] + r1 + r2). */ + /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */ fn = x * INV_L + 0x1.8p63 - 0x1.8p63; r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. */ #if defined(HAVE_EFFICIENT_IRINTL) - n = irintl(fn); + n = irintl(fn); #elif defined(HAVE_EFFICIENT_IRINT) - n = irint(fn); + n = irint(fn); #else - n = (int)fn; + n = (int)fn; #endif n2 = (unsigned)n % INTERVALS; - k = (n - n2) / INTERVALS; + /* Depend on the sign bit being propagated: */ + k = n >> LOG2_INTERVALS; r1 = x - fn * L1; - r2 = -fn * L2; + r2 = fn * -L2; /* Prepare scale factors. */ - v.xbits.man = 1ULL << 63; + v.e = 1; if (k >= LDBL_MIN_EXP) { v.xbits.expsign = BIAS + k; twopk = v.e; @@ -284,21 +287,183 @@ expl(long double x) twopkp10000 = v.e; } - /* Evaluate expl(midpoint[n2] + r1 + r2) = s[n2] * expl(r1 + r2). */ - /* Here q = q(r), not q(r1), since r1 is lopped like L1. */ - t45 = r * P5 + P4; + /* Evaluate expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */ z = r * r; - t23 = r * P3 + P2; - q = r2 + z * t23 + z * z * t45 + z * z * z * P6; - t = (long double)s[n2].lo + s[n2].hi; - t = s[n2].lo + t * (q + r1) + s[n2].hi; + q = r2 + z * (A2 + r * A3) + z * z * (A4 + r * A5) + z * z * z * A6; + t = (long double)tbl[n2].lo + tbl[n2].hi; + t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi; /* Scale by 2**k. */ if (k >= LDBL_MIN_EXP) { if (k == LDBL_MAX_EXP) - RETURNI(t * 2.0L * 0x1p16383L); + RETURNI(t * 2 * 0x1p16383L); RETURNI(t * twopk); } else { RETURNI(t * twopkp10000 * twom10000); } } + +/** + * Compute expm1l(x) for Intel 80-bit format. This is based on: + * + * PTP Tang, "Table-driven implementation of the Expm1 function + * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 18, + * 211-222 (1992). + */ + +/* + * Our T1 and T2 are chosen to be approximately the points where method + * A and method B have the same accuracy. Tang's T1 and T2 are the + * points where method A's accuracy changes by a full bit. For Tang, + * this drop in accuracy makes method A immediately less accurate than + * method B, but our larger INTERVALS makes method A 2 bits more + * accurate so it remains the most accurate method significantly + * closer to the origin despite losing the full bit in our extended + * range for it. + */ +static const double +T1 = -0.1659, /* ~-30.625/128 * log(2) */ +T2 = 0.1659; /* ~30.625/128 * log(2) */ + +/* + * Domain [-0.1659, 0.1659], range ~[-1.2027e-22, 3.4417e-22]: + * |(exp(x)-1-x-x**2/2)/x - p(x)| < 2**-71.2 + */ +static const union IEEEl2bits +B3 = LD80C(0xaaaaaaaaaaaaaaab, -3, 1.66666666666666666671e-1L), +B4 = LD80C(0xaaaaaaaaaaaaaaac, -5, 4.16666666666666666712e-2L); + +static const double +B5 = 8.3333333333333245e-3, /* 0x1.111111111110cp-7 */ +B6 = 1.3888888888888861e-3, /* 0x1.6c16c16c16c0ap-10 */ +B7 = 1.9841269841532042e-4, /* 0x1.a01a01a0319f9p-13 */ +B8 = 2.4801587302069236e-5, /* 0x1.a01a01a03cbbcp-16 */ +B9 = 2.7557316558468562e-6, /* 0x1.71de37fd33d67p-19 */ +B10 = 2.7557315829785151e-7, /* 0x1.27e4f91418144p-22 */ +B11 = 2.5063168199779829e-8, /* 0x1.ae94fabdc6b27p-26 */ +B12 = 2.0887164654459567e-9; /* 0x1.1f122d6413fe1p-29 */ + +long double +expm1l(long double x) +{ + union IEEEl2bits u, v; + long double fn, hx2_hi, hx2_lo, q, r, r1, r2, t, twomk, twopk, x_hi; + long double x_lo, x2, z; + long double x4; + int k, n, n2; + uint16_t hx, ix; + + /* Filter out exceptional cases. */ + u.e = x; + hx = u.xbits.expsign; + ix = hx & 0x7fff; + if (ix >= BIAS + 6) { /* |x| >= 64 or x is NaN */ + if (ix == BIAS + LDBL_MAX_EXP) { + if (hx & 0x8000) /* x is -Inf, -NaN or unsupported */ + return (-1 / x - 1); + return (x + x); /* x is +Inf, +NaN or unsupported */ + } + if (x > o_threshold) + return (huge * huge); + /* + * expm1l() never underflows, but it must avoid + * unrepresentable large negative exponents. We used a + * much smaller threshold for large |x| above than in + * expl() so as to handle not so large negative exponents + * in the same way as large ones here. + */ + if (hx & 0x8000) /* x <= -64 */ + return (tiny - 1); /* good for x < -65ln2 - eps */ + } + + ENTERI(); + + if (T1 < x && x < T2) { + if (ix < BIAS - 64) { /* |x| < 0x1p-64 (includes pseudos) */ + /* x (rounded) with inexact if x != 0: */ + RETURNI(x == 0 ? x : + (0x1p100 * x + fabsl(x)) * 0x1p-100); + } + + x2 = x * x; + x4 = x2 * x2; + q = x4 * (x2 * (x4 * + /* + * XXX the number of terms is no longer good for + * pairwise grouping of all except B3, and the + * grouping is no longer from highest down. + */ + (x2 * B12 + (x * B11 + B10)) + + (x2 * (x * B9 + B8) + (x * B7 + B6))) + + (x * B5 + B4.e)) + x2 * x * B3.e; + + x_hi = (float)x; + x_lo = x - x_hi; + hx2_hi = x_hi * x_hi / 2; + hx2_lo = x_lo * (x + x_hi) / 2; + if (ix >= BIAS - 7) + RETURNI(hx2_lo + x_lo + q + (hx2_hi + x_hi)); + else + RETURNI(hx2_lo + q + hx2_hi + x); + } + + /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + fn = x * INV_L + 0x1.8p63 - 0x1.8p63; +#if defined(HAVE_EFFICIENT_IRINTL) + n = irintl(fn); +#elif defined(HAVE_EFFICIENT_IRINT) + n = irint(fn); +#else + n = (int)fn; +#endif + n2 = (unsigned)n % INTERVALS; + k = n >> LOG2_INTERVALS; + r1 = x - fn * L1; + r2 = fn * -L2; + r = r1 + r2; + + /* Prepare scale factor. */ + v.e = 1; + v.xbits.expsign = BIAS + k; + twopk = v.e; + + /* + * Evaluate lower terms of + * expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). + */ + z = r * r; + q = r2 + z * (A2 + r * A3) + z * z * (A4 + r * A5) + z * z * z * A6; + + t = (long double)tbl[n2].lo + tbl[n2].hi; + + if (k == 0) { + t = tbl[n2].lo * (r1 + 1) + t * q + tbl[n2].hi * r1 + + (tbl[n2].hi - 1); + RETURNI(t); + } + if (k == -1) { + t = tbl[n2].lo * (r1 + 1) + t * q + tbl[n2].hi * r1 + + (tbl[n2].hi - 2); + RETURNI(t / 2); + } + if (k < -7) { + t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi; + RETURNI(t * twopk - 1); + } + if (k > 2 * LDBL_MANT_DIG - 1) { + t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi; + if (k == LDBL_MAX_EXP) + RETURNI(t * 2 * 0x1p16383L - 1); + RETURNI(t * twopk - 1); + } + + v.xbits.expsign = BIAS - k; + twomk = v.e; + + if (k > LDBL_MANT_DIG - 1) + t = tbl[n2].lo - twomk + t * (q + r1) + tbl[n2].hi; + else + t = tbl[n2].lo + t * (q + r1) + (tbl[n2].hi - twomk); + RETURNI(t * twopk); +} diff --git a/lib/msun/ld80/s_logl.c b/lib/msun/ld80/s_logl.c new file mode 100644 index 0000000..3a35753 --- /dev/null +++ b/lib/msun/ld80/s_logl.c @@ -0,0 +1,717 @@ +/*- + * Copyright (c) 2007-2013 Bruce D. Evans + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/** + * Implementation of the natural logarithm of x for Intel 80-bit format. + * + * First decompose x into its base 2 representation: + * + * log(x) = log(X * 2**k), where X is in [1, 2) + * = log(X) + k * log(2). + * + * Let X = X_i + e, where X_i is the center of one of the intervals + * [-1.0/256, 1.0/256), [1.0/256, 3.0/256), .... [2.0-1.0/256, 2.0+1.0/256) + * and X is in this interval. Then + * + * log(X) = log(X_i + e) + * = log(X_i * (1 + e / X_i)) + * = log(X_i) + log(1 + e / X_i). + * + * The values log(X_i) are tabulated below. Let d = e / X_i and use + * + * log(1 + d) = p(d) + * + * where p(d) = d - 0.5*d*d + ... is a special minimax polynomial of + * suitably high degree. + * + * To get sufficiently small roundoff errors, k * log(2), log(X_i), and + * sometimes (if |k| is not large) the first term in p(d) must be evaluated + * and added up in extra precision. Extra precision is not needed for the + * rest of p(d). In the worst case when k = 0 and log(X_i) is 0, the final + * error is controlled mainly by the error in the second term in p(d). The + * error in this term itself is at most 0.5 ulps from the d*d operation in + * it. The error in this term relative to the first term is thus at most + * 0.5 * |-0.5| * |d| < 1.0/1024 ulps. We aim for an accumulated error of + * at most twice this at the point of the final rounding step. Thus the + * final error should be at most 0.5 + 1.0/512 = 0.5020 ulps. Exhaustive + * testing of a float variant of this function showed a maximum final error + * of 0.5008 ulps. Non-exhaustive testing of a double variant of this + * function showed a maximum final error of 0.5078 ulps (near 1+1.0/256). + * + * We made the maximum of |d| (and thus the total relative error and the + * degree of p(d)) small by using a large number of intervals. Using + * centers of intervals instead of endpoints reduces this maximum by a + * factor of 2 for a given number of intervals. p(d) is special only + * in beginning with the Taylor coefficients 0 + 1*d, which tends to happen + * naturally. The most accurate minimax polynomial of a given degree might + * be different, but then we wouldn't want it since we would have to do + * extra work to avoid roundoff error (especially for P0*d instead of d). + */ + +#ifdef DEBUG +#include <assert.h> +#include <fenv.h> +#endif + +#ifdef __i386__ +#include <ieeefp.h> +#endif + +#include "fpmath.h" +#include "math.h" +#define i386_SSE_GOOD +#ifndef NO_STRUCT_RETURN +#define STRUCT_RETURN +#endif +#include "math_private.h" + +#if !defined(NO_UTAB) && !defined(NO_UTABL) +#define USE_UTAB +#endif + +/* + * Domain [-0.005280, 0.004838], range ~[-5.1736e-22, 5.1738e-22]: + * |log(1 + d)/d - p(d)| < 2**-70.7 + */ +static const double +P2 = -0.5, +P3 = 3.3333333333333359e-1, /* 0x1555555555555a.0p-54 */ +P4 = -2.5000000000004424e-1, /* -0x1000000000031d.0p-54 */ +P5 = 1.9999999992970016e-1, /* 0x1999999972f3c7.0p-55 */ +P6 = -1.6666666072191585e-1, /* -0x15555548912c09.0p-55 */ +P7 = 1.4286227413310518e-1, /* 0x12494f9d9def91.0p-55 */ +P8 = -1.2518388626763144e-1; /* -0x1006068cc0b97c.0p-55 */ + +static volatile const double zero = 0; + +#define INTERVALS 128 +#define LOG2_INTERVALS 7 +#define TSIZE (INTERVALS + 1) +#define G(i) (T[(i)].G) +#define F_hi(i) (T[(i)].F_hi) +#define F_lo(i) (T[(i)].F_lo) +#define ln2_hi F_hi(TSIZE - 1) +#define ln2_lo F_lo(TSIZE - 1) +#define E(i) (U[(i)].E) +#define H(i) (U[(i)].H) + +static const struct { + float G; /* 1/(1 + i/128) rounded to 8/9 bits */ + float F_hi; /* log(1 / G_i) rounded (see below) */ + double F_lo; /* next 53 bits for log(1 / G_i) */ +} T[TSIZE] = { + /* + * ln2_hi and each F_hi(i) are rounded to a number of bits that + * makes F_hi(i) + dk*ln2_hi exact for all i and all dk. + * + * The last entry (for X just below 2) is used to define ln2_hi + * and ln2_lo, to ensure that F_hi(i) and F_lo(i) cancel exactly + * with dk*ln2_hi and dk*ln2_lo, respectively, when dk = -1. + * This is needed for accuracy when x is just below 1. (To avoid + * special cases, such x are "reduced" strangely to X just below + * 2 and dk = -1, and then the exact cancellation is needed + * because any the error from any non-exactness would be too + * large). + * + * We want to share this table between double precision and ld80, + * so the relevant range of dk is the larger one of ld80 + * ([-16445, 16383]) and the relevant exactness requirement is + * the stricter one of double precision. The maximum number of + * bits in F_hi(i) that works is very dependent on i but has + * a minimum of 33. We only need about 12 bits in F_hi(i) for + * it to provide enough extra precision in double precision (11 + * more than that are required for ld80). + * + * We round F_hi(i) to 24 bits so that it can have type float, + * mainly to minimize the size of the table. Using all 24 bits + * in a float for it automatically satisfies the above constraints. + */ + 0x800000.0p-23, 0, 0, + 0xfe0000.0p-24, 0x8080ac.0p-30, -0x14ee431dae6675.0p-84, + 0xfc0000.0p-24, 0x8102b3.0p-29, -0x1db29ee2d83718.0p-84, + 0xfa0000.0p-24, 0xc24929.0p-29, 0x1191957d173698.0p-83, + 0xf80000.0p-24, 0x820aec.0p-28, 0x13ce8888e02e79.0p-82, + 0xf60000.0p-24, 0xa33577.0p-28, -0x17a4382ce6eb7c.0p-82, + 0xf48000.0p-24, 0xbc42cb.0p-28, -0x172a21161a1076.0p-83, + 0xf30000.0p-24, 0xd57797.0p-28, -0x1e09de07cb9589.0p-82, + 0xf10000.0p-24, 0xf7518e.0p-28, 0x1ae1eec1b036c5.0p-91, + 0xef0000.0p-24, 0x8cb9df.0p-27, -0x1d7355325d560e.0p-81, + 0xed8000.0p-24, 0x999ec0.0p-27, -0x1f9f02d256d503.0p-82, + 0xec0000.0p-24, 0xa6988b.0p-27, -0x16fc0a9d12c17a.0p-83, + 0xea0000.0p-24, 0xb80698.0p-27, 0x15d581c1e8da9a.0p-81, + 0xe80000.0p-24, 0xc99af3.0p-27, -0x1535b3ba8f150b.0p-83, + 0xe70000.0p-24, 0xd273b2.0p-27, 0x163786f5251af0.0p-85, + 0xe50000.0p-24, 0xe442c0.0p-27, 0x1bc4b2368e32d5.0p-84, + 0xe38000.0p-24, 0xf1b83f.0p-27, 0x1c6090f684e676.0p-81, + 0xe20000.0p-24, 0xff448a.0p-27, -0x1890aa69ac9f42.0p-82, + 0xe08000.0p-24, 0x8673f6.0p-26, 0x1b9985194b6b00.0p-80, + 0xdf0000.0p-24, 0x8d515c.0p-26, -0x1dc08d61c6ef1e.0p-83, + 0xdd8000.0p-24, 0x943a9e.0p-26, -0x1f72a2dac729b4.0p-82, + 0xdc0000.0p-24, 0x9b2fe6.0p-26, -0x1fd4dfd3a0afb9.0p-80, + 0xda8000.0p-24, 0xa2315d.0p-26, -0x11b26121629c47.0p-82, + 0xd90000.0p-24, 0xa93f2f.0p-26, 0x1286d633e8e569.0p-81, + 0xd78000.0p-24, 0xb05988.0p-26, 0x16128eba936770.0p-84, + 0xd60000.0p-24, 0xb78094.0p-26, 0x16ead577390d32.0p-80, + 0xd50000.0p-24, 0xbc4c6c.0p-26, 0x151131ccf7c7b7.0p-81, + 0xd38000.0p-24, 0xc3890a.0p-26, -0x115e2cd714bd06.0p-80, + 0xd20000.0p-24, 0xcad2d7.0p-26, -0x1847f406ebd3b0.0p-82, + 0xd10000.0p-24, 0xcfb620.0p-26, 0x1c2259904d6866.0p-81, + 0xcf8000.0p-24, 0xd71653.0p-26, 0x1ece57a8d5ae55.0p-80, + 0xce0000.0p-24, 0xde843a.0p-26, -0x1f109d4bc45954.0p-81, + 0xcd0000.0p-24, 0xe37fde.0p-26, 0x1bc03dc271a74d.0p-81, + 0xcb8000.0p-24, 0xeb050c.0p-26, -0x1bf2badc0df842.0p-85, + 0xca0000.0p-24, 0xf29878.0p-26, -0x18efededd89fbe.0p-87, + 0xc90000.0p-24, 0xf7ad6f.0p-26, 0x1373ff977baa69.0p-81, + 0xc80000.0p-24, 0xfcc8e3.0p-26, 0x196766f2fb3283.0p-80, + 0xc68000.0p-24, 0x823f30.0p-25, 0x19bd076f7c434e.0p-79, + 0xc58000.0p-24, 0x84d52c.0p-25, -0x1a327257af0f46.0p-79, + 0xc40000.0p-24, 0x88bc74.0p-25, 0x113f23def19c5a.0p-81, + 0xc30000.0p-24, 0x8b5ae6.0p-25, 0x1759f6e6b37de9.0p-79, + 0xc20000.0p-24, 0x8dfccb.0p-25, 0x1ad35ca6ed5148.0p-81, + 0xc10000.0p-24, 0x90a22b.0p-25, 0x1a1d71a87deba4.0p-79, + 0xbf8000.0p-24, 0x94a0d8.0p-25, -0x139e5210c2b731.0p-80, + 0xbe8000.0p-24, 0x974f16.0p-25, -0x18f6ebcff3ed73.0p-81, + 0xbd8000.0p-24, 0x9a00f1.0p-25, -0x1aa268be39aab7.0p-79, + 0xbc8000.0p-24, 0x9cb672.0p-25, -0x14c8815839c566.0p-79, + 0xbb0000.0p-24, 0xa0cda1.0p-25, 0x1eaf46390dbb24.0p-81, + 0xba0000.0p-24, 0xa38c6e.0p-25, 0x138e20d831f698.0p-81, + 0xb90000.0p-24, 0xa64f05.0p-25, -0x1e8d3c41123616.0p-82, + 0xb80000.0p-24, 0xa91570.0p-25, 0x1ce28f5f3840b2.0p-80, + 0xb70000.0p-24, 0xabdfbb.0p-25, -0x186e5c0a424234.0p-79, + 0xb60000.0p-24, 0xaeadef.0p-25, -0x14d41a0b2a08a4.0p-83, + 0xb50000.0p-24, 0xb18018.0p-25, 0x16755892770634.0p-79, + 0xb40000.0p-24, 0xb45642.0p-25, -0x16395ebe59b152.0p-82, + 0xb30000.0p-24, 0xb73077.0p-25, 0x1abc65c8595f09.0p-80, + 0xb20000.0p-24, 0xba0ec4.0p-25, -0x1273089d3dad89.0p-79, + 0xb10000.0p-24, 0xbcf133.0p-25, 0x10f9f67b1f4bbf.0p-79, + 0xb00000.0p-24, 0xbfd7d2.0p-25, -0x109fab90486409.0p-80, + 0xaf0000.0p-24, 0xc2c2ac.0p-25, -0x1124680aa43333.0p-79, + 0xae8000.0p-24, 0xc439b3.0p-25, -0x1f360cc4710fc0.0p-80, + 0xad8000.0p-24, 0xc72afd.0p-25, -0x132d91f21d89c9.0p-80, + 0xac8000.0p-24, 0xca20a2.0p-25, -0x16bf9b4d1f8da8.0p-79, + 0xab8000.0p-24, 0xcd1aae.0p-25, 0x19deb5ce6a6a87.0p-81, + 0xaa8000.0p-24, 0xd0192f.0p-25, 0x1a29fb48f7d3cb.0p-79, + 0xaa0000.0p-24, 0xd19a20.0p-25, 0x1127d3c6457f9d.0p-81, + 0xa90000.0p-24, 0xd49f6a.0p-25, -0x1ba930e486a0ac.0p-81, + 0xa80000.0p-24, 0xd7a94b.0p-25, -0x1b6e645f31549e.0p-79, + 0xa70000.0p-24, 0xdab7d0.0p-25, 0x1118a425494b61.0p-80, + 0xa68000.0p-24, 0xdc40d5.0p-25, 0x1966f24d29d3a3.0p-80, + 0xa58000.0p-24, 0xdf566d.0p-25, -0x1d8e52eb2248f1.0p-82, + 0xa48000.0p-24, 0xe270ce.0p-25, -0x1ee370f96e6b68.0p-80, + 0xa40000.0p-24, 0xe3ffce.0p-25, 0x1d155324911f57.0p-80, + 0xa30000.0p-24, 0xe72179.0p-25, -0x1fe6e2f2f867d9.0p-80, + 0xa20000.0p-24, 0xea4812.0p-25, 0x1b7be9add7f4d4.0p-80, + 0xa18000.0p-24, 0xebdd3d.0p-25, 0x1b3cfb3f7511dd.0p-79, + 0xa08000.0p-24, 0xef0b5b.0p-25, -0x1220de1f730190.0p-79, + 0xa00000.0p-24, 0xf0a451.0p-25, -0x176364c9ac81cd.0p-80, + 0x9f0000.0p-24, 0xf3da16.0p-25, 0x1eed6b9aafac8d.0p-81, + 0x9e8000.0p-24, 0xf576e9.0p-25, 0x1d593218675af2.0p-79, + 0x9d8000.0p-24, 0xf8b47c.0p-25, -0x13e8eb7da053e0.0p-84, + 0x9d0000.0p-24, 0xfa553f.0p-25, 0x1c063259bcade0.0p-79, + 0x9c0000.0p-24, 0xfd9ac5.0p-25, 0x1ef491085fa3c1.0p-79, + 0x9b8000.0p-24, 0xff3f8c.0p-25, 0x1d607a7c2b8c53.0p-79, + 0x9a8000.0p-24, 0x814697.0p-24, -0x12ad3817004f3f.0p-78, + 0x9a0000.0p-24, 0x821b06.0p-24, -0x189fc53117f9e5.0p-81, + 0x990000.0p-24, 0x83c5f8.0p-24, 0x14cf15a048907b.0p-79, + 0x988000.0p-24, 0x849c7d.0p-24, 0x1cbb1d35fb8287.0p-78, + 0x978000.0p-24, 0x864ba6.0p-24, 0x1128639b814f9c.0p-78, + 0x970000.0p-24, 0x87244c.0p-24, 0x184733853300f0.0p-79, + 0x968000.0p-24, 0x87fdaa.0p-24, 0x109d23aef77dd6.0p-80, + 0x958000.0p-24, 0x89b293.0p-24, -0x1a81ef367a59de.0p-78, + 0x950000.0p-24, 0x8a8e20.0p-24, -0x121ad3dbb2f452.0p-78, + 0x948000.0p-24, 0x8b6a6a.0p-24, -0x1cfb981628af72.0p-79, + 0x938000.0p-24, 0x8d253a.0p-24, -0x1d21730ea76cfe.0p-79, + 0x930000.0p-24, 0x8e03c2.0p-24, 0x135cc00e566f77.0p-78, + 0x928000.0p-24, 0x8ee30d.0p-24, -0x10fcb5df257a26.0p-80, + 0x918000.0p-24, 0x90a3ee.0p-24, -0x16e171b15433d7.0p-79, + 0x910000.0p-24, 0x918587.0p-24, -0x1d050da07f3237.0p-79, + 0x908000.0p-24, 0x9267e7.0p-24, 0x1be03669a5268d.0p-79, + 0x8f8000.0p-24, 0x942f04.0p-24, 0x10b28e0e26c337.0p-79, + 0x8f0000.0p-24, 0x9513c3.0p-24, 0x1a1d820da57cf3.0p-78, + 0x8e8000.0p-24, 0x95f950.0p-24, -0x19ef8f13ae3cf1.0p-79, + 0x8e0000.0p-24, 0x96dfab.0p-24, -0x109e417a6e507c.0p-78, + 0x8d0000.0p-24, 0x98aed2.0p-24, 0x10d01a2c5b0e98.0p-79, + 0x8c8000.0p-24, 0x9997a2.0p-24, -0x1d6a50d4b61ea7.0p-78, + 0x8c0000.0p-24, 0x9a8145.0p-24, 0x1b3b190b83f952.0p-78, + 0x8b8000.0p-24, 0x9b6bbf.0p-24, 0x13a69fad7e7abe.0p-78, + 0x8b0000.0p-24, 0x9c5711.0p-24, -0x11cd12316f576b.0p-78, + 0x8a8000.0p-24, 0x9d433b.0p-24, 0x1c95c444b807a2.0p-79, + 0x898000.0p-24, 0x9f1e22.0p-24, -0x1b9c224ea698c3.0p-79, + 0x890000.0p-24, 0xa00ce1.0p-24, 0x125ca93186cf0f.0p-81, + 0x888000.0p-24, 0xa0fc80.0p-24, -0x1ee38a7bc228b3.0p-79, + 0x880000.0p-24, 0xa1ed00.0p-24, -0x1a0db876613d20.0p-78, + 0x878000.0p-24, 0xa2de62.0p-24, 0x193224e8516c01.0p-79, + 0x870000.0p-24, 0xa3d0a9.0p-24, 0x1fa28b4d2541ad.0p-79, + 0x868000.0p-24, 0xa4c3d6.0p-24, 0x1c1b5760fb4572.0p-78, + 0x858000.0p-24, 0xa6acea.0p-24, 0x1fed5d0f65949c.0p-80, + 0x850000.0p-24, 0xa7a2d4.0p-24, 0x1ad270c9d74936.0p-80, + 0x848000.0p-24, 0xa899ab.0p-24, 0x199ff15ce53266.0p-79, + 0x840000.0p-24, 0xa99171.0p-24, 0x1a19e15ccc45d2.0p-79, + 0x838000.0p-24, 0xaa8a28.0p-24, -0x121a14ec532b36.0p-80, + 0x830000.0p-24, 0xab83d1.0p-24, 0x1aee319980bff3.0p-79, + 0x828000.0p-24, 0xac7e6f.0p-24, -0x18ffd9e3900346.0p-80, + 0x820000.0p-24, 0xad7a03.0p-24, -0x1e4db102ce29f8.0p-80, + 0x818000.0p-24, 0xae768f.0p-24, 0x17c35c55a04a83.0p-81, + 0x810000.0p-24, 0xaf7415.0p-24, 0x1448324047019b.0p-78, + 0x808000.0p-24, 0xb07298.0p-24, -0x1750ee3915a198.0p-78, + 0x800000.0p-24, 0xb17218.0p-24, -0x105c610ca86c39.0p-81, +}; + +#ifdef USE_UTAB +static const struct { + float H; /* 1 + i/INTERVALS (exact) */ + float E; /* H(i) * G(i) - 1 (exact) */ +} U[TSIZE] = { + 0x800000.0p-23, 0, + 0x810000.0p-23, -0x800000.0p-37, + 0x820000.0p-23, -0x800000.0p-35, + 0x830000.0p-23, -0x900000.0p-34, + 0x840000.0p-23, -0x800000.0p-33, + 0x850000.0p-23, -0xc80000.0p-33, + 0x860000.0p-23, -0xa00000.0p-36, + 0x870000.0p-23, 0x940000.0p-33, + 0x880000.0p-23, 0x800000.0p-35, + 0x890000.0p-23, -0xc80000.0p-34, + 0x8a0000.0p-23, 0xe00000.0p-36, + 0x8b0000.0p-23, 0x900000.0p-33, + 0x8c0000.0p-23, -0x800000.0p-35, + 0x8d0000.0p-23, -0xe00000.0p-33, + 0x8e0000.0p-23, 0x880000.0p-33, + 0x8f0000.0p-23, -0xa80000.0p-34, + 0x900000.0p-23, -0x800000.0p-35, + 0x910000.0p-23, 0x800000.0p-37, + 0x920000.0p-23, 0x900000.0p-35, + 0x930000.0p-23, 0xd00000.0p-35, + 0x940000.0p-23, 0xe00000.0p-35, + 0x950000.0p-23, 0xc00000.0p-35, + 0x960000.0p-23, 0xe00000.0p-36, + 0x970000.0p-23, -0x800000.0p-38, + 0x980000.0p-23, -0xc00000.0p-35, + 0x990000.0p-23, -0xd00000.0p-34, + 0x9a0000.0p-23, 0x880000.0p-33, + 0x9b0000.0p-23, 0xe80000.0p-35, + 0x9c0000.0p-23, -0x800000.0p-35, + 0x9d0000.0p-23, 0xb40000.0p-33, + 0x9e0000.0p-23, 0x880000.0p-34, + 0x9f0000.0p-23, -0xe00000.0p-35, + 0xa00000.0p-23, 0x800000.0p-33, + 0xa10000.0p-23, -0x900000.0p-36, + 0xa20000.0p-23, -0xb00000.0p-33, + 0xa30000.0p-23, -0xa00000.0p-36, + 0xa40000.0p-23, 0x800000.0p-33, + 0xa50000.0p-23, -0xf80000.0p-35, + 0xa60000.0p-23, 0x880000.0p-34, + 0xa70000.0p-23, -0x900000.0p-33, + 0xa80000.0p-23, -0x800000.0p-35, + 0xa90000.0p-23, 0x900000.0p-34, + 0xaa0000.0p-23, 0xa80000.0p-33, + 0xab0000.0p-23, -0xac0000.0p-34, + 0xac0000.0p-23, -0x800000.0p-37, + 0xad0000.0p-23, 0xf80000.0p-35, + 0xae0000.0p-23, 0xf80000.0p-34, + 0xaf0000.0p-23, -0xac0000.0p-33, + 0xb00000.0p-23, -0x800000.0p-33, + 0xb10000.0p-23, -0xb80000.0p-34, + 0xb20000.0p-23, -0x800000.0p-34, + 0xb30000.0p-23, -0xb00000.0p-35, + 0xb40000.0p-23, -0x800000.0p-35, + 0xb50000.0p-23, -0xe00000.0p-36, + 0xb60000.0p-23, -0x800000.0p-35, + 0xb70000.0p-23, -0xb00000.0p-35, + 0xb80000.0p-23, -0x800000.0p-34, + 0xb90000.0p-23, -0xb80000.0p-34, + 0xba0000.0p-23, -0x800000.0p-33, + 0xbb0000.0p-23, -0xac0000.0p-33, + 0xbc0000.0p-23, 0x980000.0p-33, + 0xbd0000.0p-23, 0xbc0000.0p-34, + 0xbe0000.0p-23, 0xe00000.0p-36, + 0xbf0000.0p-23, -0xb80000.0p-35, + 0xc00000.0p-23, -0x800000.0p-33, + 0xc10000.0p-23, 0xa80000.0p-33, + 0xc20000.0p-23, 0x900000.0p-34, + 0xc30000.0p-23, -0x800000.0p-35, + 0xc40000.0p-23, -0x900000.0p-33, + 0xc50000.0p-23, 0x820000.0p-33, + 0xc60000.0p-23, 0x800000.0p-38, + 0xc70000.0p-23, -0x820000.0p-33, + 0xc80000.0p-23, 0x800000.0p-33, + 0xc90000.0p-23, -0xa00000.0p-36, + 0xca0000.0p-23, -0xb00000.0p-33, + 0xcb0000.0p-23, 0x840000.0p-34, + 0xcc0000.0p-23, -0xd00000.0p-34, + 0xcd0000.0p-23, 0x800000.0p-33, + 0xce0000.0p-23, -0xe00000.0p-35, + 0xcf0000.0p-23, 0xa60000.0p-33, + 0xd00000.0p-23, -0x800000.0p-35, + 0xd10000.0p-23, 0xb40000.0p-33, + 0xd20000.0p-23, -0x800000.0p-35, + 0xd30000.0p-23, 0xaa0000.0p-33, + 0xd40000.0p-23, -0xe00000.0p-35, + 0xd50000.0p-23, 0x880000.0p-33, + 0xd60000.0p-23, -0xd00000.0p-34, + 0xd70000.0p-23, 0x9c0000.0p-34, + 0xd80000.0p-23, -0xb00000.0p-33, + 0xd90000.0p-23, -0x800000.0p-38, + 0xda0000.0p-23, 0xa40000.0p-33, + 0xdb0000.0p-23, -0xdc0000.0p-34, + 0xdc0000.0p-23, 0xc00000.0p-35, + 0xdd0000.0p-23, 0xca0000.0p-33, + 0xde0000.0p-23, -0xb80000.0p-34, + 0xdf0000.0p-23, 0xd00000.0p-35, + 0xe00000.0p-23, 0xc00000.0p-33, + 0xe10000.0p-23, -0xf40000.0p-34, + 0xe20000.0p-23, 0x800000.0p-37, + 0xe30000.0p-23, 0x860000.0p-33, + 0xe40000.0p-23, -0xc80000.0p-33, + 0xe50000.0p-23, -0xa80000.0p-34, + 0xe60000.0p-23, 0xe00000.0p-36, + 0xe70000.0p-23, 0x880000.0p-33, + 0xe80000.0p-23, -0xe00000.0p-33, + 0xe90000.0p-23, -0xfc0000.0p-34, + 0xea0000.0p-23, -0x800000.0p-35, + 0xeb0000.0p-23, 0xe80000.0p-35, + 0xec0000.0p-23, 0x900000.0p-33, + 0xed0000.0p-23, 0xe20000.0p-33, + 0xee0000.0p-23, -0xac0000.0p-33, + 0xef0000.0p-23, -0xc80000.0p-34, + 0xf00000.0p-23, -0x800000.0p-35, + 0xf10000.0p-23, 0x800000.0p-35, + 0xf20000.0p-23, 0xb80000.0p-34, + 0xf30000.0p-23, 0x940000.0p-33, + 0xf40000.0p-23, 0xc80000.0p-33, + 0xf50000.0p-23, -0xf20000.0p-33, + 0xf60000.0p-23, -0xc80000.0p-33, + 0xf70000.0p-23, -0xa20000.0p-33, + 0xf80000.0p-23, -0x800000.0p-33, + 0xf90000.0p-23, -0xc40000.0p-34, + 0xfa0000.0p-23, -0x900000.0p-34, + 0xfb0000.0p-23, -0xc80000.0p-35, + 0xfc0000.0p-23, -0x800000.0p-35, + 0xfd0000.0p-23, -0x900000.0p-36, + 0xfe0000.0p-23, -0x800000.0p-37, + 0xff0000.0p-23, -0x800000.0p-39, + 0x800000.0p-22, 0, +}; +#endif /* USE_UTAB */ + +#ifdef STRUCT_RETURN +#define RETURN1(rp, v) do { \ + (rp)->hi = (v); \ + (rp)->lo_set = 0; \ + return; \ +} while (0) + +#define RETURN2(rp, h, l) do { \ + (rp)->hi = (h); \ + (rp)->lo = (l); \ + (rp)->lo_set = 1; \ + return; \ +} while (0) + +struct ld { + long double hi; + long double lo; + int lo_set; +}; +#else +#define RETURN1(rp, v) RETURNF(v) +#define RETURN2(rp, h, l) RETURNI((h) + (l)) +#endif + +#ifdef STRUCT_RETURN +static inline __always_inline void +k_logl(long double x, struct ld *rp) +#else +long double +logl(long double x) +#endif +{ + long double d, dk, val_hi, val_lo, z; + uint64_t ix, lx; + int i, k; + uint16_t hx; + + EXTRACT_LDBL80_WORDS(hx, lx, x); + k = -16383; +#if 0 /* Hard to do efficiently. Don't do it until we support all modes. */ + if (x == 1) + RETURN1(rp, 0); /* log(1) = +0 in all rounding modes */ +#endif + if (hx == 0 || hx >= 0x8000) { /* zero, negative or subnormal? */ + if (((hx & 0x7fff) | lx) == 0) + RETURN1(rp, -1 / zero); /* log(+-0) = -Inf */ + if (hx != 0) + /* log(neg or [pseudo-]NaN) = qNaN: */ + RETURN1(rp, (x - x) / zero); + x *= 0x1.0p65; /* subnormal; scale up x */ + /* including pseudo-subnormals */ + EXTRACT_LDBL80_WORDS(hx, lx, x); + k = -16383 - 65; + } else if (hx >= 0x7fff || (lx & 0x8000000000000000ULL) == 0) + RETURN1(rp, x + x); /* log(Inf or NaN) = Inf or qNaN */ + /* log(pseudo-Inf) = qNaN */ + /* log(pseudo-NaN) = qNaN */ + /* log(unnormal) = qNaN */ +#ifndef STRUCT_RETURN + ENTERI(); +#endif + k += hx; + ix = lx & 0x7fffffffffffffffULL; + dk = k; + + /* Scale x to be in [1, 2). */ + SET_LDBL_EXPSIGN(x, 0x3fff); + + /* 0 <= i <= INTERVALS: */ +#define L2I (64 - LOG2_INTERVALS) + i = (ix + (1LL << (L2I - 2))) >> (L2I - 1); + + /* + * -0.005280 < d < 0.004838. In particular, the infinite- + * precision |d| is <= 2**-7. Rounding of G(i) to 8 bits + * ensures that d is representable without extra precision for + * this bound on |d| (since when this calculation is expressed + * as x*G(i)-1, the multiplication needs as many extra bits as + * G(i) has and the subtraction cancels 8 bits). But for + * most i (107 cases out of 129), the infinite-precision |d| + * is <= 2**-8. G(i) is rounded to 9 bits for such i to give + * better accuracy (this works by improving the bound on |d|, + * which in turn allows rounding to 9 bits in more cases). + * This is only important when the original x is near 1 -- it + * lets us avoid using a special method to give the desired + * accuracy for such x. + */ + if (0) + d = x * G(i) - 1; + else { +#ifdef USE_UTAB + d = (x - H(i)) * G(i) + E(i); +#else + long double x_hi, x_lo; + float fx_hi; + + /* + * Split x into x_hi + x_lo to calculate x*G(i)-1 exactly. + * G(i) has at most 9 bits, so the splitting point is not + * critical. + */ + SET_FLOAT_WORD(fx_hi, (lx >> 40) | 0x3f800000); + x_hi = fx_hi; + x_lo = x - x_hi; + d = x_hi * G(i) - 1 + x_lo * G(i); +#endif + } + + /* + * Our algorithm depends on exact cancellation of F_lo(i) and + * F_hi(i) with dk*ln_2_lo and dk*ln2_hi when k is -1 and i is + * at the end of the table. This and other technical complications + * make it difficult to avoid the double scaling in (dk*ln2) * + * log(base) for base != e without losing more accuracy and/or + * efficiency than is gained. + */ + z = d * d; + val_lo = z * d * z * (z * (d * P8 + P7) + (d * P6 + P5)) + + (F_lo(i) + dk * ln2_lo + z * d * (d * P4 + P3)) + z * P2; + val_hi = d; +#ifdef DEBUG + if (fetestexcept(FE_UNDERFLOW)) + breakpoint(); +#endif + + _3sumF(val_hi, val_lo, F_hi(i) + dk * ln2_hi); + RETURN2(rp, val_hi, val_lo); +} + +long double +log1pl(long double x) +{ + long double d, d_hi, d_lo, dk, f_lo, val_hi, val_lo, z; + long double f_hi, twopminusk; + uint64_t ix, lx; + int i, k; + int16_t ax, hx; + + DOPRINT_START(&x); + EXTRACT_LDBL80_WORDS(hx, lx, x); + if (hx < 0x3fff) { /* x < 1, or x neg NaN */ + ax = hx & 0x7fff; + if (ax >= 0x3fff) { /* x <= -1, or x neg NaN */ + if (ax == 0x3fff && lx == 0x8000000000000000ULL) + RETURNP(-1 / zero); /* log1p(-1) = -Inf */ + /* log1p(x < 1, or x [pseudo-]NaN) = qNaN: */ + RETURNP((x - x) / (x - x)); + } + if (ax <= 0x3fbe) { /* |x| < 2**-64 */ + if ((int)x == 0) + RETURNP(x); /* x with inexact if x != 0 */ + } + f_hi = 1; + f_lo = x; + } else if (hx >= 0x7fff) { /* x +Inf or non-neg NaN */ + RETURNP(x + x); /* log1p(Inf or NaN) = Inf or qNaN */ + /* log1p(pseudo-Inf) = qNaN */ + /* log1p(pseudo-NaN) = qNaN */ + /* log1p(unnormal) = qNaN */ + } else if (hx < 0x407f) { /* 1 <= x < 2**128 */ + f_hi = x; + f_lo = 1; + } else { /* 2**128 <= x < +Inf */ + f_hi = x; + f_lo = 0; /* avoid underflow of the P5 term */ + } + ENTERI(); + x = f_hi + f_lo; + f_lo = (f_hi - x) + f_lo; + + EXTRACT_LDBL80_WORDS(hx, lx, x); + k = -16383; + + k += hx; + ix = lx & 0x7fffffffffffffffULL; + dk = k; + + SET_LDBL_EXPSIGN(x, 0x3fff); + twopminusk = 1; + SET_LDBL_EXPSIGN(twopminusk, 0x7ffe - (hx & 0x7fff)); + f_lo *= twopminusk; + + i = (ix + (1LL << (L2I - 2))) >> (L2I - 1); + + /* + * x*G(i)-1 (with a reduced x) can be represented exactly, as + * above, but now we need to evaluate the polynomial on d = + * (x+f_lo)*G(i)-1 and extra precision is needed for that. + * Since x+x_lo is a hi+lo decomposition and subtracting 1 + * doesn't lose too many bits, an inexact calculation for + * f_lo*G(i) is good enough. + */ + if (0) + d_hi = x * G(i) - 1; + else { +#ifdef USE_UTAB + d_hi = (x - H(i)) * G(i) + E(i); +#else + long double x_hi, x_lo; + float fx_hi; + + SET_FLOAT_WORD(fx_hi, (lx >> 40) | 0x3f800000); + x_hi = fx_hi; + x_lo = x - x_hi; + d_hi = x_hi * G(i) - 1 + x_lo * G(i); +#endif + } + d_lo = f_lo * G(i); + + /* + * This is _2sumF(d_hi, d_lo) inlined. The condition + * (d_hi == 0 || |d_hi| >= |d_lo|) for using _2sumF() is not + * always satisifed, so it is not clear that this works, but + * it works in practice. It works even if it gives a wrong + * normalized d_lo, since |d_lo| > |d_hi| implies that i is + * nonzero and d is tiny, so the F(i) term dominates d_lo. + * In float precision: + * (By exhaustive testing, the worst case is d_hi = 0x1.bp-25. + * And if d is only a little tinier than that, we would have + * another underflow problem for the P3 term; this is also ruled + * out by exhaustive testing.) + */ + d = d_hi + d_lo; + d_lo = d_hi - d + d_lo; + d_hi = d; + + z = d * d; + val_lo = z * d * z * (z * (d * P8 + P7) + (d * P6 + P5)) + + (F_lo(i) + dk * ln2_lo + d_lo + z * d * (d * P4 + P3)) + z * P2; + val_hi = d_hi; +#ifdef DEBUG + if (fetestexcept(FE_UNDERFLOW)) + breakpoint(); +#endif + + _3sumF(val_hi, val_lo, F_hi(i) + dk * ln2_hi); + RETURN2PI(val_hi, val_lo); +} + +#ifdef STRUCT_RETURN + +long double +logl(long double x) +{ + struct ld r; + + ENTERI(); + DOPRINT_START(&x); + k_logl(x, &r); + RETURNSPI(&r); +} + +static const double +invln10_hi = 4.3429448190317999e-1, /* 0x1bcb7b1526e000.0p-54 */ +invln10_lo = 7.1842412889749798e-14, /* 0x1438ca9aadd558.0p-96 */ +invln2_hi = 1.4426950408887933e0, /* 0x171547652b8000.0p-52 */ +invln2_lo = 1.7010652264631490e-13; /* 0x17f0bbbe87fed0.0p-95 */ + +long double +log10l(long double x) +{ + struct ld r; + long double hi, lo; + + ENTERI(); + DOPRINT_START(&x); + k_logl(x, &r); + if (!r.lo_set) + RETURNPI(r.hi); + _2sumF(r.hi, r.lo); + hi = (float)r.hi; + lo = r.lo + (r.hi - hi); + RETURN2PI(invln10_hi * hi, + (invln10_lo + invln10_hi) * lo + invln10_lo * hi); +} + +long double +log2l(long double x) +{ + struct ld r; + long double hi, lo; + + ENTERI(); + DOPRINT_START(&x); + k_logl(x, &r); + if (!r.lo_set) + RETURNPI(r.hi); + _2sumF(r.hi, r.lo); + hi = (float)r.hi; + lo = r.lo + (r.hi - hi); + RETURN2PI(invln2_hi * hi, + (invln2_lo + invln2_hi) * lo + invln2_lo * hi); +} + +#endif /* STRUCT_RETURN */ diff --git a/lib/msun/man/acosh.3 b/lib/msun/man/acosh.3 index cd04bb7..b32c2ea 100644 --- a/lib/msun/man/acosh.3 +++ b/lib/msun/man/acosh.3 @@ -28,12 +28,13 @@ .\" from: @(#)acosh.3 5.2 (Berkeley) 5/6/91 .\" $FreeBSD$ .\" -.Dd January 14, 2005 +.Dd June 9, 2013 .Dt ACOSH 3 .Os .Sh NAME .Nm acosh , -.Nm acoshf +.Nm acoshf , +.Nm acoshl .Nd inverse hyperbolic cosine functions .Sh LIBRARY .Lb libm @@ -43,11 +44,14 @@ .Fn acosh "double x" .Ft float .Fn acoshf "float x" +.Ft long double +.Fn acoshl "long double x" .Sh DESCRIPTION The -.Fn acosh -and the -.Fn acoshf +.Fn acosh , +.Fn acoshf , +and +.Fn acoshl functions compute the inverse hyperbolic cosine of the real argument @@ -55,11 +59,7 @@ argument For a discussion of error due to roundoff, see .Xr math 3 . .Sh RETURN VALUES -The -.Fn acosh -and the -.Fn acoshf -functions +These functions return the inverse hyperbolic cosine of .Ar x . If the argument is less than 1, @@ -73,6 +73,13 @@ raises an invalid exception and returns an \*(Na. .Xr math 3 .Sh HISTORY The -.Fn acosh -function appeared in -.Bx 4.3 . +.Fn acosh , +.Fn acoshf , +and +.Fn acoshl +functions appeared in +.Bx 4.3 , +.Fx 2.0 , +and +.Fx 10.0 , +respectively. diff --git a/lib/msun/man/asinh.3 b/lib/msun/man/asinh.3 index 6dba217..bab8da6 100644 --- a/lib/msun/man/asinh.3 +++ b/lib/msun/man/asinh.3 @@ -28,12 +28,13 @@ .\" from: @(#)asinh.3 6.4 (Berkeley) 5/6/91 .\" $FreeBSD$ .\" -.Dd May 6, 1991 +.Dd June 9, 2013 .Dt ASINH 3 .Os .Sh NAME .Nm asinh , -.Nm asinhf +.Nm asinhf , +.Nm asinhl .Nd inverse hyperbolic sine functions .Sh LIBRARY .Lb libm @@ -43,11 +44,14 @@ .Fn asinh "double x" .Ft float .Fn asinhf "float x" +.Ft long double +.Fn asinhl "long double x" .Sh DESCRIPTION The -.Fn asinh -and the -.Fn asinhf +.Fn asinh , +.Fn asinhf , +and +.Fn asinhl functions compute the inverse hyperbolic sine of the real argument @@ -55,11 +59,7 @@ argument For a discussion of error due to roundoff, see .Xr math 3 . .Sh RETURN VALUES -The -.Fn asinh -and the -.Fn asinhf -functions +These functions return the inverse hyperbolic sine of .Ar x . .Sh SEE ALSO @@ -69,6 +69,13 @@ return the inverse hyperbolic sine of .Xr math 3 .Sh HISTORY The -.Fn asinh -function appeared in -.Bx 4.3 . +.Fn asinh , +.Fn asinhf , +and +.Fn asinhl +functions appeared in +.Bx 4.3 , +.Fx 2.0 , +and +.Fx 10.0 , +respectively. diff --git a/lib/msun/man/atanh.3 b/lib/msun/man/atanh.3 index 0638a1f..7763c46 100644 --- a/lib/msun/man/atanh.3 +++ b/lib/msun/man/atanh.3 @@ -28,12 +28,13 @@ .\" from: @(#)atanh.3 5.2 (Berkeley) 5/6/91 .\" $FreeBSD$ .\" -.Dd January 14, 2005 +.Dd June 9, 2013 .Dt ATANH 3 .Os .Sh NAME .Nm atanh , -.Nm atanhf +.Nm atanhf , +.Nm atanhl .Nd inverse hyperbolic tangent functions .Sh LIBRARY .Lb libm @@ -43,11 +44,14 @@ .Fn atanh "double x" .Ft float .Fn atanhf "float x" +.Ft long double +.Fn atanhl "long double x" .Sh DESCRIPTION The -.Fn atanh -and the -.Fn atanhf +.Fn atanh , +.Fn atanhf , +and +.Fn atanhl functions compute the inverse hyperbolic tangent of the real argument @@ -55,11 +59,7 @@ argument For a discussion of error due to roundoff, see .Xr math 3 . .Sh RETURN VALUES -The -.Fn atanh -and the -.Fn atanhf -functions +These functions return the inverse hyperbolic tangent of .Ar x if successful. @@ -76,6 +76,13 @@ If .Xr math 3 .Sh HISTORY The -.Fn atanh -function appeared in -.Bx 4.3 . +.Fn atanh , +.Fn atanhf , +and +.Fn atanhl +functions appeared in +.Bx 4.3 , +.Fx 2.0 , +and +.Fx 10.0 , +respectively. diff --git a/lib/msun/man/cacos.3 b/lib/msun/man/cacos.3 new file mode 100644 index 0000000..0bf3f0f --- /dev/null +++ b/lib/msun/man/cacos.3 @@ -0,0 +1,128 @@ +.\" Copyright (c) 2013 David Schultz <das@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd May 27, 2013 +.Dt CACOS 3 +.Os +.Sh NAME +.Nm cacos , +.Nm cacosf , +.Nm cacosh , +.Nm cacoshf , +.Nm casin , +.Nm casinf +.Nm casinh , +.Nm casinhf +.Nm catan , +.Nm catanf +.Nm catanh , +.Nm catanhf +.Nd complex arc trigonometric and hyperbolic functions +.Sh LIBRARY +.Lb libm +.Sh SYNOPSIS +.In complex.h +.Ft double complex +.Fn cacos "double complex z" +.Ft float complex +.Fn cacosf "float complex z" +.Ft double complex +.Fn cacosh "double complex z" +.Ft float complex +.Fn cacoshf "float complex z" +.Ft double complex +.Fn casin "double complex z" +.Ft float complex +.Fn casinf "float complex z" +.Ft double complex +.Fn casinh "double complex z" +.Ft float complex +.Fn casinhf "float complex z" +.Ft double complex +.Fn catan "double complex z" +.Ft float complex +.Fn catanf "float complex z" +.Ft double complex +.Fn catanh "double complex z" +.Ft float complex +.Fn catanhf "float complex z" +.Sh DESCRIPTION +The +.Fn cacos , +.Fn casin , +and +.Fn catan +functions compute the principal value of the inverse cosine, sine, +and tangent of the complex number +.Fa z , +respectively. +The +.Fn cacosh , +.Fn casinh , +and +.Fn catanh +functions compute the principal value of the inverse hyperbolic +cosine, sine, and tangent. +The +.Fn cacosf , +.Fn casinf , +.Fn catanf +.Fn cacoshf , +.Fn casinhf , +and +.Fn catanhf +functions perform the same operations in +.Fa float +precision. +.Pp +.ie '\*[.T]'utf8' +. ds Un \[cu] +.el +. ds Un U +. +There is no universal convention for defining the principal values of +these functions. The following table gives the branch cuts, and the +corresponding ranges for the return values, adopted by the C language. +.Bl -column ".Sy Function" ".Sy (-\*(If*I, -I) \*(Un (I, \*(If*I)" ".Sy [-\*(Pi/2*I, \*(Pi/2*I]" +.It Sy Function Ta Sy Branch Cut(s) Ta Sy Range +.It cacos Ta (-\*(If, -1) \*(Un (1, \*(If) Ta [0, \*(Pi] +.It casin Ta (-\*(If, -1) \*(Un (1, \*(If) Ta [-\*(Pi/2, \*(Pi/2] +.It catan Ta (-\*(If*I, -i) \*(Un (I, \*(If*I) Ta [-\*(Pi/2, \*(Pi/2] +.It cacosh Ta (-\*(If, 1) Ta [-\*(Pi*I, \*(Pi*I] +.It casinh Ta (-\*(If*I, -i) \*(Un (I, \*(If*I) Ta [-\*(Pi/2*I, \*(Pi/2*I] +.It catanh Ta (-\*(If, -1) \*(Un (1, \*(If) Ta [-\*(Pi/2*I, \*(Pi/2*I] +.El +.Sh SEE ALSO +.Xr ccos 3 , +.Xr ccosh 3 , +.Xr complex 3 , +.Xr cos 3 , +.Xr math 3 , +.Xr sin 3 , +.Xr tan 3 +.Sh STANDARDS +These functions conform to +.St -isoC-99 . diff --git a/lib/msun/man/ccos.3 b/lib/msun/man/ccos.3 index cf708c1..c07205e 100644 --- a/lib/msun/man/ccos.3 +++ b/lib/msun/man/ccos.3 @@ -69,6 +69,7 @@ functions perform the same operations in .Fa float precision. .Sh SEE ALSO +.Xr cacos 3 , .Xr ccosh 3 , .Xr complex 3 , .Xr cos 3 , diff --git a/lib/msun/man/ccosh.3 b/lib/msun/man/ccosh.3 index 01688b5..f006442 100644 --- a/lib/msun/man/ccosh.3 +++ b/lib/msun/man/ccosh.3 @@ -69,6 +69,7 @@ functions perform the same operations in .Fa float precision. .Sh SEE ALSO +.Xr cacosh 3 , .Xr ccos 3 , .Xr complex 3 , .Xr cosh 3 , diff --git a/lib/msun/man/complex.3 b/lib/msun/man/complex.3 index 4c4dd68..34eb03e 100644 --- a/lib/msun/man/complex.3 +++ b/lib/msun/man/complex.3 @@ -89,6 +89,12 @@ creal compute the real part .\" Section 7.3.5-6 of ISO C99 standard .Ss Trigonometric and Hyperbolic Functions .Cl +cacos arc cosine +cacosh arc hyperbolic cosine +casin arc sine +casinh arc hyperbolic sine +catan arc tangent +catanh arc hyperbolic tangent ccos cosine ccosh hyperbolic cosine csin sine @@ -111,20 +117,8 @@ The functions described here conform to .St -isoC-99 . .Sh BUGS -The inverse trigonometric and hyperbolic functions -.Fn cacos , -.Fn cacosh , -.Fn casin , -.Fn casinh , -.Fn catan , -and -.Fn catanh -are not implemented. -.Pp The logarithmic functions .Fn clog -are not implemented. -.Pp -The power functions +and the power functions .Fn cpow are not implemented. diff --git a/lib/msun/man/exp.3 b/lib/msun/man/exp.3 index 5907337..89a2dc5 100644 --- a/lib/msun/man/exp.3 +++ b/lib/msun/man/exp.3 @@ -28,7 +28,7 @@ .\" from: @(#)exp.3 6.12 (Berkeley) 7/31/91 .\" $FreeBSD$ .\" -.Dd July 10, 2012 +.Dd June 3, 2013 .Dt EXP 3 .Os .Sh NAME @@ -41,6 +41,7 @@ .Nm exp2l , .Nm expm1 , .Nm expm1f , +.Nm expm1l , .Nm pow , .Nm powf .Nd exponential and power functions @@ -64,6 +65,8 @@ .Fn expm1 "double x" .Ft float .Fn expm1f "float x" +.Ft long double +.Fn expm1l "long double x" .Ft double .Fn pow "double x" "double y" .Ft float @@ -88,9 +91,10 @@ functions compute the base 2 exponential of the given argument .Fa x . .Pp The -.Fn expm1 +.Fn expm1 , +.Fn expm1f , and the -.Fn expm1f +.Fn expm1l functions compute the value exp(x)\-1 accurately even for tiny argument .Fa x . .Pp diff --git a/lib/msun/man/log.3 b/lib/msun/man/log.3 index b9fd83c..b08e692 100644 --- a/lib/msun/man/log.3 +++ b/lib/msun/man/log.3 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 5, 2010 +.Dd June 3, 2013 .Dt LOG 3 .Os .Sh NAME @@ -33,10 +33,13 @@ .Nm logl , .Nm log10 , .Nm log10f , +.Nm log10l , .Nm log2 , .Nm log2f , +.Nm log2l , .Nm log1p , -.Nm log1pf +.Nm log1pf , +.Nm log1pl .Nd logarithm functions .Sh LIBRARY .Lb libm @@ -46,43 +49,55 @@ .Fn log "double x" .Ft float .Fn logf "float x" +.Ft long double +.Fn logl "long double x" .Ft double .Fn log10 "double x" .Ft float .Fn log10f "float x" +.Ft long double +.Fn log10l "long double x" .Ft double .Fn log2 "double x" .Ft float .Fn log2f "float x" +.Ft long double +.Fn log2l "long double x" .Ft double .Fn log1p "double x" .Ft float .Fn log1pf "float x" +.Ft long double +.Fn log1pl "long double x" .Sh DESCRIPTION The -.Fn log +.Fn log , +.Fn logf , and -.Fn logf +.Fn logl functions compute the natural logarithm of .Fa x . .Pp The -.Fn log10 +.Fn log10 , +.Fn log10f , and -.Fn log10f +.Fn log10l functions compute the logarithm base 10 of .Fa x , while -.Fn log2 +.Fn log2 , +.Fn log2f , and -.Fn log2f +.Fn log2l compute the logarithm base 2 of .Fa x . .Pp The -.Fn log1p +.Fn log1p , +.Fn log1pf , and -.Fn log1pf +.Fn log1pl functions compute the natural logarithm of .No "1 + x" . Computing the natural logarithm as @@ -107,12 +122,16 @@ results in an invalid exception and a return value of \*(Na. The .Fn log , .Fn logf , +.Fn logl , .Fn log10 , .Fn log10f , +.Fn log10l , .Fn log2 , .Fn log2f , +.Fn log2l , .Fn log1p , +.Fn log1pf , and -.Fn log1pf +.Fn log1pl functions conform to .St -isoC-99 . diff --git a/lib/msun/src/catrig.c b/lib/msun/src/catrig.c new file mode 100644 index 0000000..200977c --- /dev/null +++ b/lib/msun/src/catrig.c @@ -0,0 +1,639 @@ +/*- + * Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> +#include <float.h> + +#include "math.h" +#include "math_private.h" + +#undef isinf +#define isinf(x) (fabs(x) == INFINITY) +#undef isnan +#define isnan(x) ((x) != (x)) +#define raise_inexact() do { volatile float junk = 1 + tiny; } while(0) +#undef signbit +#define signbit(x) (__builtin_signbit(x)) + +/* We need that DBL_EPSILON^2/128 is larger than FOUR_SQRT_MIN. */ +static const double +A_crossover = 10, /* Hull et al suggest 1.5, but 10 works better */ +B_crossover = 0.6417, /* suggested by Hull et al */ +FOUR_SQRT_MIN = 0x1p-509, /* >= 4 * sqrt(DBL_MIN) */ +QUARTER_SQRT_MAX = 0x1p509, /* <= sqrt(DBL_MAX) / 4 */ +m_e = 2.7182818284590452e0, /* 0x15bf0a8b145769.0p-51 */ +m_ln2 = 6.9314718055994531e-1, /* 0x162e42fefa39ef.0p-53 */ +pio2_hi = 1.5707963267948966e0, /* 0x1921fb54442d18.0p-52 */ +RECIP_EPSILON = 1 / DBL_EPSILON, +SQRT_3_EPSILON = 2.5809568279517849e-8, /* 0x1bb67ae8584caa.0p-78 */ +SQRT_6_EPSILON = 3.6500241499888571e-8, /* 0x13988e1409212e.0p-77 */ +SQRT_MIN = 0x1p-511; /* >= sqrt(DBL_MIN) */ + +static const volatile double +pio2_lo = 6.1232339957367659e-17; /* 0x11a62633145c07.0p-106 */ +static const volatile float +tiny = 0x1p-100; + +static double complex clog_for_large_values(double complex z); + +/* + * Testing indicates that all these functions are accurate up to 4 ULP. + * The functions casin(h) and cacos(h) are about 2.5 times slower than asinh. + * The functions catan(h) are a little under 2 times slower than atanh. + * + * The code for casinh, casin, cacos, and cacosh comes first. The code is + * rather complicated, and the four functions are highly interdependent. + * + * The code for catanh and catan comes at the end. It is much simpler than + * the other functions, and the code for these can be disconnected from the + * rest of the code. + */ + +/* + * ================================ + * | casinh, casin, cacos, cacosh | + * ================================ + */ + +/* + * The algorithm is very close to that in "Implementing the complex arcsine + * and arccosine functions using exception handling" by T. E. Hull, Thomas F. + * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on + * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335, + * http://dl.acm.org/citation.cfm?id=275324. + * + * Throughout we use the convention z = x + I*y. + * + * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) + * where + * A = (|z+I| + |z-I|) / 2 + * B = (|z+I| - |z-I|) / 2 = y/A + * + * These formulas become numerically unstable: + * (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that + * is, Re(casinh(z)) is close to 0); + * (b) for Im(casinh(z)) when z is close to either of the intervals + * [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is + * close to PI/2). + * + * These numerical problems are overcome by defining + * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 + * Then if A < A_crossover, we use + * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) + * A-1 = f(x, 1+y) + f(x, 1-y) + * and if B > B_crossover, we use + * asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y))) + * A-y = f(x, y+1) + f(x, y-1) + * where without loss of generality we have assumed that x and y are + * non-negative. + * + * Much of the difficulty comes because the intermediate computations may + * produce overflows or underflows. This is dealt with in the paper by Hull + * et al by using exception handling. We do this by detecting when + * computations risk underflow or overflow. The hardest part is handling the + * underflows when computing f(a, b). + * + * Note that the function f(a, b) does not appear explicitly in the paper by + * Hull et al, but the idea may be found on pages 308 and 309. Introducing the + * function f(a, b) allows us to concentrate many of the clever tricks in this + * paper into one function. + */ + +/* + * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. + * Pass hypot(a, b) as the third argument. + */ +static inline double +f(double a, double b, double hypot_a_b) +{ + if (b < 0) + return ((hypot_a_b - b) / 2); + if (b == 0) + return (a / 2); + return (a * a / (hypot_a_b + b) / 2); +} + +/* + * All the hard work is contained in this function. + * x and y are assumed positive or zero, and less than RECIP_EPSILON. + * Upon return: + * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). + * B_is_usable is set to 1 if the value of B is usable. + * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. + * If returning sqrt_A2my2 has potential to result in an underflow, it is + * rescaled, and new_y is similarly rescaled. + */ +static inline void +do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B, + double *sqrt_A2my2, double *new_y) +{ + double R, S, A; /* A, B, R, and S are as in Hull et al. */ + double Am1, Amy; /* A-1, A-y. */ + + R = hypot(x, y + 1); /* |z+I| */ + S = hypot(x, y - 1); /* |z-I| */ + + /* A = (|z+I| + |z-I|) / 2 */ + A = (R + S) / 2; + /* + * Mathematically A >= 1. There is a small chance that this will not + * be so because of rounding errors. So we will make certain it is + * so. + */ + if (A < 1) + A = 1; + + if (A < A_crossover) { + /* + * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y). + * rx = log1p(Am1 + sqrt(Am1*(A+1))) + */ + if (y == 1 && x < DBL_EPSILON * DBL_EPSILON / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *rx = sqrt(x); + } else if (x >= DBL_EPSILON * fabs(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN + */ + Am1 = f(x, 1 + y, R) + f(x, 1 - y, S); + *rx = log1p(Am1 + sqrt(Am1 * (A + 1))); + } else if (y < 1) { + /* + * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and + * A = 1 (inexactly). + */ + *rx = x / sqrt((1 - y) * (1 + y)); + } else { /* if (y > 1) */ + /* + * A-1 = y-1 (inexactly). + */ + *rx = log1p((y - 1) + sqrt((y - 1) * (y + 1))); + } + } else { + *rx = log(A + sqrt(A * A - 1)); + } + + *new_y = y; + + if (y < FOUR_SQRT_MIN) { + /* + * Avoid a possible underflow caused by y/A. For casinh this + * would be legitimate, but will be picked up by invoking atan2 + * later on. For cacos this would not be legitimate. + */ + *B_is_usable = 0; + *sqrt_A2my2 = A * (2 / DBL_EPSILON); + *new_y = y * (2 / DBL_EPSILON); + return; + } + + /* B = (|z+I| - |z-I|) / 2 = y/A */ + *B = y / A; + *B_is_usable = 1; + + if (*B > B_crossover) { + *B_is_usable = 0; + /* + * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1). + * sqrt_A2my2 = sqrt(Amy*(A+y)) + */ + if (y == 1 && x < DBL_EPSILON / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *sqrt_A2my2 = sqrt(x) * sqrt((A + y) / 2); + } else if (x >= DBL_EPSILON * fabs(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN + * and + * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN + */ + Amy = f(x, y + 1, R) + f(x, y - 1, S); + *sqrt_A2my2 = sqrt(Amy * (A + y)); + } else if (y > 1) { + /* + * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and + * A = y (inexactly). + * + * y < RECIP_EPSILON. So the following + * scaling should avoid any underflow problems. + */ + *sqrt_A2my2 = x * (4 / DBL_EPSILON / DBL_EPSILON) * y / + sqrt((y + 1) * (y - 1)); + *new_y = y * (4 / DBL_EPSILON / DBL_EPSILON); + } else { /* if (y < 1) */ + /* + * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and + * A = 1 (inexactly). + */ + *sqrt_A2my2 = sqrt((1 - y) * (1 + y)); + } + } +} + +/* + * casinh(z) = z + O(z^3) as z -> 0 + * + * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity + * The above formula works for the imaginary part as well, because + * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) + * as z -> infinity, uniformly in y + */ +double complex +casinh(double complex z) +{ + double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y; + int B_is_usable; + double complex w; + + x = creal(z); + y = cimag(z); + ax = fabs(x); + ay = fabs(y); + + if (isnan(x) || isnan(y)) { + /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ + if (isinf(x)) + return (cpack(x, y + y)); + /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ + if (isinf(y)) + return (cpack(y, x + x)); + /* casinh(NaN + I*0) = NaN + I*0 */ + if (y == 0) + return (cpack(x + x, y)); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + if (signbit(x) == 0) + w = clog_for_large_values(z) + m_ln2; + else + w = clog_for_large_values(-z) + m_ln2; + return (cpack(copysign(creal(w), x), copysign(cimag(w), y))); + } + + /* Avoid spuriously raising inexact for z = 0. */ + if (x == 0 && y == 0) + return (z); + + /* All remaining cases are inexact. */ + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (z); + + do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); + if (B_is_usable) + ry = asin(B); + else + ry = atan2(new_y, sqrt_A2my2); + return (cpack(copysign(rx, x), copysign(ry, y))); +} + +/* + * casin(z) = reverse(casinh(reverse(z))) + * where reverse(x + I*y) = y + I*x = I*conj(z). + */ +double complex +casin(double complex z) +{ + double complex w = casinh(cpack(cimag(z), creal(z))); + + return (cpack(cimag(w), creal(w))); +} + +/* + * cacos(z) = PI/2 - casin(z) + * but do the computation carefully so cacos(z) is accurate when z is + * close to 1. + * + * cacos(z) = PI/2 - z + O(z^3) as z -> 0 + * + * cacos(z) = -sign(y)*I*clog(z) + O(1/z^2) as z -> infinity + * The above formula works for the real part as well, because + * Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3) + * as z -> infinity, uniformly in y + */ +double complex +cacos(double complex z) +{ + double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; + int sx, sy; + int B_is_usable; + double complex w; + + x = creal(z); + y = cimag(z); + sx = signbit(x); + sy = signbit(y); + ax = fabs(x); + ay = fabs(y); + + if (isnan(x) || isnan(y)) { + /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ + if (isinf(x)) + return (cpack(y + y, -INFINITY)); + /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ + if (isinf(y)) + return (cpack(x + x, -y)); + /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ + if (x == 0) + return (cpack(pio2_hi + pio2_lo, y + y)); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + w = clog_for_large_values(z); + rx = fabs(cimag(w)); + ry = creal(w) + m_ln2; + if (sy == 0) + ry = -ry; + return (cpack(rx, ry)); + } + + /* Avoid spuriously raising inexact for z = 1. */ + if (x == 1 && y == 0) + return (cpack(0, -y)); + + /* All remaining cases are inexact. */ + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (cpack(pio2_hi - (x - pio2_lo), -y)); + + do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); + if (B_is_usable) { + if (sx == 0) + rx = acos(B); + else + rx = acos(-B); + } else { + if (sx == 0) + rx = atan2(sqrt_A2mx2, new_x); + else + rx = atan2(sqrt_A2mx2, -new_x); + } + if (sy == 0) + ry = -ry; + return (cpack(rx, ry)); +} + +/* + * cacosh(z) = I*cacos(z) or -I*cacos(z) + * where the sign is chosen so Re(cacosh(z)) >= 0. + */ +double complex +cacosh(double complex z) +{ + double complex w; + double rx, ry; + + w = cacos(z); + rx = creal(w); + ry = cimag(w); + /* cacosh(NaN + I*NaN) = NaN + I*NaN */ + if (isnan(rx) && isnan(ry)) + return (cpack(ry, rx)); + /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ + /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ + if (isnan(rx)) + return (cpack(fabs(ry), rx)); + /* cacosh(0 + I*NaN) = NaN + I*NaN */ + if (isnan(ry)) + return (cpack(ry, ry)); + return (cpack(fabs(ry), copysign(rx, cimag(z)))); +} + +/* + * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. + */ +static double complex +clog_for_large_values(double complex z) +{ + double x, y; + double ax, ay, t; + + x = creal(z); + y = cimag(z); + ax = fabs(x); + ay = fabs(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + /* + * Avoid overflow in hypot() when x and y are both very large. + * Divide x and y by E, and then add 1 to the logarithm. This depends + * on E being larger than sqrt(2). + * Dividing by E causes an insignificant loss of accuracy; however + * this method is still poor since it is uneccessarily slow. + */ + if (ax > DBL_MAX / 2) + return (cpack(log(hypot(x / m_e, y / m_e)) + 1, atan2(y, x))); + + /* + * Avoid overflow when x or y is large. Avoid underflow when x or + * y is small. + */ + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) + return (cpack(log(hypot(x, y)), atan2(y, x))); + + return (cpack(log(ax * ax + ay * ay) / 2, atan2(y, x))); +} + +/* + * ================= + * | catanh, catan | + * ================= + */ + +/* + * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). + * Assumes x*x and y*y will not overflow. + * Assumes x and y are finite. + * Assumes y is non-negative. + * Assumes fabs(x) >= DBL_EPSILON. + */ +static inline double +sum_squares(double x, double y) +{ + + /* Avoid underflow when y is small. */ + if (y < SQRT_MIN) + return (x * x); + + return (x * x + y * y); +} + +/* + * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). + * Assumes x and y are not NaN, and one of x and y is larger than + * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use + * the code creal(1/z), because the imaginary part may produce an unwanted + * underflow. + * This is only called in a context where inexact is always raised before + * the call, so no effort is made to avoid or force inexact. + */ +static inline double +real_part_reciprocal(double x, double y) +{ + double scale; + uint32_t hx, hy; + int32_t ix, iy; + + /* + * This code is inspired by the C99 document n1124.pdf, Section G.5.1, + * example 2. + */ + GET_HIGH_WORD(hx, x); + ix = hx & 0x7ff00000; + GET_HIGH_WORD(hy, y); + iy = hy & 0x7ff00000; +#define BIAS (DBL_MAX_EXP - 1) +/* XXX more guard digits are useful iff there is extra precision. */ +#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ + if (ix - iy >= CUTOFF << 20 || isinf(x)) + return (1 / x); /* +-Inf -> +-0 is special */ + if (iy - ix >= CUTOFF << 20) + return (x / y / y); /* should avoid double div, but hard */ + if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) + return (x / (x * x + y * y)); + scale = 1; + SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} + +/* + * catanh(z) = log((1+z)/(1-z)) / 2 + * = log1p(4*x / |z-1|^2) / 4 + * + I * atan2(2*y, (1-x)*(1+x)-y*y) / 2 + * + * catanh(z) = z + O(z^3) as z -> 0 + * + * catanh(z) = 1/z + sign(y)*I*PI/2 + O(1/z^3) as z -> infinity + * The above formula works for the real part as well, because + * Re(catanh(z)) = x/|z|^2 + O(x/z^4) + * as z -> infinity, uniformly in x + */ +double complex +catanh(double complex z) +{ + double x, y, ax, ay, rx, ry; + + x = creal(z); + y = cimag(z); + ax = fabs(x); + ay = fabs(y); + + /* This helps handle many cases. */ + if (y == 0 && ax <= 1) + return (cpack(atanh(x), y)); + + /* To ensure the same accuracy as atan(), and to filter out z = 0. */ + if (x == 0) + return (cpack(x, atan(y))); + + if (isnan(x) || isnan(y)) { + /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ + if (isinf(x)) + return (cpack(copysign(0, x), y + y)); + /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ + if (isinf(y)) + return (cpack(copysign(0, x), + copysign(pio2_hi + pio2_lo, y))); + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return (cpack(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) + return (cpack(real_part_reciprocal(x, y), + copysign(pio2_hi + pio2_lo, y))); + + if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { + /* + * z = 0 was filtered out above. All other cases must raise + * inexact, but this is the only only that needs to do it + * explicitly. + */ + raise_inexact(); + return (z); + } + + if (ax == 1 && ay < DBL_EPSILON) + rx = (m_ln2 - log(ay)) / 2; + else + rx = log1p(4 * ax / sum_squares(ax - 1, ay)) / 4; + + if (ax == 1) + ry = atan2(2, -ay) / 2; + else if (ay < DBL_EPSILON) + ry = atan2(2 * ay, (1 - ax) * (1 + ax)) / 2; + else + ry = atan2(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; + + return (cpack(copysign(rx, x), copysign(ry, y))); +} + +/* + * catan(z) = reverse(catanh(reverse(z))) + * where reverse(x + I*y) = y + I*x = I*conj(z). + */ +double complex +catan(double complex z) +{ + double complex w = catanh(cpack(cimag(z), creal(z))); + + return (cpack(cimag(w), creal(w))); +} diff --git a/lib/msun/src/catrigf.c b/lib/msun/src/catrigf.c new file mode 100644 index 0000000..08ebef7 --- /dev/null +++ b/lib/msun/src/catrigf.c @@ -0,0 +1,393 @@ +/*- + * Copyright (c) 2012 Stephen Montgomery-Smith <stephen@FreeBSD.ORG> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * The algorithm is very close to that in "Implementing the complex arcsine + * and arccosine functions using exception handling" by T. E. Hull, Thomas F. + * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on + * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335, + * http://dl.acm.org/citation.cfm?id=275324. + * + * See catrig.c for complete comments. + * + * XXX comments were removed automatically, and even short ones on the right + * of statements were removed (all of them), contrary to normal style. Only + * a few comments on the right of declarations remain. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <complex.h> +#include <float.h> + +#include "math.h" +#include "math_private.h" + +#undef isinf +#define isinf(x) (fabsf(x) == INFINITY) +#undef isnan +#define isnan(x) ((x) != (x)) +#define raise_inexact() do { volatile float junk = 1 + tiny; } while(0) +#undef signbit +#define signbit(x) (__builtin_signbitf(x)) + +static const float +A_crossover = 10, +B_crossover = 0.6417, +FOUR_SQRT_MIN = 0x1p-61, +QUARTER_SQRT_MAX = 0x1p61, +m_e = 2.7182818285e0, /* 0xadf854.0p-22 */ +m_ln2 = 6.9314718056e-1, /* 0xb17218.0p-24 */ +pio2_hi = 1.5707962513e0, /* 0xc90fda.0p-23 */ +RECIP_EPSILON = 1 / FLT_EPSILON, +SQRT_3_EPSILON = 5.9801995673e-4, /* 0x9cc471.0p-34 */ +SQRT_6_EPSILON = 8.4572793338e-4, /* 0xddb3d7.0p-34 */ +SQRT_MIN = 0x1p-63; + +static const volatile float +pio2_lo = 7.5497899549e-8, /* 0xa22169.0p-47 */ +tiny = 0x1p-100; + +static float complex clog_for_large_values(float complex z); + +static inline float +f(float a, float b, float hypot_a_b) +{ + if (b < 0) + return ((hypot_a_b - b) / 2); + if (b == 0) + return (a / 2); + return (a * a / (hypot_a_b + b) / 2); +} + +static inline void +do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B, + float *sqrt_A2my2, float *new_y) +{ + float R, S, A; + float Am1, Amy; + + R = hypotf(x, y + 1); + S = hypotf(x, y - 1); + + A = (R + S) / 2; + if (A < 1) + A = 1; + + if (A < A_crossover) { + if (y == 1 && x < FLT_EPSILON * FLT_EPSILON / 128) { + *rx = sqrtf(x); + } else if (x >= FLT_EPSILON * fabsf(y - 1)) { + Am1 = f(x, 1 + y, R) + f(x, 1 - y, S); + *rx = log1pf(Am1 + sqrtf(Am1 * (A + 1))); + } else if (y < 1) { + *rx = x / sqrtf((1 - y) * (1 + y)); + } else { + *rx = log1pf((y - 1) + sqrtf((y - 1) * (y + 1))); + } + } else { + *rx = logf(A + sqrtf(A * A - 1)); + } + + *new_y = y; + + if (y < FOUR_SQRT_MIN) { + *B_is_usable = 0; + *sqrt_A2my2 = A * (2 / FLT_EPSILON); + *new_y = y * (2 / FLT_EPSILON); + return; + } + + *B = y / A; + *B_is_usable = 1; + + if (*B > B_crossover) { + *B_is_usable = 0; + if (y == 1 && x < FLT_EPSILON / 128) { + *sqrt_A2my2 = sqrtf(x) * sqrtf((A + y) / 2); + } else if (x >= FLT_EPSILON * fabsf(y - 1)) { + Amy = f(x, y + 1, R) + f(x, y - 1, S); + *sqrt_A2my2 = sqrtf(Amy * (A + y)); + } else if (y > 1) { + *sqrt_A2my2 = x * (4 / FLT_EPSILON / FLT_EPSILON) * y / + sqrtf((y + 1) * (y - 1)); + *new_y = y * (4 / FLT_EPSILON / FLT_EPSILON); + } else { + *sqrt_A2my2 = sqrtf((1 - y) * (1 + y)); + } + } +} + +float complex +casinhf(float complex z) +{ + float x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y; + int B_is_usable; + float complex w; + + x = crealf(z); + y = cimagf(z); + ax = fabsf(x); + ay = fabsf(y); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (cpackf(x, y + y)); + if (isinf(y)) + return (cpackf(y, x + x)); + if (y == 0) + return (cpackf(x + x, y)); + return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + if (signbit(x) == 0) + w = clog_for_large_values(z) + m_ln2; + else + w = clog_for_large_values(-z) + m_ln2; + return (cpackf(copysignf(crealf(w), x), + copysignf(cimagf(w), y))); + } + + if (x == 0 && y == 0) + return (z); + + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (z); + + do_hard_work(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); + if (B_is_usable) + ry = asinf(B); + else + ry = atan2f(new_y, sqrt_A2my2); + return (cpackf(copysignf(rx, x), copysignf(ry, y))); +} + +float complex +casinf(float complex z) +{ + float complex w = casinhf(cpackf(cimagf(z), crealf(z))); + + return (cpackf(cimagf(w), crealf(w))); +} + +float complex +cacosf(float complex z) +{ + float x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; + int sx, sy; + int B_is_usable; + float complex w; + + x = crealf(z); + y = cimagf(z); + sx = signbit(x); + sy = signbit(y); + ax = fabsf(x); + ay = fabsf(y); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (cpackf(y + y, -INFINITY)); + if (isinf(y)) + return (cpackf(x + x, -y)); + if (x == 0) + return (cpackf(pio2_hi + pio2_lo, y + y)); + return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + w = clog_for_large_values(z); + rx = fabsf(cimagf(w)); + ry = crealf(w) + m_ln2; + if (sy == 0) + ry = -ry; + return (cpackf(rx, ry)); + } + + if (x == 1 && y == 0) + return (cpackf(0, -y)); + + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) + return (cpackf(pio2_hi - (x - pio2_lo), -y)); + + do_hard_work(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); + if (B_is_usable) { + if (sx == 0) + rx = acosf(B); + else + rx = acosf(-B); + } else { + if (sx == 0) + rx = atan2f(sqrt_A2mx2, new_x); + else + rx = atan2f(sqrt_A2mx2, -new_x); + } + if (sy == 0) + ry = -ry; + return (cpackf(rx, ry)); +} + +float complex +cacoshf(float complex z) +{ + float complex w; + float rx, ry; + + w = cacosf(z); + rx = crealf(w); + ry = cimagf(w); + if (isnan(rx) && isnan(ry)) + return (cpackf(ry, rx)); + if (isnan(rx)) + return (cpackf(fabsf(ry), rx)); + if (isnan(ry)) + return (cpackf(ry, ry)); + return (cpackf(fabsf(ry), copysignf(rx, cimagf(z)))); +} + +static float complex +clog_for_large_values(float complex z) +{ + float x, y; + float ax, ay, t; + + x = crealf(z); + y = cimagf(z); + ax = fabsf(x); + ay = fabsf(y); + if (ax < ay) { + t = ax; + ax = ay; + ay = t; + } + + if (ax > FLT_MAX / 2) + return (cpackf(logf(hypotf(x / m_e, y / m_e)) + 1, + atan2f(y, x))); + + if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) + return (cpackf(logf(hypotf(x, y)), atan2f(y, x))); + + return (cpackf(logf(ax * ax + ay * ay) / 2, atan2f(y, x))); +} + +static inline float +sum_squares(float x, float y) +{ + + if (y < SQRT_MIN) + return (x * x); + + return (x * x + y * y); +} + +static inline float +real_part_reciprocal(float x, float y) +{ + float scale; + uint32_t hx, hy; + int32_t ix, iy; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7f800000; + GET_FLOAT_WORD(hy, y); + iy = hy & 0x7f800000; +#define BIAS (FLT_MAX_EXP - 1) +#define CUTOFF (FLT_MANT_DIG / 2 + 1) + if (ix - iy >= CUTOFF << 23 || isinf(x)) + return (1 / x); + if (iy - ix >= CUTOFF << 23) + return (x / y / y); + if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23) + return (x / (x * x + y * y)); + SET_FLOAT_WORD(scale, 0x7f800000 - ix); + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} + +float complex +catanhf(float complex z) +{ + float x, y, ax, ay, rx, ry; + + x = crealf(z); + y = cimagf(z); + ax = fabsf(x); + ay = fabsf(y); + + if (y == 0 && ax <= 1) + return (cpackf(atanhf(x), y)); + + if (x == 0) + return (cpackf(x, atanf(y))); + + if (isnan(x) || isnan(y)) { + if (isinf(x)) + return (cpackf(copysignf(0, x), y + y)); + if (isinf(y)) + return (cpackf(copysignf(0, x), + copysignf(pio2_hi + pio2_lo, y))); + return (cpackf(x + 0.0L + (y + 0), x + 0.0L + (y + 0))); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) + return (cpackf(real_part_reciprocal(x, y), + copysignf(pio2_hi + pio2_lo, y))); + + if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { + raise_inexact(); + return (z); + } + + if (ax == 1 && ay < FLT_EPSILON) + rx = (m_ln2 - logf(ay)) / 2; + else + rx = log1pf(4 * ax / sum_squares(ax - 1, ay)) / 4; + + if (ax == 1) + ry = atan2f(2, -ay) / 2; + else if (ay < FLT_EPSILON) + ry = atan2f(2 * ay, (1 - ax) * (1 + ax)) / 2; + else + ry = atan2f(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; + + return (cpackf(copysignf(rx, x), copysignf(ry, y))); +} + +float complex +catanf(float complex z) +{ + float complex w = catanhf(cpackf(cimagf(z), crealf(z))); + + return (cpackf(cimagf(w), crealf(w))); +} diff --git a/lib/msun/src/e_acosh.c b/lib/msun/src/e_acosh.c index a0cc6cb..358c8bd 100644 --- a/lib/msun/src/e_acosh.c +++ b/lib/msun/src/e_acosh.c @@ -29,6 +29,8 @@ __FBSDID("$FreeBSD$"); * acosh(NaN) is NaN without signal. */ +#include <float.h> + #include "math.h" #include "math_private.h" @@ -60,3 +62,7 @@ __ieee754_acosh(double x) return log1p(t+sqrt(2.0*t+t*t)); } } + +#if LDBL_MANT_DIG == 53 +__weak_reference(acosh, acoshl); +#endif diff --git a/lib/msun/src/e_acoshl.c b/lib/msun/src/e_acoshl.c new file mode 100644 index 0000000..b9f3aed --- /dev/null +++ b/lib/msun/src/e_acoshl.c @@ -0,0 +1,89 @@ +/* from: FreeBSD: head/lib/msun/src/e_acosh.c 176451 2008-02-22 02:30:36Z das */ + +/* @(#)e_acosh.c 1.3 95/01/18 */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * See e_acosh.c for complete comments. + * + * Converted to long double by David Schultz <das@FreeBSD.ORG> and + * Bruce D. Evans. + */ + +#include <float.h> +#ifdef __i386__ +#include <ieeefp.h> +#endif + +#include "fpmath.h" +#include "math.h" +#include "math_private.h" + +/* EXP_LARGE is the threshold above which we use acosh(x) ~= log(2x). */ +#if LDBL_MANT_DIG == 64 +#define EXP_LARGE 34 +#elif LDBL_MANT_DIG == 113 +#define EXP_LARGE 58 +#else +#error "Unsupported long double format" +#endif + +#if LDBL_MAX_EXP != 0x4000 +/* We also require the usual expsign encoding. */ +#error "Unsupported long double format" +#endif + +#define BIAS (LDBL_MAX_EXP - 1) + +static const double +one = 1.0; + +#if LDBL_MANT_DIG == 64 +static const union IEEEl2bits +u_ln2 = LD80C(0xb17217f7d1cf79ac, -1, 6.93147180559945309417e-1L); +#define ln2 u_ln2.e +#elif LDBL_MANT_DIG == 113 +static const long double +ln2 = 6.93147180559945309417232121458176568e-1L; /* 0x162e42fefa39ef35793c7673007e6.0p-113 */ +#else +#error "Unsupported long double format" +#endif + +long double +acoshl(long double x) +{ + long double t; + int16_t hx; + + ENTERI(); + GET_LDBL_EXPSIGN(hx, x); + if (hx < 0x3fff) { /* x < 1, or misnormal */ + RETURNI((x-x)/(x-x)); + } else if (hx >= BIAS + EXP_LARGE) { /* x >= LARGE */ + if (hx >= 0x7fff) { /* x is inf, NaN or misnormal */ + RETURNI(x+x); + } else + RETURNI(logl(x)+ln2); /* acosh(huge)=log(2x), or misnormal */ + } else if (hx == 0x3fff && x == 1) { + RETURNI(0.0); /* acosh(1) = 0 */ + } else if (hx >= 0x4000) { /* LARGE > x >= 2, or misnormal */ + t=x*x; + RETURNI(logl(2.0*x-one/(x+sqrtl(t-one)))); + } else { /* 1<x<2 */ + t = x-one; + RETURNI(log1pl(t+sqrtl(2.0*t+t*t))); + } +} diff --git a/lib/msun/src/e_atanh.c b/lib/msun/src/e_atanh.c index ab8a2e1..422ff26 100644 --- a/lib/msun/src/e_atanh.c +++ b/lib/msun/src/e_atanh.c @@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$"); * */ +#include <float.h> + #include "math.h" #include "math_private.h" @@ -60,3 +62,7 @@ __ieee754_atanh(double x) t = 0.5*log1p((x+x)/(one-x)); if(hx>=0) return t; else return -t; } + +#if LDBL_MANT_DIG == 53 +__weak_reference(atanh, atanhl); +#endif diff --git a/lib/msun/src/e_atanhl.c b/lib/msun/src/e_atanhl.c new file mode 100644 index 0000000..11d56ea --- /dev/null +++ b/lib/msun/src/e_atanhl.c @@ -0,0 +1,74 @@ +/* from: FreeBSD: head/lib/msun/src/e_atanh.c 176451 2008-02-22 02:30:36Z das */ + +/* @(#)e_atanh.c 1.3 95/01/18 */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * See e_atanh.c for complete comments. + * + * Converted to long double by David Schultz <das@FreeBSD.ORG> and + * Bruce D. Evans. + */ + +#include <float.h> +#ifdef __i386__ +#include <ieeefp.h> +#endif + +#include "fpmath.h" +#include "math.h" +#include "math_private.h" + +/* EXP_TINY is the threshold below which we use atanh(x) ~= x. */ +#if LDBL_MANT_DIG == 64 +#define EXP_TINY -34 +#elif LDBL_MANT_DIG == 113 +#define EXP_TINY -58 +#else +#error "Unsupported long double format" +#endif + +#if LDBL_MAX_EXP != 0x4000 +/* We also require the usual expsign encoding. */ +#error "Unsupported long double format" +#endif + +#define BIAS (LDBL_MAX_EXP - 1) + +static const double one = 1.0, huge = 1e300; +static const double zero = 0.0; + +long double +atanhl(long double x) +{ + long double t; + uint16_t hx, ix; + + ENTERI(); + GET_LDBL_EXPSIGN(hx, x); + ix = hx & 0x7fff; + if (ix >= 0x3fff) /* |x| >= 1, or NaN or misnormal */ + RETURNI(fabsl(x) == 1 ? x / zero : (x - x) / (x - x)); + if (ix < BIAS + EXP_TINY && (huge + x) > zero) + RETURNI(x); /* x is tiny */ + SET_LDBL_EXPSIGN(x, ix); + if (ix < 0x3ffe) { /* |x| < 0.5, or misnormal */ + t = x+x; + t = 0.5*log1pl(t+t*x/(one-x)); + } else + t = 0.5*log1pl((x+x)/(one-x)); + RETURNI((hx & 0x8000) == 0 ? t : -t); +} diff --git a/lib/msun/src/e_exp.c b/lib/msun/src/e_exp.c index e432bc8..94c9769 100644 --- a/lib/msun/src/e_exp.c +++ b/lib/msun/src/e_exp.c @@ -84,7 +84,6 @@ __FBSDID("$FreeBSD$"); static const double one = 1.0, halF[2] = {0.5,-0.5,}, -huge = 1.0e+300, o_threshold= 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ u_threshold= -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */ ln2HI[2] ={ 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ @@ -99,6 +98,7 @@ P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ P5 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ static volatile double +huge = 1.0e+300, twom1000= 9.33263618503218878990e-302; /* 2**-1000=0x01700000,0*/ double diff --git a/lib/msun/src/e_expf.c b/lib/msun/src/e_expf.c index a479076..b1fe2c5 100644 --- a/lib/msun/src/e_expf.c +++ b/lib/msun/src/e_expf.c @@ -24,7 +24,6 @@ __FBSDID("$FreeBSD$"); static const float one = 1.0, halF[2] = {0.5,-0.5,}, -huge = 1.0e+30, o_threshold= 8.8721679688e+01, /* 0x42b17180 */ u_threshold= -1.0397208405e+02, /* 0xc2cff1b5 */ ln2HI[2] ={ 6.9314575195e-01, /* 0x3f317200 */ @@ -39,7 +38,9 @@ invln2 = 1.4426950216e+00, /* 0x3fb8aa3b */ P1 = 1.6666625440e-1, /* 0xaaaa8f.0p-26 */ P2 = -2.7667332906e-3; /* -0xb55215.0p-32 */ -static volatile float twom100 = 7.8886090522e-31; /* 2**-100=0x0d800000 */ +static volatile float +huge = 1.0e+30, +twom100 = 7.8886090522e-31; /* 2**-100=0x0d800000 */ float __ieee754_expf(float x) diff --git a/lib/msun/src/e_log.c b/lib/msun/src/e_log.c index 204fb48..68bc107 100644 --- a/lib/msun/src/e_log.c +++ b/lib/msun/src/e_log.c @@ -65,6 +65,8 @@ __FBSDID("$FreeBSD$"); * to produce the hexadecimal values shown. */ +#include <float.h> + #include "math.h" #include "math_private.h" @@ -81,6 +83,7 @@ Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ static const double zero = 0.0; +static volatile double vzero = 0.0; double __ieee754_log(double x) @@ -94,7 +97,7 @@ __ieee754_log(double x) k=0; if (hx < 0x00100000) { /* x < 2**-1022 */ if (((hx&0x7fffffff)|lx)==0) - return -two54/zero; /* log(+-0)=-inf */ + return -two54/vzero; /* log(+-0)=-inf */ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ k -= 54; x *= two54; /* subnormal number, scale up x */ GET_HIGH_WORD(hx,x); @@ -138,3 +141,7 @@ __ieee754_log(double x) return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f); } } + +#if (LDBL_MANT_DIG == 53) +__weak_reference(log, logl); +#endif diff --git a/lib/msun/src/e_log10.c b/lib/msun/src/e_log10.c index 104d257..3c89ed2 100644 --- a/lib/msun/src/e_log10.c +++ b/lib/msun/src/e_log10.c @@ -22,6 +22,8 @@ __FBSDID("$FreeBSD$"); * in not-quite-routine extra precision. */ +#include <float.h> + #include "math.h" #include "math_private.h" #include "k_log.h" @@ -34,6 +36,7 @@ log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ static const double zero = 0.0; +static volatile double vzero = 0.0; double __ieee754_log10(double x) @@ -47,7 +50,7 @@ __ieee754_log10(double x) k=0; if (hx < 0x00100000) { /* x < 2**-1022 */ if (((hx&0x7fffffff)|lx)==0) - return -two54/zero; /* log(+-0)=-inf */ + return -two54/vzero; /* log(+-0)=-inf */ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ k -= 54; x *= two54; /* subnormal number, scale up x */ GET_HIGH_WORD(hx,x); @@ -85,3 +88,7 @@ __ieee754_log10(double x) return val_lo + val_hi; } + +#if (LDBL_MANT_DIG == 53) +__weak_reference(log10, log10l); +#endif diff --git a/lib/msun/src/e_log10f.c b/lib/msun/src/e_log10f.c index c876594..9856df2 100644 --- a/lib/msun/src/e_log10f.c +++ b/lib/msun/src/e_log10f.c @@ -28,6 +28,7 @@ log10_2hi = 3.0102920532e-01, /* 0x3e9a2080 */ log10_2lo = 7.9034151668e-07; /* 0x355427db */ static const float zero = 0.0; +static volatile float vzero = 0.0; float __ieee754_log10f(float x) @@ -40,7 +41,7 @@ __ieee754_log10f(float x) k=0; if (hx < 0x00800000) { /* x < 2**-126 */ if ((hx&0x7fffffff)==0) - return -two25/zero; /* log(+-0)=-inf */ + return -two25/vzero; /* log(+-0)=-inf */ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ k -= 25; x *= two25; /* subnormal number, scale up x */ GET_FLOAT_WORD(hx,x); diff --git a/lib/msun/src/e_log2.c b/lib/msun/src/e_log2.c index 1fc44a5..4766cdb 100644 --- a/lib/msun/src/e_log2.c +++ b/lib/msun/src/e_log2.c @@ -24,6 +24,8 @@ __FBSDID("$FreeBSD$"); * in not-quite-routine extra precision. */ +#include <float.h> + #include "math.h" #include "math_private.h" #include "k_log.h" @@ -34,6 +36,7 @@ ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ static const double zero = 0.0; +static volatile double vzero = 0.0; double __ieee754_log2(double x) @@ -47,7 +50,7 @@ __ieee754_log2(double x) k=0; if (hx < 0x00100000) { /* x < 2**-1022 */ if (((hx&0x7fffffff)|lx)==0) - return -two54/zero; /* log(+-0)=-inf */ + return -two54/vzero; /* log(+-0)=-inf */ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ k -= 54; x *= two54; /* subnormal number, scale up x */ GET_HIGH_WORD(hx,x); @@ -108,3 +111,7 @@ __ieee754_log2(double x) return val_lo + val_hi; } + +#if (LDBL_MANT_DIG == 53) +__weak_reference(log2, log2l); +#endif diff --git a/lib/msun/src/e_log2f.c b/lib/msun/src/e_log2f.c index 7166346..1794484 100644 --- a/lib/msun/src/e_log2f.c +++ b/lib/msun/src/e_log2f.c @@ -26,6 +26,7 @@ ivln2hi = 1.4428710938e+00, /* 0x3fb8b000 */ ivln2lo = -1.7605285393e-04; /* 0xb9389ad4 */ static const float zero = 0.0; +static volatile float vzero = 0.0; float __ieee754_log2f(float x) @@ -38,7 +39,7 @@ __ieee754_log2f(float x) k=0; if (hx < 0x00800000) { /* x < 2**-126 */ if ((hx&0x7fffffff)==0) - return -two25/zero; /* log(+-0)=-inf */ + return -two25/vzero; /* log(+-0)=-inf */ if (hx<0) return (x-x)/zero; /* log(-#) = NaN */ k -= 25; x *= two25; /* subnormal number, scale up x */ GET_FLOAT_WORD(hx,x); diff --git a/lib/msun/src/e_logf.c b/lib/msun/src/e_logf.c index c3be6ed..ec3985f 100644 --- a/lib/msun/src/e_logf.c +++ b/lib/msun/src/e_logf.c @@ -30,6 +30,7 @@ Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */ Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */ static const float zero = 0.0; +static volatile float vzero = 0.0; float __ieee754_logf(float x) @@ -42,7 +43,7 @@ __ieee754_logf(float x) k=0; if (ix < 0x00800000) { /* x < 2**-126 */ if ((ix&0x7fffffff)==0) - return -two25/zero; /* log(+-0)=-inf */ + return -two25/vzero; /* log(+-0)=-inf */ if (ix<0) return (x-x)/zero; /* log(-#) = NaN */ k -= 25; x *= two25; /* subnormal number, scale up x */ GET_FLOAT_WORD(ix,x); diff --git a/lib/msun/src/math.h b/lib/msun/src/math.h index c6cee13..1bd931c 100644 --- a/lib/msun/src/math.h +++ b/lib/msun/src/math.h @@ -80,27 +80,43 @@ extern const union __nan_un { #define FP_NORMAL 0x04 #define FP_SUBNORMAL 0x08 #define FP_ZERO 0x10 + +#if (__STDC_VERSION__ >= 201112L && defined(__clang__)) || \ + __has_extension(c_generic_selections) +#define __fp_type_select(x, f, d, ld) _Generic((x), \ + float: f(x), \ + double: d(x), \ + long double: ld(x), \ + volatile float: f(x), \ + volatile double: d(x), \ + volatile long double: ld(x), \ + volatile const float: f(x), \ + volatile const double: d(x), \ + volatile const long double: ld(x), \ + const float: f(x), \ + const double: d(x), \ + const long double: ld(x)) +#elif __GNUC_PREREQ__(3, 1) && !defined(__cplusplus) +#define __fp_type_select(x, f, d, ld) __builtin_choose_expr( \ + __builtin_types_compatible_p(__typeof(x), long double), ld(x), \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(__typeof(x), double), d(x), \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(__typeof(x), float), f(x), (void)0))) +#else +#define __fp_type_select(x, f, d, ld) \ + ((sizeof(x) == sizeof(float)) ? f(x) \ + : (sizeof(x) == sizeof(double)) ? d(x) \ + : ld(x)) +#endif + #define fpclassify(x) \ - ((sizeof (x) == sizeof (float)) ? __fpclassifyf(x) \ - : (sizeof (x) == sizeof (double)) ? __fpclassifyd(x) \ - : __fpclassifyl(x)) - -#define isfinite(x) \ - ((sizeof (x) == sizeof (float)) ? __isfinitef(x) \ - : (sizeof (x) == sizeof (double)) ? __isfinite(x) \ - : __isfinitel(x)) -#define isinf(x) \ - ((sizeof (x) == sizeof (float)) ? __isinff(x) \ - : (sizeof (x) == sizeof (double)) ? isinf(x) \ - : __isinfl(x)) -#define isnan(x) \ - ((sizeof (x) == sizeof (float)) ? __isnanf(x) \ - : (sizeof (x) == sizeof (double)) ? isnan(x) \ - : __isnanl(x)) -#define isnormal(x) \ - ((sizeof (x) == sizeof (float)) ? __isnormalf(x) \ - : (sizeof (x) == sizeof (double)) ? __isnormal(x) \ - : __isnormall(x)) + __fp_type_select(x, __fpclassifyf, __fpclassifyd, __fpclassifyl) +#define isfinite(x) __fp_type_select(x, __isfinitef, __isfinite, __isfinitel) +#define isinf(x) __fp_type_select(x, __isinff, __isinf, __isinfl) +#define isnan(x) \ + __fp_type_select(x, __inline_isnanf, __inline_isnan, __inline_isnanl) +#define isnormal(x) __fp_type_select(x, __isnormalf, __isnormal, __isnormall) #ifdef __MATH_BUILTIN_RELOPS #define isgreater(x, y) __builtin_isgreater((x), (y)) @@ -119,10 +135,7 @@ extern const union __nan_un { #define isunordered(x, y) (isnan(x) || isnan(y)) #endif /* __MATH_BUILTIN_RELOPS */ -#define signbit(x) \ - ((sizeof (x) == sizeof (float)) ? __signbitf(x) \ - : (sizeof (x) == sizeof (double)) ? __signbit(x) \ - : __signbitl(x)) +#define signbit(x) __fp_type_select(x, __signbitf, __signbit, __signbitl) typedef __double_t double_t; typedef __float_t float_t; @@ -175,9 +188,8 @@ int __isfinitef(float) __pure2; int __isfinite(double) __pure2; int __isfinitel(long double) __pure2; int __isinff(float) __pure2; +int __isinf(double) __pure2; int __isinfl(long double) __pure2; -int __isnanf(float) __pure2; -int __isnanl(long double) __pure2; int __isnormalf(float) __pure2; int __isnormal(double) __pure2; int __isnormall(long double) __pure2; @@ -185,6 +197,42 @@ int __signbit(double) __pure2; int __signbitf(float) __pure2; int __signbitl(long double) __pure2; +static __inline int +__inline_isnan(__const double __x) +{ + + return (__x != __x); +} + +static __inline int +__inline_isnanf(__const float __x) +{ + + return (__x != __x); +} + +static __inline int +__inline_isnanl(__const long double __x) +{ + + return (__x != __x); +} + +/* + * Version 2 of the Single UNIX Specification (UNIX98) defined isnan() and + * isinf() as functions taking double. C99, and the subsequent POSIX revisions + * (SUSv3, POSIX.1-2001, define it as a macro that accepts any real floating + * point type. If we are targeting SUSv2 and C99 or C11 (or C++11) then we + * expose the newer definition, assuming that the language spec takes + * precedence over the operating system interface spec. + */ +#if __XSI_VISIBLE > 0 && __XSI_VISIBLE < 600 && __ISO_C_VISIBLE < 1999 +#undef isinf +#undef isnan +int isinf(double); +int isnan(double); +#endif + double acos(double); double asin(double); double atan(double); @@ -227,8 +275,6 @@ double expm1(double); double fma(double, double, double); double hypot(double, double); int ilogb(double) __pure2; -int (isinf)(double) __pure2; -int (isnan)(double) __pure2; double lgamma(double); long long llrint(double); long long llround(double); @@ -395,9 +441,12 @@ float significandf(float); * long double versions of ISO/POSIX math functions */ #if __ISO_C_VISIBLE >= 1999 +long double acoshl(long double); long double acosl(long double); +long double asinhl(long double); long double asinl(long double); long double atan2l(long double, long double); +long double atanhl(long double); long double atanl(long double); long double cbrtl(long double); long double ceill(long double); @@ -405,6 +454,7 @@ long double copysignl(long double, long double) __pure2; long double cosl(long double); long double exp2l(long double); long double expl(long double); +long double expm1l(long double); long double fabsl(long double) __pure2; long double fdiml(long double, long double); long double floorl(long double); @@ -418,7 +468,11 @@ int ilogbl(long double) __pure2; long double ldexpl(long double, int); long long llrintl(long double); long long llroundl(long double); +long double log10l(long double); +long double log1pl(long double); +long double log2l(long double); long double logbl(long double); +long double logl(long double); long lrintl(long double); long lroundl(long double); long double modfl(long double, long double *); /* fundamentally !__pure2 */ @@ -456,18 +510,10 @@ __END_DECLS */ __BEGIN_DECLS -long double acoshl(long double); -long double asinhl(long double); -long double atanhl(long double); long double coshl(long double); long double erfcl(long double); long double erfl(long double); -long double expm1l(long double); long double lgammal(long double); -long double log10l(long double); -long double log1pl(long double); -long double log2l(long double); -long double logl(long double); long double powl(long double, long double); long double sinhl(long double); long double tanhl(long double); diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index 5662df0..8ebc7fb 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -188,6 +188,33 @@ do { \ (d) = sf_u.value; \ } while (0) +/* + * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long + * double. + */ + +#define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \ +do { \ + union IEEEl2bits ew_u; \ + ew_u.e = (d); \ + (ix0) = ew_u.xbits.expsign; \ + (ix1) = ew_u.xbits.man; \ +} while (0) + +/* + * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit + * long double. + */ + +#define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \ +do { \ + union IEEEl2bits ew_u; \ + ew_u.e = (d); \ + (ix0) = ew_u.xbits.expsign; \ + (ix1) = ew_u.xbits.manh; \ + (ix2) = ew_u.xbits.manl; \ +} while (0) + /* Get expsign as a 16 bit int from a long double. */ #define GET_LDBL_EXPSIGN(i,d) \ @@ -197,6 +224,33 @@ do { \ (i) = ge_u.xbits.expsign; \ } while (0) +/* + * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int + * mantissa. + */ + +#define INSERT_LDBL80_WORDS(d,ix0,ix1) \ +do { \ + union IEEEl2bits iw_u; \ + iw_u.xbits.expsign = (ix0); \ + iw_u.xbits.man = (ix1); \ + (d) = iw_u.e; \ +} while (0) + +/* + * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints + * comprising the mantissa. + */ + +#define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \ +do { \ + union IEEEl2bits iw_u; \ + iw_u.xbits.expsign = (ix0); \ + iw_u.xbits.manh = (ix1); \ + iw_u.xbits.manl = (ix2); \ + (d) = iw_u.e; \ +} while (0) + /* Set expsign of a long double from a 16 bit int. */ #define SET_LDBL_EXPSIGN(d,v) \ @@ -261,6 +315,110 @@ do { \ #define RETURNF(v) return (v) /* + * 2sum gives the same result as 2sumF without requiring |a| >= |b| or + * a == 0, but is slower. + */ +#define _2sum(a, b) do { \ + __typeof(a) __s, __w; \ + \ + __w = (a) + (b); \ + __s = __w - (a); \ + (b) = ((a) - (__w - __s)) + ((b) - __s); \ + (a) = __w; \ +} while (0) + +/* + * 2sumF algorithm. + * + * "Normalize" the terms in the infinite-precision expression a + b for + * the sum of 2 floating point values so that b is as small as possible + * relative to 'a'. (The resulting 'a' is the value of the expression in + * the same precision as 'a' and the resulting b is the rounding error.) + * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and + * exponent overflow or underflow must not occur. This uses a Theorem of + * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum" + * is apparently due to Skewchuk (1997). + * + * For this to always work, assignment of a + b to 'a' must not retain any + * extra precision in a + b. This is required by C standards but broken + * in many compilers. The brokenness cannot be worked around using + * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this + * algorithm would be destroyed by non-null strict assignments. (The + * compilers are correct to be broken -- the efficiency of all floating + * point code calculations would be destroyed similarly if they forced the + * conversions.) + * + * Fortunately, a case that works well can usually be arranged by building + * any extra precision into the type of 'a' -- 'a' should have type float_t, + * double_t or long double. b's type should be no larger than 'a's type. + * Callers should use these types with scopes as large as possible, to + * reduce their own extra-precision and efficiciency problems. In + * particular, they shouldn't convert back and forth just to call here. + */ +#ifdef DEBUG +#define _2sumF(a, b) do { \ + __typeof(a) __w; \ + volatile __typeof(a) __ia, __ib, __r, __vw; \ + \ + __ia = (a); \ + __ib = (b); \ + assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \ + \ + __w = (a) + (b); \ + (b) = ((a) - __w) + (b); \ + (a) = __w; \ + \ + /* The next 2 assertions are weak if (a) is already long double. */ \ + assert((long double)__ia + __ib == (long double)(a) + (b)); \ + __vw = __ia + __ib; \ + __r = __ia - __vw; \ + __r += __ib; \ + assert(__vw == (a) && __r == (b)); \ +} while (0) +#else /* !DEBUG */ +#define _2sumF(a, b) do { \ + __typeof(a) __w; \ + \ + __w = (a) + (b); \ + (b) = ((a) - __w) + (b); \ + (a) = __w; \ +} while (0) +#endif /* DEBUG */ + +/* + * Set x += c, where x is represented in extra precision as a + b. + * x must be sufficiently normalized and sufficiently larger than c, + * and the result is then sufficiently normalized. + * + * The details of ordering are that |a| must be >= |c| (so that (a, c) + * can be normalized without extra work to swap 'a' with c). The details of + * the normalization are that b must be small relative to the normalized 'a'. + * Normalization of (a, c) makes the normalized c tiny relative to the + * normalized a, so b remains small relative to 'a' in the result. However, + * b need not ever be tiny relative to 'a'. For example, b might be about + * 2**20 times smaller than 'a' to give about 20 extra bits of precision. + * That is usually enough, and adding c (which by normalization is about + * 2**53 times smaller than a) cannot change b significantly. However, + * cancellation of 'a' with c in normalization of (a, c) may reduce 'a' + * significantly relative to b. The caller must ensure that significant + * cancellation doesn't occur, either by having c of the same sign as 'a', + * or by having |c| a few percent smaller than |a|. Pre-normalization of + * (a, b) may help. + * + * This is is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2 + * exercise 19). We gain considerable efficiency by requiring the terms to + * be sufficiently normalized and sufficiently increasing. + */ +#define _3sumF(a, b, c) do { \ + __typeof(a) __tmp; \ + \ + __tmp = (c); \ + _2sumF(__tmp, (a)); \ + (b) += (a); \ + (a) = __tmp; \ +} while (0) + +/* * Common routine to process the arguments to nan(), nanf(), and nanl(). */ void _scan_nan(uint32_t *__words, int __num_words, const char *__s); @@ -370,6 +528,140 @@ irintl(long double x) #endif /* __GNUCLIKE_ASM */ +#ifdef DEBUG +#if defined(__amd64__) || defined(__i386__) +#define breakpoint() asm("int $3") +#else +#include <signal.h> + +#define breakpoint() raise(SIGTRAP) +#endif +#endif + +/* Write a pari script to test things externally. */ +#ifdef DOPRINT +#include <stdio.h> + +#ifndef DOPRINT_SWIZZLE +#define DOPRINT_SWIZZLE 0 +#endif + +#ifdef DOPRINT_LD80 + +#define DOPRINT_START(xp) do { \ + uint64_t __lx; \ + uint16_t __hx; \ + \ + /* Hack to give more-problematic args. */ \ + EXTRACT_LDBL80_WORDS(__hx, __lx, *xp); \ + __lx ^= DOPRINT_SWIZZLE; \ + INSERT_LDBL80_WORDS(*xp, __hx, __lx); \ + printf("x = %.21Lg; ", (long double)*xp); \ +} while (0) +#define DOPRINT_END1(v) \ + printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) +#define DOPRINT_END2(hi, lo) \ + printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ + (long double)(hi), (long double)(lo)) + +#elif defined(DOPRINT_D64) + +#define DOPRINT_START(xp) do { \ + uint32_t __hx, __lx; \ + \ + EXTRACT_WORDS(__hx, __lx, *xp); \ + __lx ^= DOPRINT_SWIZZLE; \ + INSERT_WORDS(*xp, __hx, __lx); \ + printf("x = %.21Lg; ", (long double)*xp); \ +} while (0) +#define DOPRINT_END1(v) \ + printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) +#define DOPRINT_END2(hi, lo) \ + printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ + (long double)(hi), (long double)(lo)) + +#elif defined(DOPRINT_F32) + +#define DOPRINT_START(xp) do { \ + uint32_t __hx; \ + \ + GET_FLOAT_WORD(__hx, *xp); \ + __hx ^= DOPRINT_SWIZZLE; \ + SET_FLOAT_WORD(*xp, __hx); \ + printf("x = %.21Lg; ", (long double)*xp); \ +} while (0) +#define DOPRINT_END1(v) \ + printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v)) +#define DOPRINT_END2(hi, lo) \ + printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \ + (long double)(hi), (long double)(lo)) + +#else /* !DOPRINT_LD80 && !DOPRINT_D64 (LD128 only) */ + +#ifndef DOPRINT_SWIZZLE_HIGH +#define DOPRINT_SWIZZLE_HIGH 0 +#endif + +#define DOPRINT_START(xp) do { \ + uint64_t __lx, __llx; \ + uint16_t __hx; \ + \ + EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp); \ + __llx ^= DOPRINT_SWIZZLE; \ + __lx ^= DOPRINT_SWIZZLE_HIGH; \ + INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx); \ + printf("x = %.36Lg; ", (long double)*xp); \ +} while (0) +#define DOPRINT_END1(v) \ + printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v)) +#define DOPRINT_END2(hi, lo) \ + printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n", \ + (long double)(hi), (long double)(lo)) + +#endif /* DOPRINT_LD80 */ + +#else /* !DOPRINT */ +#define DOPRINT_START(xp) +#define DOPRINT_END1(v) +#define DOPRINT_END2(hi, lo) +#endif /* DOPRINT */ + +#define RETURNP(x) do { \ + DOPRINT_END1(x); \ + RETURNF(x); \ +} while (0) +#define RETURNPI(x) do { \ + DOPRINT_END1(x); \ + RETURNI(x); \ +} while (0) +#define RETURN2P(x, y) do { \ + DOPRINT_END2((x), (y)); \ + RETURNF((x) + (y)); \ +} while (0) +#define RETURN2PI(x, y) do { \ + DOPRINT_END2((x), (y)); \ + RETURNI((x) + (y)); \ +} while (0) +#ifdef STRUCT_RETURN +#define RETURNSP(rp) do { \ + if (!(rp)->lo_set) \ + RETURNP((rp)->hi); \ + RETURN2P((rp)->hi, (rp)->lo); \ +} while (0) +#define RETURNSPI(rp) do { \ + if (!(rp)->lo_set) \ + RETURNPI((rp)->hi); \ + RETURN2PI((rp)->hi, (rp)->lo); \ +} while (0) +#endif +#define SUM2P(x, y) ({ \ + const __typeof (x) __x = (x); \ + const __typeof (y) __y = (y); \ + \ + DOPRINT_END2(__x, __y); \ + __x + __y; \ +}) + /* * ieee style elementary functions * diff --git a/lib/msun/src/s_asinh.c b/lib/msun/src/s_asinh.c index f3fdf74..cbb3d46 100644 --- a/lib/msun/src/s_asinh.c +++ b/lib/msun/src/s_asinh.c @@ -24,6 +24,8 @@ __FBSDID("$FreeBSD$"); * := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2))) */ +#include <float.h> + #include "math.h" #include "math_private.h" @@ -54,3 +56,7 @@ asinh(double x) } if(hx>0) return w; else return -w; } + +#if LDBL_MANT_DIG == 53 +__weak_reference(asinh, asinhl); +#endif diff --git a/lib/msun/src/s_asinhl.c b/lib/msun/src/s_asinhl.c new file mode 100644 index 0000000..ba28f59 --- /dev/null +++ b/lib/msun/src/s_asinhl.c @@ -0,0 +1,91 @@ +/* from: FreeBSD: head/lib/msun/src/e_acosh.c 176451 2008-02-22 02:30:36Z das */ + +/* @(#)s_asinh.c 5.1 93/09/24 */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * See s_asinh.c for complete comments. + * + * Converted to long double by David Schultz <das@FreeBSD.ORG> and + * Bruce D. Evans. + */ + +#include <float.h> +#ifdef __i386__ +#include <ieeefp.h> +#endif + +#include "fpmath.h" +#include "math.h" +#include "math_private.h" + +/* EXP_LARGE is the threshold above which we use asinh(x) ~= log(2x). */ +/* EXP_TINY is the threshold below which we use asinh(x) ~= x. */ +#if LDBL_MANT_DIG == 64 +#define EXP_LARGE 34 +#define EXP_TINY -34 +#elif LDBL_MANT_DIG == 113 +#define EXP_LARGE 58 +#define EXP_TINY -58 +#else +#error "Unsupported long double format" +#endif + +#if LDBL_MAX_EXP != 0x4000 +/* We also require the usual expsign encoding. */ +#error "Unsupported long double format" +#endif + +#define BIAS (LDBL_MAX_EXP - 1) + +static const double +one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ +huge= 1.00000000000000000000e+300; + +#if LDBL_MANT_DIG == 64 +static const union IEEEl2bits +u_ln2 = LD80C(0xb17217f7d1cf79ac, -1, 6.93147180559945309417e-1L); +#define ln2 u_ln2.e +#elif LDBL_MANT_DIG == 113 +static const long double +ln2 = 6.93147180559945309417232121458176568e-1L; /* 0x162e42fefa39ef35793c7673007e6.0p-113 */ +#else +#error "Unsupported long double format" +#endif + +long double +asinhl(long double x) +{ + long double t, w; + uint16_t hx, ix; + + ENTERI(); + GET_LDBL_EXPSIGN(hx, x); + ix = hx & 0x7fff; + if (ix >= 0x7fff) RETURNI(x+x); /* x is inf, NaN or misnormal */ + if (ix < BIAS + EXP_TINY) { /* |x| < TINY, or misnormal */ + if (huge + x > one) RETURNI(x); /* return x inexact except 0 */ + } + if (ix >= BIAS + EXP_LARGE) { /* |x| >= LARGE, or misnormal */ + w = logl(fabsl(x))+ln2; + } else if (ix >= 0x4000) { /* LARGE > |x| >= 2.0, or misnormal */ + t = fabsl(x); + w = logl(2.0*t+one/(sqrtl(x*x+one)+t)); + } else { /* 2.0 > |x| >= TINY, or misnormal */ + t = x*x; + w =log1pl(fabsl(x)+t/(one+sqrtl(one+t))); + } + RETURNI((hx & 0x8000) == 0 ? w : -w); +} diff --git a/lib/msun/src/s_erf.c b/lib/msun/src/s_erf.c index 0886e5e..854767b 100644 --- a/lib/msun/src/s_erf.c +++ b/lib/msun/src/s_erf.c @@ -201,7 +201,7 @@ erf(double x) if(ix < 0x3feb0000) { /* |x|<0.84375 */ if(ix < 0x3e300000) { /* |x|<2**-28 */ if (ix < 0x00800000) - return 0.125*(8.0*x+efx8*x); /*avoid underflow */ + return (8*x+efx8*x)/8; /* avoid spurious underflow */ return x + efx*x; } z = x*x; diff --git a/lib/msun/src/s_erff.c b/lib/msun/src/s_erff.c index a44e135..b97ca1d 100644 --- a/lib/msun/src/s_erff.c +++ b/lib/msun/src/s_erff.c @@ -24,75 +24,59 @@ tiny = 1e-30, half= 5.0000000000e-01, /* 0x3F000000 */ one = 1.0000000000e+00, /* 0x3F800000 */ two = 2.0000000000e+00, /* 0x40000000 */ - /* c = (subfloat)0.84506291151 */ -erx = 8.4506291151e-01, /* 0x3f58560b */ /* - * Coefficients for approximation to erf on [0,0.84375] + * Coefficients for approximation to erf on [0,0.84375] */ efx = 1.2837916613e-01, /* 0x3e0375d4 */ efx8= 1.0270333290e+00, /* 0x3f8375d4 */ -pp0 = 1.2837916613e-01, /* 0x3e0375d4 */ -pp1 = -3.2504209876e-01, /* 0xbea66beb */ -pp2 = -2.8481749818e-02, /* 0xbce9528f */ -pp3 = -5.7702702470e-03, /* 0xbbbd1489 */ -pp4 = -2.3763017452e-05, /* 0xb7c756b1 */ -qq1 = 3.9791721106e-01, /* 0x3ecbbbce */ -qq2 = 6.5022252500e-02, /* 0x3d852a63 */ -qq3 = 5.0813062117e-03, /* 0x3ba68116 */ -qq4 = 1.3249473704e-04, /* 0x390aee49 */ -qq5 = -3.9602282413e-06, /* 0xb684e21a */ /* - * Coefficients for approximation to erf in [0.84375,1.25] + * Domain [0, 0.84375], range ~[-5.4446e-10,5.5197e-10]: + * |(erf(x) - x)/x - p(x)/q(x)| < 2**-31. */ -pa0 = -2.3621185683e-03, /* 0xbb1acdc6 */ -pa1 = 4.1485610604e-01, /* 0x3ed46805 */ -pa2 = -3.7220788002e-01, /* 0xbebe9208 */ -pa3 = 3.1834661961e-01, /* 0x3ea2fe54 */ -pa4 = -1.1089469492e-01, /* 0xbde31cc2 */ -pa5 = 3.5478305072e-02, /* 0x3d1151b3 */ -pa6 = -2.1663755178e-03, /* 0xbb0df9c0 */ -qa1 = 1.0642088205e-01, /* 0x3dd9f331 */ -qa2 = 5.4039794207e-01, /* 0x3f0a5785 */ -qa3 = 7.1828655899e-02, /* 0x3d931ae7 */ -qa4 = 1.2617121637e-01, /* 0x3e013307 */ -qa5 = 1.3637083583e-02, /* 0x3c5f6e13 */ -qa6 = 1.1984500103e-02, /* 0x3c445aa3 */ +pp0 = 1.28379166e-01F, /* 0x1.06eba8p-3 */ +pp1 = -3.36030394e-01F, /* -0x1.58185ap-2 */ +pp2 = -1.86260219e-03F, /* -0x1.e8451ep-10 */ +qq1 = 3.12324286e-01F, /* 0x1.3fd1f0p-2 */ +qq2 = 2.16070302e-02F, /* 0x1.620274p-6 */ +qq3 = -1.98859419e-03F, /* -0x1.04a626p-9 */ /* - * Coefficients for approximation to erfc in [1.25,1/0.35] + * Domain [0.84375, 1.25], range ~[-1.953e-11,1.940e-11]: + * |(erf(x) - erx) - p(x)/q(x)| < 2**-36. */ -ra0 = -9.8649440333e-03, /* 0xbc21a093 */ -ra1 = -6.9385856390e-01, /* 0xbf31a0b7 */ -ra2 = -1.0558626175e+01, /* 0xc128f022 */ -ra3 = -6.2375331879e+01, /* 0xc2798057 */ -ra4 = -1.6239666748e+02, /* 0xc322658c */ -ra5 = -1.8460508728e+02, /* 0xc3389ae7 */ -ra6 = -8.1287437439e+01, /* 0xc2a2932b */ -ra7 = -9.8143291473e+00, /* 0xc11d077e */ -sa1 = 1.9651271820e+01, /* 0x419d35ce */ -sa2 = 1.3765776062e+02, /* 0x4309a863 */ -sa3 = 4.3456588745e+02, /* 0x43d9486f */ -sa4 = 6.4538726807e+02, /* 0x442158c9 */ -sa5 = 4.2900814819e+02, /* 0x43d6810b */ -sa6 = 1.0863500214e+02, /* 0x42d9451f */ -sa7 = 6.5702495575e+00, /* 0x40d23f7c */ -sa8 = -6.0424413532e-02, /* 0xbd777f97 */ +erx = 8.42697144e-01F, /* 0x1.af7600p-1. erf(1) rounded to 16 bits. */ +pa0 = 3.64939137e-06F, /* 0x1.e9d022p-19 */ +pa1 = 4.15109694e-01F, /* 0x1.a91284p-2 */ +pa2 = -1.65179938e-01F, /* -0x1.5249dcp-3 */ +pa3 = 1.10914491e-01F, /* 0x1.c64e46p-4 */ +qa1 = 6.02074385e-01F, /* 0x1.344318p-1 */ +qa2 = 5.35934687e-01F, /* 0x1.126608p-1 */ +qa3 = 1.68576106e-01F, /* 0x1.593e6ep-3 */ +qa4 = 5.62181212e-02F, /* 0x1.cc89f2p-5 */ /* - * Coefficients for approximation to erfc in [1/.35,28] + * Domain [1.25,1/0.35], range ~[-7.043e-10,7.457e-10]: + * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-30 */ -rb0 = -9.8649431020e-03, /* 0xbc21a092 */ -rb1 = -7.9928326607e-01, /* 0xbf4c9dd4 */ -rb2 = -1.7757955551e+01, /* 0xc18e104b */ -rb3 = -1.6063638306e+02, /* 0xc320a2ea */ -rb4 = -6.3756646729e+02, /* 0xc41f6441 */ -rb5 = -1.0250950928e+03, /* 0xc480230b */ -rb6 = -4.8351919556e+02, /* 0xc3f1c275 */ -sb1 = 3.0338060379e+01, /* 0x41f2b459 */ -sb2 = 3.2579251099e+02, /* 0x43a2e571 */ -sb3 = 1.5367296143e+03, /* 0x44c01759 */ -sb4 = 3.1998581543e+03, /* 0x4547fdbb */ -sb5 = 2.5530502930e+03, /* 0x451f90ce */ -sb6 = 4.7452853394e+02, /* 0x43ed43a7 */ -sb7 = -2.2440952301e+01; /* 0xc1b38712 */ +ra0 = -9.87132732e-03F, /* -0x1.4376b2p-7 */ +ra1 = -5.53605914e-01F, /* -0x1.1b723cp-1 */ +ra2 = -2.17589188e+00F, /* -0x1.1683a0p+1 */ +ra3 = -1.43268085e+00F, /* -0x1.6ec42cp+0 */ +sa1 = 5.45995426e+00F, /* 0x1.5d6fe4p+2 */ +sa2 = 6.69798088e+00F, /* 0x1.acabb8p+2 */ +sa3 = 1.43113089e+00F, /* 0x1.6e5e98p+0 */ +sa4 = -5.77397496e-02F, /* -0x1.d90108p-5 */ +/* + * Domain [1/0.35, 11], range ~[-2.264e-13,2.336e-13]: + * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-42 + */ +rb0 = -9.86494310e-03F, /* -0x1.434124p-7 */ +rb1 = -6.25171244e-01F, /* -0x1.401672p-1 */ +rb2 = -6.16498327e+00F, /* -0x1.8a8f16p+2 */ +rb3 = -1.66696873e+01F, /* -0x1.0ab70ap+4 */ +rb4 = -9.53764343e+00F, /* -0x1.313460p+3 */ +sb1 = 1.26884899e+01F, /* 0x1.96081cp+3 */ +sb2 = 4.51839523e+01F, /* 0x1.6978bcp+5 */ +sb3 = 4.72810211e+01F, /* 0x1.7a3f88p+5 */ +sb4 = 8.93033314e+00F; /* 0x1.1dc54ap+3 */ float erff(float x) @@ -107,43 +91,37 @@ erff(float x) } if(ix < 0x3f580000) { /* |x|<0.84375 */ - if(ix < 0x31800000) { /* |x|<2**-28 */ - if (ix < 0x04000000) - /*avoid underflow */ - return (float)0.125*((float)8.0*x+efx8*x); + if(ix < 0x38800000) { /* |x|<2**-14 */ + if (ix < 0x04000000) /* |x|<0x1p-119 */ + return (8*x+efx8*x)/8; /* avoid spurious underflow */ return x + efx*x; } z = x*x; - r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); - s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + r = pp0+z*(pp1+z*pp2); + s = one+z*(qq1+z*(qq2+z*qq3)); y = r/s; return x + x*y; } if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */ s = fabsf(x)-one; - P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); - Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + P = pa0+s*(pa1+s*(pa2+s*pa3)); + Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4))); if(hx>=0) return erx + P/Q; else return -erx - P/Q; } - if (ix >= 0x40c00000) { /* inf>|x|>=6 */ + if (ix >= 0x40800000) { /* inf>|x|>=4 */ if(hx>=0) return one-tiny; else return tiny-one; } x = fabsf(x); s = one/(x*x); if(ix< 0x4036DB6E) { /* |x| < 1/0.35 */ - R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( - ra5+s*(ra6+s*ra7)))))); - S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( - sa5+s*(sa6+s*(sa7+s*sa8))))))); + R=ra0+s*(ra1+s*(ra2+s*ra3)); + S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4))); } else { /* |x| >= 1/0.35 */ - R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( - rb5+s*rb6))))); - S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( - sb5+s*(sb6+s*sb7)))))); + R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4))); + S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4))); } - GET_FLOAT_WORD(ix,x); - SET_FLOAT_WORD(z,ix&0xfffff000); - r = __ieee754_expf(-z*z-(float)0.5625)*__ieee754_expf((z-x)*(z+x)+R/S); + SET_FLOAT_WORD(z,hx&0xffffe000); + r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S); if(hx>=0) return one-r/x; else return r/x-one; } @@ -160,11 +138,11 @@ erfcf(float x) } if(ix < 0x3f580000) { /* |x|<0.84375 */ - if(ix < 0x23800000) /* |x|<2**-56 */ + if(ix < 0x33800000) /* |x|<2**-24 */ return one-x; z = x*x; - r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4))); - s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))); + r = pp0+z*(pp1+z*pp2); + s = one+z*(qq1+z*(qq2+z*qq3)); y = r/s; if(hx < 0x3e800000) { /* x<1/4 */ return one-(x+x*y); @@ -176,33 +154,27 @@ erfcf(float x) } if(ix < 0x3fa00000) { /* 0.84375 <= |x| < 1.25 */ s = fabsf(x)-one; - P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))); - Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))); + P = pa0+s*(pa1+s*(pa2+s*pa3)); + Q = one+s*(qa1+s*(qa2+s*(qa3+s*qa4))); if(hx>=0) { z = one-erx; return z - P/Q; } else { z = erx+P/Q; return one+z; } } - if (ix < 0x41e00000) { /* |x|<28 */ + if (ix < 0x41300000) { /* |x|<11 */ x = fabsf(x); s = one/(x*x); if(ix< 0x4036DB6D) { /* |x| < 1/.35 ~ 2.857143*/ - R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*( - ra5+s*(ra6+s*ra7)))))); - S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*( - sa5+s*(sa6+s*(sa7+s*sa8))))))); + R=ra0+s*(ra1+s*(ra2+s*ra3)); + S=one+s*(sa1+s*(sa2+s*(sa3+s*sa4))); } else { /* |x| >= 1/.35 ~ 2.857143 */ - if(hx<0&&ix>=0x40c00000) return two-tiny;/* x < -6 */ - R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*( - rb5+s*rb6))))); - S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*( - sb5+s*(sb6+s*sb7)))))); + if(hx<0&&ix>=0x40a00000) return two-tiny;/* x < -5 */ + R=rb0+s*(rb1+s*(rb2+s*(rb3+s*rb4))); + S=one+s*(sb1+s*(sb2+s*(sb3+s*sb4))); } - GET_FLOAT_WORD(ix,x); - SET_FLOAT_WORD(z,ix&0xfffff000); - r = __ieee754_expf(-z*z-(float)0.5625)* - __ieee754_expf((z-x)*(z+x)+R/S); + SET_FLOAT_WORD(z,hx&0xffffe000); + r = expf(-z*z-0.5625F)*expf((z-x)*(z+x)+R/S); if(hx>0) return r/x; else return two-r/x; } else { if(hx>0) return tiny*tiny; else return two-tiny; diff --git a/lib/msun/src/s_exp2.c b/lib/msun/src/s_exp2.c index 485b4e3..fde11c2 100644 --- a/lib/msun/src/s_exp2.c +++ b/lib/msun/src/s_exp2.c @@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$"); #define TBLSIZE (1 << TBLBITS) static const double - huge = 0x1p1000, redux = 0x1.8p52 / TBLSIZE, P1 = 0x1.62e42fefa39efp-1, P2 = 0x1.ebfbdff82c575p-3, @@ -44,7 +43,9 @@ static const double P4 = 0x1.3b2ab88f70400p-7, P5 = 0x1.5d88003875c74p-10; -static volatile double twom1000 = 0x1p-1000; +static volatile double + huge = 0x1p1000, + twom1000 = 0x1p-1000; static const double tbl[TBLSIZE * 2] = { /* exp2(z + eps) eps */ diff --git a/lib/msun/src/s_exp2f.c b/lib/msun/src/s_exp2f.c index 0a97bf6..9ac7c1f 100644 --- a/lib/msun/src/s_exp2f.c +++ b/lib/msun/src/s_exp2f.c @@ -36,14 +36,15 @@ __FBSDID("$FreeBSD$"); #define TBLSIZE (1 << TBLBITS) static const float - huge = 0x1p100f, redux = 0x1.8p23f / TBLSIZE, P1 = 0x1.62e430p-1f, P2 = 0x1.ebfbe0p-3f, P3 = 0x1.c6b348p-5f, P4 = 0x1.3b2c9cp-7f; -static volatile float twom100 = 0x1p-100f; +static volatile float + huge = 0x1p100f, + twom100 = 0x1p-100f; static const double exp2ft[TBLSIZE] = { 0x1.6a09e667f3bcdp-1, diff --git a/lib/msun/src/s_expm1.c b/lib/msun/src/s_expm1.c index 5aa1917..37998a3 100644 --- a/lib/msun/src/s_expm1.c +++ b/lib/msun/src/s_expm1.c @@ -115,7 +115,6 @@ __FBSDID("$FreeBSD$"); static const double one = 1.0, -huge = 1.0e+300, tiny = 1.0e-300, o_threshold = 7.09782712893383973096e+02,/* 0x40862E42, 0xFEFA39EF */ ln2_hi = 6.93147180369123816490e-01,/* 0x3fe62e42, 0xfee00000 */ @@ -128,6 +127,8 @@ Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ +static volatile double huge = 1.0e+300; + double expm1(double x) { @@ -215,3 +216,7 @@ expm1(double x) } return y; } + +#if (LDBL_MANT_DIG == 53) +__weak_reference(expm1, expm1l); +#endif diff --git a/lib/msun/src/s_expm1f.c b/lib/msun/src/s_expm1f.c index fb37494..c0a3934 100644 --- a/lib/msun/src/s_expm1f.c +++ b/lib/msun/src/s_expm1f.c @@ -23,7 +23,6 @@ __FBSDID("$FreeBSD$"); static const float one = 1.0, -huge = 1.0e+30, tiny = 1.0e-30, o_threshold = 8.8721679688e+01,/* 0x42b17180 */ ln2_hi = 6.9313812256e-01,/* 0x3f317180 */ @@ -37,6 +36,8 @@ invln2 = 1.4426950216e+00,/* 0x3fb8aa3b */ Q1 = -3.3333212137e-2, /* -0x888868.0p-28 */ Q2 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ +static volatile float huge = 1.0e+30; + float expm1f(float x) { diff --git a/lib/msun/src/s_fma.c b/lib/msun/src/s_fma.c index dfbd13c..b1066c2 100644 --- a/lib/msun/src/s_fma.c +++ b/lib/msun/src/s_fma.c @@ -117,7 +117,7 @@ add_and_denormalize(double a, double b, int scale) if (sum.lo != 0) { EXTRACT_WORD64(hibits, sum.hi); bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1; - if (bits_lost != 1 ^ (int)(hibits & 1)) { + if ((bits_lost != 1) ^ (int)(hibits & 1)) { /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ EXTRACT_WORD64(lobits, sum.lo); hibits += 1 - (((hibits ^ lobits) >> 62) & 2); @@ -238,6 +238,8 @@ fma(double x, double y, double z) zs = copysign(DBL_MIN, zs); fesetround(FE_TONEAREST); + /* work around clang bug 8100 */ + volatile double vxs = xs; /* * Basic approach for round-to-nearest: @@ -247,7 +249,7 @@ fma(double x, double y, double z) * adj = xy.lo + r.lo (inexact; low bit is sticky) * result = r.hi + adj (correctly rounded) */ - xy = dd_mul(xs, ys); + xy = dd_mul(vxs, ys); r = dd_add(xy.hi, zs); spread = ex + ey; @@ -268,7 +270,9 @@ fma(double x, double y, double z) * rounding modes. */ fesetround(oround); - adj = r.lo + xy.lo; + /* work around clang bug 8100 */ + volatile double vrlo = r.lo; + adj = vrlo + xy.lo; return (ldexp(r.hi + adj, spread)); } diff --git a/lib/msun/src/s_fmal.c b/lib/msun/src/s_fmal.c index c2a6913..d435f4f 100644 --- a/lib/msun/src/s_fmal.c +++ b/lib/msun/src/s_fmal.c @@ -113,7 +113,7 @@ add_and_denormalize(long double a, long double b, int scale) if (sum.lo != 0) { u.e = sum.hi; bits_lost = -u.bits.exp - scale + 1; - if (bits_lost != 1 ^ (int)(u.bits.manl & 1)) + if ((bits_lost != 1) ^ (int)(u.bits.manl & 1)) sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); } return (ldexp(sum.hi, scale)); @@ -226,6 +226,8 @@ fmal(long double x, long double y, long double z) zs = copysignl(LDBL_MIN, zs); fesetround(FE_TONEAREST); + /* work around clang bug 8100 */ + volatile long double vxs = xs; /* * Basic approach for round-to-nearest: @@ -235,7 +237,7 @@ fmal(long double x, long double y, long double z) * adj = xy.lo + r.lo (inexact; low bit is sticky) * result = r.hi + adj (correctly rounded) */ - xy = dd_mul(xs, ys); + xy = dd_mul(vxs, ys); r = dd_add(xy.hi, zs); spread = ex + ey; @@ -256,7 +258,9 @@ fmal(long double x, long double y, long double z) * rounding modes. */ fesetround(oround); - adj = r.lo + xy.lo; + /* work around clang bug 8100 */ + volatile long double vrlo = r.lo; + adj = vrlo + xy.lo; return (ldexpl(r.hi + adj, spread)); } diff --git a/lib/msun/src/s_log1p.c b/lib/msun/src/s_log1p.c index b062a8a..3cc77bd 100644 --- a/lib/msun/src/s_log1p.c +++ b/lib/msun/src/s_log1p.c @@ -96,6 +96,7 @@ Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ static const double zero = 0.0; +static volatile double vzero = 0.0; double log1p(double x) @@ -109,7 +110,7 @@ log1p(double x) k = 1; if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */ if(ax>=0x3ff00000) { /* x <= -1.0 */ - if(x==-1.0) return -two54/zero; /* log1p(-1)=+inf */ + if(x==-1.0) return -two54/vzero; /* log1p(-1)=+inf */ else return (x-x)/(x-x); /* log1p(x<-1)=NaN */ } if(ax<0x3e200000) { /* |x| < 2**-29 */ @@ -173,3 +174,7 @@ log1p(double x) if(k==0) return f-(hfsq-s*(hfsq+R)); else return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f); } + +#if (LDBL_MANT_DIG == 53) +__weak_reference(log1p, log1pl); +#endif diff --git a/lib/msun/src/s_log1pf.c b/lib/msun/src/s_log1pf.c index 01d3457..df04c67 100644 --- a/lib/msun/src/s_log1pf.c +++ b/lib/msun/src/s_log1pf.c @@ -34,6 +34,7 @@ Lp6 = 1.5313838422e-01, /* 3E1CD04F */ Lp7 = 1.4798198640e-01; /* 3E178897 */ static const float zero = 0.0; +static volatile float vzero = 0.0; float log1pf(float x) @@ -47,7 +48,7 @@ log1pf(float x) k = 1; if (hx < 0x3ed413d0) { /* 1+x < sqrt(2)+ */ if(ax>=0x3f800000) { /* x <= -1.0 */ - if(x==(float)-1.0) return -two25/zero; /* log1p(-1)=+inf */ + if(x==(float)-1.0) return -two25/vzero; /* log1p(-1)=+inf */ else return (x-x)/(x-x); /* log1p(x<-1)=NaN */ } if(ax<0x38000000) { /* |x| < 2**-15 */ diff --git a/lib/msun/src/s_nearbyint.c b/lib/msun/src/s_nearbyint.c index 12493d29..063f8d7 100644 --- a/lib/msun/src/s_nearbyint.c +++ b/lib/msun/src/s_nearbyint.c @@ -36,12 +36,16 @@ __FBSDID("$FreeBSD$"); * instead of feclearexcept()/feupdateenv() to restore the environment * because the only exception defined for rint() is overflow, and * rounding can't overflow as long as emax >= p. + * + * The volatile keyword is needed below because clang incorrectly assumes + * that rint won't raise any floating-point exceptions. Declaring ret volatile + * is sufficient to trick the compiler into doing the right thing. */ #define DECL(type, fn, rint) \ type \ fn(type x) \ { \ - type ret; \ + volatile type ret; \ fenv_t env; \ \ fegetenv(&env); \ diff --git a/lib/msun/i387/fenv.h b/lib/msun/x86/fenv.h index 329dfe1..a4e452d 100644 --- a/lib/msun/i387/fenv.h +++ b/lib/msun/x86/fenv.h @@ -36,26 +36,6 @@ #define __fenv_static static #endif -/* - * To preserve binary compatibility with FreeBSD 5.3, we pack the - * mxcsr into some reserved fields, rather than changing sizeof(fenv_t). - */ -typedef struct { - __uint16_t __control; - __uint16_t __mxcsr_hi; - __uint16_t __status; - __uint16_t __mxcsr_lo; - __uint32_t __tag; - char __other[16]; -} fenv_t; - -#define __get_mxcsr(env) (((env).__mxcsr_hi << 16) | \ - ((env).__mxcsr_lo)) -#define __set_mxcsr(env, x) do { \ - (env).__mxcsr_hi = (__uint32_t)(x) >> 16; \ - (env).__mxcsr_lo = (__uint16_t)(x); \ -} while (0) - typedef __uint16_t fexcept_t; /* Exception flags */ @@ -84,18 +64,32 @@ typedef __uint16_t fexcept_t; #define _SSE_ROUND_SHIFT 3 #define _SSE_EMASK_SHIFT 7 -__BEGIN_DECLS +#ifdef __i386__ +/* + * To preserve binary compatibility with FreeBSD 5.3, we pack the + * mxcsr into some reserved fields, rather than changing sizeof(fenv_t). + */ +typedef struct { + __uint16_t __control; + __uint16_t __mxcsr_hi; + __uint16_t __status; + __uint16_t __mxcsr_lo; + __uint32_t __tag; + char __other[16]; +} fenv_t; +#else /* __amd64__ */ +typedef struct { + struct { + __uint32_t __control; + __uint32_t __status; + __uint32_t __tag; + char __other[16]; + } __x87; + __uint32_t __mxcsr; +} fenv_t; +#endif /* __i386__ */ -/* After testing for SSE support once, we cache the result in __has_sse. */ -enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK }; -extern enum __sse_support __has_sse; -int __test_sse(void); -#ifdef __SSE__ -#define __HAS_SSE() 1 -#else -#define __HAS_SSE() (__has_sse == __SSE_YES || \ - (__has_sse == __SSE_UNK && __test_sse())) -#endif +__BEGIN_DECLS /* Default floating-point environment */ extern const fenv_t __fe_dfl_env; @@ -114,6 +108,68 @@ extern const fenv_t __fe_dfl_env; #define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr)) #define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr))) +int fegetenv(fenv_t *__envp); +int feholdexcept(fenv_t *__envp); +int fesetexceptflag(const fexcept_t *__flagp, int __excepts); +int feraiseexcept(int __excepts); +int feupdateenv(const fenv_t *__envp); + +__fenv_static inline int +fegetround(void) +{ + __uint16_t __control; + + /* + * We assume that the x87 and the SSE unit agree on the + * rounding mode. Reading the control word on the x87 turns + * out to be about 5 times faster than reading it on the SSE + * unit on an Opteron 244. + */ + __fnstcw(&__control); + return (__control & _ROUND_MASK); +} + +#if __BSD_VISIBLE + +int feenableexcept(int __mask); +int fedisableexcept(int __mask); + +/* We currently provide no external definition of fegetexcept(). */ +static inline int +fegetexcept(void) +{ + __uint16_t __control; + + /* + * We assume that the masks for the x87 and the SSE unit are + * the same. + */ + __fnstcw(&__control); + return (~__control & FE_ALL_EXCEPT); +} + +#endif /* __BSD_VISIBLE */ + +#ifdef __i386__ + +/* After testing for SSE support once, we cache the result in __has_sse. */ +enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK }; +extern enum __sse_support __has_sse; +int __test_sse(void); +#ifdef __SSE__ +#define __HAS_SSE() 1 +#else +#define __HAS_SSE() (__has_sse == __SSE_YES || \ + (__has_sse == __SSE_UNK && __test_sse())) +#endif + +#define __get_mxcsr(env) (((env).__mxcsr_hi << 16) | \ + ((env).__mxcsr_lo)) +#define __set_mxcsr(env, x) do { \ + (env).__mxcsr_hi = (__uint32_t)(x) >> 16; \ + (env).__mxcsr_lo = (__uint16_t)(x); \ +} while (0) + __fenv_static inline int feclearexcept(int __excepts) { @@ -150,9 +206,6 @@ fegetexceptflag(fexcept_t *__flagp, int __excepts) return (0); } -int fesetexceptflag(const fexcept_t *__flagp, int __excepts); -int feraiseexcept(int __excepts); - __fenv_static inline int fetestexcept(int __excepts) { @@ -168,21 +221,6 @@ fetestexcept(int __excepts) } __fenv_static inline int -fegetround(void) -{ - __uint16_t __control; - - /* - * We assume that the x87 and the SSE unit agree on the - * rounding mode. Reading the control word on the x87 turns - * out to be about 5 times faster than reading it on the SSE - * unit on an Opteron 244. - */ - __fnstcw(&__control); - return (__control & _ROUND_MASK); -} - -__fenv_static inline int fesetround(int __round) { __uint32_t __mxcsr; @@ -206,9 +244,6 @@ fesetround(int __round) return (0); } -int fegetenv(fenv_t *__envp); -int feholdexcept(fenv_t *__envp); - __fenv_static inline int fesetenv(const fenv_t *__envp) { @@ -231,28 +266,89 @@ fesetenv(const fenv_t *__envp) return (0); } -int feupdateenv(const fenv_t *__envp); +#else /* __amd64__ */ -#if __BSD_VISIBLE +__fenv_static inline int +feclearexcept(int __excepts) +{ + fenv_t __env; -int feenableexcept(int __mask); -int fedisableexcept(int __mask); + if (__excepts == FE_ALL_EXCEPT) { + __fnclex(); + } else { + __fnstenv(&__env.__x87); + __env.__x87.__status &= ~__excepts; + __fldenv(__env.__x87); + } + __stmxcsr(&__env.__mxcsr); + __env.__mxcsr &= ~__excepts; + __ldmxcsr(__env.__mxcsr); + return (0); +} -/* We currently provide no external definition of fegetexcept(). */ -static inline int -fegetexcept(void) +__fenv_static inline int +fegetexceptflag(fexcept_t *__flagp, int __excepts) +{ + __uint32_t __mxcsr; + __uint16_t __status; + + __stmxcsr(&__mxcsr); + __fnstsw(&__status); + *__flagp = (__mxcsr | __status) & __excepts; + return (0); +} + +__fenv_static inline int +fetestexcept(int __excepts) { + __uint32_t __mxcsr; + __uint16_t __status; + + __stmxcsr(&__mxcsr); + __fnstsw(&__status); + return ((__status | __mxcsr) & __excepts); +} + +__fenv_static inline int +fesetround(int __round) +{ + __uint32_t __mxcsr; __uint16_t __control; + if (__round & ~_ROUND_MASK) + return (-1); + + __fnstcw(&__control); + __control &= ~_ROUND_MASK; + __control |= __round; + __fldcw(__control); + + __stmxcsr(&__mxcsr); + __mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT); + __mxcsr |= __round << _SSE_ROUND_SHIFT; + __ldmxcsr(__mxcsr); + + return (0); +} + +__fenv_static inline int +fesetenv(const fenv_t *__envp) +{ + /* - * We assume that the masks for the x87 and the SSE unit are - * the same. + * XXX Using fldenvx() instead of fldenv() tells the compiler that this + * instruction clobbers the i387 register stack. This happens because + * we restore the tag word from the saved environment. Normally, this + * would happen anyway and we wouldn't care, because the ABI allows + * function calls to clobber the i387 regs. However, fesetenv() is + * inlined, so we need to be more careful. */ - __fnstcw(&__control); - return (~__control & FE_ALL_EXCEPT); + __fldenvx(__envp->__x87); + __ldmxcsr(__envp->__mxcsr); + return (0); } -#endif /* __BSD_VISIBLE */ +#endif /* __i386__ */ __END_DECLS |