From 7c947eee2570f5cda7e1ce3562be6f001928ebb0 Mon Sep 17 00:00:00 2001 From: das Date: Sat, 15 Oct 2011 04:16:58 +0000 Subject: Fix a double-rounding bug in fma{,f,l}. The bug would occur in round-to-nearest mode when the result, rounded to twice machine precision, was exactly halfway between two machine-precision values. The essence of the fix is to simulate a "sticky bit" in the pathological cases, which is how hardware implementations break the ties. MFC after: 1 month --- lib/msun/src/s_fma.c | 169 +++++++++++++++++++++++++++++++------------------- lib/msun/src/s_fmaf.c | 38 +++++++++--- lib/msun/src/s_fmal.c | 149 ++++++++++++++++++++++++++++---------------- 3 files changed, 231 insertions(+), 125 deletions(-) (limited to 'lib/msun/src') diff --git a/lib/msun/src/s_fma.c b/lib/msun/src/s_fma.c index aefbd8e..9cb0ddc 100644 --- a/lib/msun/src/s_fma.c +++ b/lib/msun/src/s_fma.c @@ -31,6 +31,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include "math_private.h" + /* * A struct dd represents a floating-point number with twice the precision * of a double. We maintain the invariant that "hi" stores the 53 high-order @@ -59,6 +61,73 @@ dd_add(double a, double b) } /* + * Compute a+b, with a small tweak: The least significant bit of the + * result is adjusted into a sticky bit summarizing all the bits that + * were lost to rounding. This adjustment negates the effects of double + * rounding when the result is added to another number with a higher + * exponent. For an explanation of round and sticky bits, see any reference + * on FPU design, e.g., + * + * J. Coonen. An Implementation Guide to a Proposed Standard for + * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. + */ +static inline double +add_adjusted(double a, double b) +{ + struct dd sum; + uint64_t hibits, lobits; + + sum = dd_add(a, b); + if (sum.lo != 0) { + EXTRACT_WORD64(hibits, sum.hi); + if ((hibits & 1) == 0) { + /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ + EXTRACT_WORD64(lobits, sum.lo); + hibits += 1 - ((hibits ^ lobits) >> 62); + INSERT_WORD64(sum.hi, hibits); + } + } + return (sum.hi); +} + +/* + * Compute ldexp(a+b, scale) with a single rounding error. It is assumed + * that the result will be subnormal, and care is taken to ensure that + * double rounding does not occur. + */ +static inline double +add_and_denormalize(double a, double b, int scale) +{ + struct dd sum; + uint64_t hibits, lobits; + int bits_lost; + + sum = dd_add(a, b); + + /* + * If we are losing at least two bits of accuracy to denormalization, + * then the first lost bit becomes a round bit, and we adjust the + * lowest bit of sum.hi to make it a sticky bit summarizing all the + * bits in sum.lo. With the sticky bit adjusted, the hardware will + * break any ties in the correct direction. + * + * If we are losing only one bit to denormalization, however, we must + * break the ties manually. + */ + if (sum.lo != 0) { + EXTRACT_WORD64(hibits, sum.hi); + bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1; + if (bits_lost != 1 ^ (int)(hibits & 1)) { + /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ + EXTRACT_WORD64(lobits, sum.lo); + hibits += 1 - (((hibits ^ lobits) >> 62) & 2); + INSERT_WORD64(sum.hi, hibits); + } + } + return (ldexp(sum.hi, scale)); +} + +/* * Compute a*b exactly, returning the exact result in a struct dd. We assume * that both a and b are normalized, so no underflow or overflow will occur. * The current rounding mode must be round-to-nearest. @@ -105,14 +174,11 @@ dd_mul(double a, double b) * Hardware instructions should be used on architectures that support it, * since this implementation will likely be several times slower. */ -#if LDBL_MANT_DIG != 113 double fma(double x, double y, double z) { - double xs, ys, zs; - struct dd xy, r, r2; - double p; - double s; + double xs, ys, zs, adj; + struct dd xy, r; int oround; int ex, ey, ez; int spread; @@ -142,41 +208,6 @@ fma(double x, double y, double z) * will overflow, so we handle these cases specially. Rounding * modes other than FE_TONEAREST are painful. */ - if (spread > DBL_MANT_DIG * 2) { - fenv_t env; - feraiseexcept(FE_INEXACT); - switch(oround) { - case FE_TONEAREST: - return (x * y); - case FE_TOWARDZERO: - if (x > 0.0 ^ y < 0.0 ^ z < 0.0) - return (x * y); - feholdexcept(&env); - s = x * y; - if (!fetestexcept(FE_INEXACT)) - s = nextafter(s, 0); - feupdateenv(&env); - return (s); - case FE_DOWNWARD: - if (z > 0.0) - return (x * y); - feholdexcept(&env); - s = x * y; - if (!fetestexcept(FE_INEXACT)) - s = nextafter(s, -INFINITY); - feupdateenv(&env); - return (s); - default: /* FE_UPWARD */ - if (z < 0.0) - return (x * y); - feholdexcept(&env); - s = x * y; - if (!fetestexcept(FE_INEXACT)) - s = nextafter(s, INFINITY); - feupdateenv(&env); - return (s); - } - } if (spread < -DBL_MANT_DIG) { feraiseexcept(FE_INEXACT); if (!isnormal(z)) @@ -201,42 +232,52 @@ fma(double x, double y, double z) return (z); } } + if (spread <= DBL_MANT_DIG * 2) + zs = ldexp(zs, -spread); + else + zs = copysign(DBL_MIN, zs); fesetround(FE_TONEAREST); + /* + * Basic approach for round-to-nearest: + * + * (xy.hi, xy.lo) = x * y (exact) + * (r.hi, r.lo) = xy.hi + z (exact) + * adj = xy.lo + r.lo (inexact; low bit is sticky) + * result = r.hi + adj (correctly rounded) + */ xy = dd_mul(xs, ys); - zs = ldexp(zs, -spread); r = dd_add(xy.hi, zs); - r.lo += xy.lo; - spread = ex + ey; - if (spread + ilogb(r.hi) > -1023) { + if (r.hi == 0.0) { + /* + * When the addends cancel to 0, ensure that the result has + * the correct sign. + */ fesetround(oround); - r.hi = r.hi + r.lo; - } else { + volatile double vzs = zs; /* XXX gcc CSE bug workaround */ + return (xy.hi + vzs); + } + + spread = ex + ey; + + if (oround != FE_TONEAREST) { /* - * The result is subnormal, so we round before scaling to - * avoid double rounding. + * There is no need to worry about double rounding in directed + * rounding modes. */ - p = ldexp(copysign(0x1p-1022, r.hi), -spread); - r2 = dd_add(r.hi, p); - r2.lo += r.lo; fesetround(oround); - r.hi = (r2.hi + r2.lo) - p; + adj = r.lo + xy.lo; + return (ldexp(r.hi + adj, spread)); } - return (ldexp(r.hi, spread)); -} -#else /* LDBL_MANT_DIG == 113 */ -/* - * 113 bits of precision is more than twice the precision of a double, - * so it is enough to represent the intermediate product exactly. - */ -double -fma(double x, double y, double z) -{ - return ((long double)x * y + z); + + adj = add_adjusted(r.lo, xy.lo); + if (spread + ilogb(r.hi) > -1023) + return (ldexp(r.hi + adj, spread)); + else + return (add_and_denormalize(r.hi, adj, spread)); } -#endif /* LDBL_MANT_DIG != 113 */ #if (LDBL_MANT_DIG == 53) __weak_reference(fma, fmal); diff --git a/lib/msun/src/s_fmaf.c b/lib/msun/src/s_fmaf.c index 7c699e5..3695823 100644 --- a/lib/msun/src/s_fmaf.c +++ b/lib/msun/src/s_fmaf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2005 David Schultz + * Copyright (c) 2005-2011 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,23 +27,43 @@ #include __FBSDID("$FreeBSD$"); +#include + #include "math.h" +#include "math_private.h" /* * Fused multiply-add: Compute x * y + z with a single rounding error. * * A double has more than twice as much precision than a float, so - * direct double-precision arithmetic suffices. - * - * XXX We are relying on the compiler to convert from double to float - * using the current rounding mode and with the appropriate - * side-effects. But on at least one platform (gcc 3.4.2/sparc64), - * this appears to be too much to ask for. The precision - * reduction should be done manually. + * direct double-precision arithmetic suffices, except where double + * rounding occurs. */ float fmaf(float x, float y, float z) { + double xy, result; + uint32_t hr, lr; + + xy = (double)x * y; + result = xy + z; + EXTRACT_WORDS(hr, lr, result); + /* Common case: The double precision result is fine. */ + if ((lr & 0x1fffffff) != 0x10000000 || /* not a halfway case */ + (hr & 0x7ff00000) == 0x7ff00000 || /* NaN */ + result - xy == z || /* exact */ + fegetround() != FE_TONEAREST) /* not round-to-nearest */ + return (result); - return ((double)x * y + z); + /* + * If result is inexact, and exactly halfway between two float values, + * we need to adjust the low-order bit in the direction of the error. + */ + fesetround(FE_TOWARDZERO); + volatile double vxy = xy; /* XXX work around gcc CSE bug */ + double adjusted_result = vxy + z; + fesetround(FE_TONEAREST); + if (result == adjusted_result) + SET_LOW_WORD(adjusted_result, lr + 1); + return (adjusted_result); } diff --git a/lib/msun/src/s_fmal.c b/lib/msun/src/s_fmal.c index 464dcb5..59fa6cd 100644 --- a/lib/msun/src/s_fmal.c +++ b/lib/msun/src/s_fmal.c @@ -31,6 +31,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include "fpmath.h" + /* * A struct dd represents a floating-point number with twice the precision * of a long double. We maintain the invariant that "hi" stores the high-order @@ -59,6 +61,65 @@ dd_add(long double a, long double b) } /* + * Compute a+b, with a small tweak: The least significant bit of the + * result is adjusted into a sticky bit summarizing all the bits that + * were lost to rounding. This adjustment negates the effects of double + * rounding when the result is added to another number with a higher + * exponent. For an explanation of round and sticky bits, see any reference + * on FPU design, e.g., + * + * J. Coonen. An Implementation Guide to a Proposed Standard for + * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. + */ +static inline long double +add_adjusted(long double a, long double b) +{ + struct dd sum; + union IEEEl2bits u; + + sum = dd_add(a, b); + if (sum.lo != 0) { + u.e = sum.hi; + if ((u.bits.manl & 1) == 0) + sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); + } + return (sum.hi); +} + +/* + * Compute ldexp(a+b, scale) with a single rounding error. It is assumed + * that the result will be subnormal, and care is taken to ensure that + * double rounding does not occur. + */ +static inline long double +add_and_denormalize(long double a, long double b, int scale) +{ + struct dd sum; + int bits_lost; + union IEEEl2bits u; + + sum = dd_add(a, b); + + /* + * If we are losing at least two bits of accuracy to denormalization, + * then the first lost bit becomes a round bit, and we adjust the + * lowest bit of sum.hi to make it a sticky bit summarizing all the + * bits in sum.lo. With the sticky bit adjusted, the hardware will + * break any ties in the correct direction. + * + * If we are losing only one bit to denormalization, however, we must + * break the ties manually. + */ + if (sum.lo != 0) { + u.e = sum.hi; + bits_lost = -u.bits.exp - scale + 1; + if (bits_lost != 1 ^ (int)(u.bits.manl & 1)) + sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); + } + return (ldexp(sum.hi, scale)); +} + +/* * Compute a*b exactly, returning the exact result in a struct dd. We assume * that both a and b are normalized, so no underflow or overflow will occur. * The current rounding mode must be round-to-nearest. @@ -104,10 +165,8 @@ dd_mul(long double a, long double b) long double fmal(long double x, long double y, long double z) { - long double xs, ys, zs; - struct dd xy, r, r2; - long double p; - long double s; + long double xs, ys, zs, adj; + struct dd xy, r; int oround; int ex, ey, ez; int spread; @@ -137,41 +196,6 @@ fmal(long double x, long double y, long double z) * will overflow, so we handle these cases specially. Rounding * modes other than FE_TONEAREST are painful. */ - if (spread > LDBL_MANT_DIG * 2) { - fenv_t env; - feraiseexcept(FE_INEXACT); - switch(oround) { - case FE_TONEAREST: - return (x * y); - case FE_TOWARDZERO: - if (x > 0.0 ^ y < 0.0 ^ z < 0.0) - return (x * y); - feholdexcept(&env); - s = x * y; - if (!fetestexcept(FE_INEXACT)) - s = nextafterl(s, 0); - feupdateenv(&env); - return (s); - case FE_DOWNWARD: - if (z > 0.0) - return (x * y); - feholdexcept(&env); - s = x * y; - if (!fetestexcept(FE_INEXACT)) - s = nextafterl(s, -INFINITY); - feupdateenv(&env); - return (s); - default: /* FE_UPWARD */ - if (z < 0.0) - return (x * y); - feholdexcept(&env); - s = x * y; - if (!fetestexcept(FE_INEXACT)) - s = nextafterl(s, INFINITY); - feupdateenv(&env); - return (s); - } - } if (spread < -LDBL_MANT_DIG) { feraiseexcept(FE_INEXACT); if (!isnormal(z)) @@ -196,28 +220,49 @@ fmal(long double x, long double y, long double z) return (z); } } + if (spread <= LDBL_MANT_DIG * 2) + zs = ldexpl(zs, -spread); + else + zs = copysignl(LDBL_MIN, zs); fesetround(FE_TONEAREST); + /* + * Basic approach for round-to-nearest: + * + * (xy.hi, xy.lo) = x * y (exact) + * (r.hi, r.lo) = xy.hi + z (exact) + * adj = xy.lo + r.lo (inexact; low bit is sticky) + * result = r.hi + adj (correctly rounded) + */ xy = dd_mul(xs, ys); - zs = ldexpl(zs, -spread); r = dd_add(xy.hi, zs); - r.lo += xy.lo; - spread = ex + ey; - if (spread + ilogbl(r.hi) > -16383) { + if (r.hi == 0.0) { + /* + * When the addends cancel to 0, ensure that the result has + * the correct sign. + */ fesetround(oround); - r.hi = r.hi + r.lo; - } else { + volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ + return (xy.hi + vzs); + } + + spread = ex + ey; + + if (oround != FE_TONEAREST) { /* - * The result is subnormal, so we round before scaling to - * avoid double rounding. + * There is no need to worry about double rounding in directed + * rounding modes. */ - p = ldexpl(copysignl(0x1p-16382L, r.hi), -spread); - r2 = dd_add(r.hi, p); - r2.lo += r.lo; fesetround(oround); - r.hi = (r2.hi + r2.lo) - p; + adj = r.lo + xy.lo; + return (ldexpl(r.hi + adj, spread)); } - return (ldexpl(r.hi, spread)); + + adj = add_adjusted(r.lo, xy.lo); + if (spread + ilogbl(r.hi) > -16383) + return (ldexpl(r.hi + adj, spread)); + else + return (add_and_denormalize(r.hi, adj, spread)); } -- cgit v1.1