diff options
Diffstat (limited to 'arch/arm/nwfpe/softfloat.c')
-rw-r--r-- | arch/arm/nwfpe/softfloat.c | 348 |
1 files changed, 163 insertions, 185 deletions
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c index e038dd3..f9f0491 100644 --- a/arch/arm/nwfpe/softfloat.c +++ b/arch/arm/nwfpe/softfloat.c @@ -36,16 +36,6 @@ this code that are retained. /* ------------------------------------------------------------------------------- -Floating-point rounding mode, extended double-precision rounding precision, -and exception flags. -------------------------------------------------------------------------------- -*/ -int8 float_rounding_mode = float_round_nearest_even; -int8 floatx80_rounding_precision = 80; -int8 float_exception_flags; - -/* -------------------------------------------------------------------------------- Primitive arithmetic functions, including multi-word arithmetic, and division and square root approximations. (Can be specialized to target if desired.) @@ -77,14 +67,14 @@ input is too large, however, the invalid exception is raised and the largest positive or negative integer is returned. ------------------------------------------------------------------------------- */ -static int32 roundAndPackInt32( flag zSign, bits64 absZ ) +static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ ) { int8 roundingMode; flag roundNearestEven; int8 roundIncrement, roundBits; int32 z; - roundingMode = float_rounding_mode; + roundingMode = roundData->mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); roundIncrement = 0x40; if ( ! roundNearestEven ) { @@ -107,10 +97,10 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ ) z = absZ; if ( zSign ) z = - z; if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { - float_exception_flags |= float_flag_invalid; + roundData->exception |= float_flag_invalid; return zSign ? 0x80000000 : 0x7FFFFFFF; } - if ( roundBits ) float_exception_flags |= float_flag_inexact; + if ( roundBits ) roundData->exception |= float_flag_inexact; return z; } @@ -224,14 +214,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig ) { int8 roundingMode; flag roundNearestEven; int8 roundIncrement, roundBits; flag isTiny; - roundingMode = float_rounding_mode; + roundingMode = roundData->mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); roundIncrement = 0x40; if ( ! roundNearestEven ) { @@ -254,7 +244,7 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) || ( ( zExp == 0xFD ) && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) ) { - float_raise( float_flag_overflow | float_flag_inexact ); + roundData->exception |= float_flag_overflow | float_flag_inexact; return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); } if ( zExp < 0 ) { @@ -265,10 +255,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) shift32RightJamming( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x7F; - if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; } } - if ( roundBits ) float_exception_flags |= float_flag_inexact; + if ( roundBits ) roundData->exception |= float_flag_inexact; zSig = ( zSig + roundIncrement )>>7; zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); if ( zSig == 0 ) zExp = 0; @@ -287,12 +277,12 @@ point exponent. ------------------------------------------------------------------------------- */ static float32 - normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) + normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig ) { int8 shiftCount; shiftCount = countLeadingZeros32( zSig ) - 1; - return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); + return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount ); } @@ -395,14 +385,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig ) { int8 roundingMode; flag roundNearestEven; int16 roundIncrement, roundBits; flag isTiny; - roundingMode = float_rounding_mode; + roundingMode = roundData->mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); roundIncrement = 0x200; if ( ! roundNearestEven ) { @@ -427,7 +417,7 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) ) { //register int lr = __builtin_return_address(0); //printk("roundAndPackFloat64 called from 0x%08x\n",lr); - float_raise( float_flag_overflow | float_flag_inexact ); + roundData->exception |= float_flag_overflow | float_flag_inexact; return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); } if ( zExp < 0 ) { @@ -438,10 +428,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) shift64RightJamming( zSig, - zExp, &zSig ); zExp = 0; roundBits = zSig & 0x3FF; - if ( isTiny && roundBits ) float_raise( float_flag_underflow ); + if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; } } - if ( roundBits ) float_exception_flags |= float_flag_inexact; + if ( roundBits ) roundData->exception |= float_flag_inexact; zSig = ( zSig + roundIncrement )>>10; zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); if ( zSig == 0 ) zExp = 0; @@ -460,12 +450,12 @@ point exponent. ------------------------------------------------------------------------------- */ static float64 - normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) + normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig ) { int8 shiftCount; shiftCount = countLeadingZeros64( zSig ) - 1; - return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); + return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount ); } @@ -572,14 +562,15 @@ Floating-point Arithmetic. */ static floatx80 roundAndPackFloatx80( - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) { - int8 roundingMode; + int8 roundingMode, roundingPrecision; flag roundNearestEven, increment, isTiny; int64 roundIncrement, roundMask, roundBits; - roundingMode = float_rounding_mode; + roundingMode = roundData->mode; + roundingPrecision = roundData->precision; roundNearestEven = ( roundingMode == float_round_nearest_even ); if ( roundingPrecision == 80 ) goto precision80; if ( roundingPrecision == 64 ) { @@ -623,8 +614,8 @@ static floatx80 shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); zExp = 0; roundBits = zSig0 & roundMask; - if ( isTiny && roundBits ) float_raise( float_flag_underflow ); - if ( roundBits ) float_exception_flags |= float_flag_inexact; + if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; + if ( roundBits ) roundData->exception |= float_flag_inexact; zSig0 += roundIncrement; if ( (sbits64) zSig0 < 0 ) zExp = 1; roundIncrement = roundMask + 1; @@ -635,7 +626,7 @@ static floatx80 return packFloatx80( zSign, zExp, zSig0 ); } } - if ( roundBits ) float_exception_flags |= float_flag_inexact; + if ( roundBits ) roundData->exception |= float_flag_inexact; zSig0 += roundIncrement; if ( zSig0 < roundIncrement ) { ++zExp; @@ -672,7 +663,7 @@ static floatx80 ) { roundMask = 0; overflow: - float_raise( float_flag_overflow | float_flag_inexact ); + roundData->exception |= float_flag_overflow | float_flag_inexact; if ( ( roundingMode == float_round_to_zero ) || ( zSign && ( roundingMode == float_round_up ) ) || ( ! zSign && ( roundingMode == float_round_down ) ) @@ -689,8 +680,8 @@ static floatx80 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); zExp = 0; - if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); - if ( zSig1 ) float_exception_flags |= float_flag_inexact; + if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow; + if ( zSig1 ) roundData->exception |= float_flag_inexact; if ( roundNearestEven ) { increment = ( (sbits64) zSig1 < 0 ); } @@ -710,7 +701,7 @@ static floatx80 return packFloatx80( zSign, zExp, zSig0 ); } } - if ( zSig1 ) float_exception_flags |= float_flag_inexact; + if ( zSig1 ) roundData->exception |= float_flag_inexact; if ( increment ) { ++zSig0; if ( zSig0 == 0 ) { @@ -740,7 +731,7 @@ normalized. */ static floatx80 normalizeRoundAndPackFloatx80( - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 + struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) { int8 shiftCount; @@ -754,7 +745,7 @@ static floatx80 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); zExp -= shiftCount; return - roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); + roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 ); } @@ -767,14 +758,14 @@ the single-precision floating-point format. The conversion is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 int32_to_float32( int32 a ) +float32 int32_to_float32(struct roundingData *roundData, int32 a) { flag zSign; if ( a == 0 ) return 0; if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); zSign = ( a < 0 ); - return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); + return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a ); } @@ -840,7 +831,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the largest integer with the same sign as `a' is returned. ------------------------------------------------------------------------------- */ -int32 float32_to_int32( float32 a ) +int32 float32_to_int32( struct roundingData *roundData, float32 a ) { flag aSign; int16 aExp, shiftCount; @@ -856,7 +847,7 @@ int32 float32_to_int32( float32 a ) zSig = aSig; zSig <<= 32; if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); - return roundAndPackInt32( aSign, zSig ); + return roundAndPackInt32( roundData, aSign, zSig ); } @@ -889,13 +880,13 @@ int32 float32_to_int32_round_to_zero( float32 a ) return 0x80000000; } else if ( aExp <= 0x7E ) { - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; + if ( aExp | aSig ) float_raise( float_flag_inexact ); return 0; } aSig = ( aSig | 0x00800000 )<<8; z = aSig>>( - shiftCount ); if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { - float_exception_flags |= float_flag_inexact; + float_raise( float_flag_inexact ); } return aSign ? - z : z; @@ -973,7 +964,7 @@ operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 float32_round_to_int( float32 a ) +float32 float32_round_to_int( struct roundingData *roundData, float32 a ) { flag aSign; int16 aExp; @@ -988,11 +979,12 @@ float32 float32_round_to_int( float32 a ) } return a; } + roundingMode = roundData->mode; if ( aExp <= 0x7E ) { if ( (bits32) ( a<<1 ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; + roundData->exception |= float_flag_inexact; aSign = extractFloat32Sign( a ); - switch ( float_rounding_mode ) { + switch ( roundingMode ) { case float_round_nearest_even: if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { return packFloat32( aSign, 0x7F, 0 ); @@ -1009,7 +1001,6 @@ float32 float32_round_to_int( float32 a ) lastBitMask <<= 0x96 - aExp; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = float_rounding_mode; if ( roundingMode == float_round_nearest_even ) { z += lastBitMask>>1; if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; @@ -1020,7 +1011,7 @@ float32 float32_round_to_int( float32 a ) } } z &= ~ roundBitsMask; - if ( z != a ) float_exception_flags |= float_flag_inexact; + if ( z != a ) roundData->exception |= float_flag_inexact; return z; } @@ -1034,7 +1025,7 @@ addition is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) +static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign ) { int16 aExp, bExp, zExp; bits32 aSig, bSig, zSig; @@ -1093,7 +1084,7 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) ++zExp; } roundAndPack: - return roundAndPackFloat32( zSign, zExp, zSig ); + return roundAndPackFloat32( roundData, zSign, zExp, zSig ); } @@ -1106,7 +1097,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) +static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign ) { int16 aExp, bExp, zExp; bits32 aSig, bSig, zSig; @@ -1123,7 +1114,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) if ( expDiff < 0 ) goto bExpBigger; if ( aExp == 0xFF ) { if ( aSig | bSig ) return propagateFloat32NaN( a, b ); - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } if ( aExp == 0 ) { @@ -1132,7 +1123,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) } if ( bSig < aSig ) goto aBigger; if ( aSig < bSig ) goto bBigger; - return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); + return packFloat32( roundData->mode == float_round_down, 0, 0 ); bExpBigger: if ( bExp == 0xFF ) { if ( bSig ) return propagateFloat32NaN( a, b ); @@ -1169,7 +1160,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) zExp = aExp; normalizeRoundAndPack: --zExp; - return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); + return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig ); } @@ -1180,17 +1171,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 float32_add( float32 a, float32 b ) +float32 float32_add( struct roundingData *roundData, float32 a, float32 b ) { flag aSign, bSign; aSign = extractFloat32Sign( a ); bSign = extractFloat32Sign( b ); if ( aSign == bSign ) { - return addFloat32Sigs( a, b, aSign ); + return addFloat32Sigs( roundData, a, b, aSign ); } else { - return subFloat32Sigs( a, b, aSign ); + return subFloat32Sigs( roundData, a, b, aSign ); } } @@ -1202,17 +1193,17 @@ Returns the result of subtracting the single-precision floating-point values for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 float32_sub( float32 a, float32 b ) +float32 float32_sub( struct roundingData *roundData, float32 a, float32 b ) { flag aSign, bSign; aSign = extractFloat32Sign( a ); bSign = extractFloat32Sign( b ); if ( aSign == bSign ) { - return subFloat32Sigs( a, b, aSign ); + return subFloat32Sigs( roundData, a, b, aSign ); } else { - return addFloat32Sigs( a, b, aSign ); + return addFloat32Sigs( roundData, a, b, aSign ); } } @@ -1224,7 +1215,7 @@ Returns the result of multiplying the single-precision floating-point values for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 float32_mul( float32 a, float32 b ) +float32 float32_mul( struct roundingData *roundData, float32 a, float32 b ) { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; @@ -1244,7 +1235,7 @@ float32 float32_mul( float32 a, float32 b ) return propagateFloat32NaN( a, b ); } if ( ( bExp | bSig ) == 0 ) { - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } return packFloat32( zSign, 0xFF, 0 ); @@ -1252,7 +1243,7 @@ float32 float32_mul( float32 a, float32 b ) if ( bExp == 0xFF ) { if ( bSig ) return propagateFloat32NaN( a, b ); if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } return packFloat32( zSign, 0xFF, 0 ); @@ -1274,7 +1265,7 @@ float32 float32_mul( float32 a, float32 b ) zSig <<= 1; --zExp; } - return roundAndPackFloat32( zSign, zExp, zSig ); + return roundAndPackFloat32( roundData, zSign, zExp, zSig ); } @@ -1285,7 +1276,7 @@ by the corresponding value `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 float32_div( float32 a, float32 b ) +float32 float32_div( struct roundingData *roundData, float32 a, float32 b ) { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; @@ -1302,7 +1293,7 @@ float32 float32_div( float32 a, float32 b ) if ( aSig ) return propagateFloat32NaN( a, b ); if ( bExp == 0xFF ) { if ( bSig ) return propagateFloat32NaN( a, b ); - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } return packFloat32( zSign, 0xFF, 0 ); @@ -1314,10 +1305,10 @@ float32 float32_div( float32 a, float32 b ) if ( bExp == 0 ) { if ( bSig == 0 ) { if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } - float_raise( float_flag_divbyzero ); + roundData->exception |= float_flag_divbyzero; return packFloat32( zSign, 0xFF, 0 ); } normalizeFloat32Subnormal( bSig, &bExp, &bSig ); @@ -1341,7 +1332,7 @@ float32 float32_div( float32 a, float32 b ) if ( ( zSig & 0x3F ) == 0 ) { zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); } - return roundAndPackFloat32( zSign, zExp, zSig ); + return roundAndPackFloat32( roundData, zSign, zExp, zSig ); } @@ -1352,7 +1343,7 @@ with respect to the corresponding value `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 float32_rem( float32 a, float32 b ) +float32 float32_rem( struct roundingData *roundData, float32 a, float32 b ) { flag aSign, bSign, zSign; int16 aExp, bExp, expDiff; @@ -1372,7 +1363,7 @@ float32 float32_rem( float32 a, float32 b ) if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { return propagateFloat32NaN( a, b ); } - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } if ( bExp == 0xFF ) { @@ -1381,7 +1372,7 @@ float32 float32_rem( float32 a, float32 b ) } if ( bExp == 0 ) { if ( bSig == 0 ) { - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } normalizeFloat32Subnormal( bSig, &bExp, &bSig ); @@ -1444,7 +1435,7 @@ float32 float32_rem( float32 a, float32 b ) } zSign = ( (sbits32) aSig < 0 ); if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); + return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig ); } @@ -1455,7 +1446,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 float32_sqrt( float32 a ) +float32 float32_sqrt( struct roundingData *roundData, float32 a ) { flag aSign; int16 aExp, zExp; @@ -1468,12 +1459,12 @@ float32 float32_sqrt( float32 a ) if ( aExp == 0xFF ) { if ( aSig ) return propagateFloat32NaN( a, 0 ); if ( ! aSign ) return a; - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } if ( aSign ) { if ( ( aExp | aSig ) == 0 ) return a; - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float32_default_nan; } if ( aExp == 0 ) { @@ -1499,7 +1490,7 @@ float32 float32_sqrt( float32 a ) } } shift32RightJamming( zSig, 1, &zSig ); - return roundAndPackFloat32( 0, zExp, zSig ); + return roundAndPackFloat32( roundData, 0, zExp, zSig ); } @@ -1611,9 +1602,7 @@ flag float32_le_quiet( float32 a, float32 b ) if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } + /* Do nothing, even if NaN as we're quiet */ return 0; } aSign = extractFloat32Sign( a ); @@ -1638,9 +1627,7 @@ flag float32_lt_quiet( float32 a, float32 b ) if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } + /* Do nothing, even if NaN as we're quiet */ return 0; } aSign = extractFloat32Sign( a ); @@ -1661,7 +1648,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the largest integer with the same sign as `a' is returned. ------------------------------------------------------------------------------- */ -int32 float64_to_int32( float64 a ) +int32 float64_to_int32( struct roundingData *roundData, float64 a ) { flag aSign; int16 aExp, shiftCount; @@ -1674,7 +1661,7 @@ int32 float64_to_int32( float64 a ) if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); shiftCount = 0x42C - aExp; if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32( aSign, aSig ); + return roundAndPackInt32( roundData, aSign, aSig ); } @@ -1705,7 +1692,7 @@ int32 float64_to_int32_round_to_zero( float64 a ) goto invalid; } else if ( 52 < shiftCount ) { - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + if ( aExp || aSig ) float_raise( float_flag_inexact ); return 0; } aSig |= LIT64( 0x0010000000000000 ); @@ -1715,11 +1702,11 @@ int32 float64_to_int32_round_to_zero( float64 a ) if ( aSign ) z = - z; if ( ( z < 0 ) ^ aSign ) { invalid: - float_exception_flags |= float_flag_invalid; + float_raise( float_flag_invalid ); return aSign ? 0x80000000 : 0x7FFFFFFF; } if ( ( aSig<<shiftCount ) != savedASig ) { - float_exception_flags |= float_flag_inexact; + float_raise( float_flag_inexact ); } return z; @@ -1736,7 +1723,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the largest positive integer is returned. ------------------------------------------------------------------------------- */ -int32 float64_to_uint32( float64 a ) +int32 float64_to_uint32( struct roundingData *roundData, float64 a ) { flag aSign; int16 aExp, shiftCount; @@ -1749,7 +1736,7 @@ int32 float64_to_uint32( float64 a ) if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); shiftCount = 0x42C - aExp; if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32( aSign, aSig ); + return roundAndPackInt32( roundData, aSign, aSig ); } /* @@ -1778,7 +1765,7 @@ int32 float64_to_uint32_round_to_zero( float64 a ) goto invalid; } else if ( 52 < shiftCount ) { - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + if ( aExp || aSig ) float_raise( float_flag_inexact ); return 0; } aSig |= LIT64( 0x0010000000000000 ); @@ -1788,11 +1775,11 @@ int32 float64_to_uint32_round_to_zero( float64 a ) if ( aSign ) z = - z; if ( ( z < 0 ) ^ aSign ) { invalid: - float_exception_flags |= float_flag_invalid; + float_raise( float_flag_invalid ); return aSign ? 0x80000000 : 0x7FFFFFFF; } if ( ( aSig<<shiftCount ) != savedASig ) { - float_exception_flags |= float_flag_inexact; + float_raise( float_flag_inexact ); } return z; } @@ -1805,7 +1792,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 float64_to_float32( float64 a ) +float32 float64_to_float32( struct roundingData *roundData, float64 a ) { flag aSign; int16 aExp; @@ -1825,7 +1812,7 @@ float32 float64_to_float32( float64 a ) zSig |= 0x40000000; aExp -= 0x381; } - return roundAndPackFloat32( aSign, aExp, zSig ); + return roundAndPackFloat32( roundData, aSign, aExp, zSig ); } @@ -1872,7 +1859,7 @@ operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float64 float64_round_to_int( float64 a ) +float64 float64_round_to_int( struct roundingData *roundData, float64 a ) { flag aSign; int16 aExp; @@ -1889,9 +1876,9 @@ float64 float64_round_to_int( float64 a ) } if ( aExp <= 0x3FE ) { if ( (bits64) ( a<<1 ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; + roundData->exception |= float_flag_inexact; aSign = extractFloat64Sign( a ); - switch ( float_rounding_mode ) { + switch ( roundData->mode ) { case float_round_nearest_even: if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { return packFloat64( aSign, 0x3FF, 0 ); @@ -1909,7 +1896,7 @@ float64 float64_round_to_int( float64 a ) lastBitMask <<= 0x433 - aExp; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = float_rounding_mode; + roundingMode = roundData->mode; if ( roundingMode == float_round_nearest_even ) { z += lastBitMask>>1; if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; @@ -1920,7 +1907,7 @@ float64 float64_round_to_int( float64 a ) } } z &= ~ roundBitsMask; - if ( z != a ) float_exception_flags |= float_flag_inexact; + if ( z != a ) roundData->exception |= float_flag_inexact; return z; } @@ -1934,7 +1921,7 @@ addition is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign ) { int16 aExp, bExp, zExp; bits64 aSig, bSig, zSig; @@ -1993,7 +1980,7 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) ++zExp; } roundAndPack: - return roundAndPackFloat64( zSign, zExp, zSig ); + return roundAndPackFloat64( roundData, zSign, zExp, zSig ); } @@ -2006,7 +1993,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) +static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign ) { int16 aExp, bExp, zExp; bits64 aSig, bSig, zSig; @@ -2023,7 +2010,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) if ( expDiff < 0 ) goto bExpBigger; if ( aExp == 0x7FF ) { if ( aSig | bSig ) return propagateFloat64NaN( a, b ); - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } if ( aExp == 0 ) { @@ -2032,7 +2019,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) } if ( bSig < aSig ) goto aBigger; if ( aSig < bSig ) goto bBigger; - return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); + return packFloat64( roundData->mode == float_round_down, 0, 0 ); bExpBigger: if ( bExp == 0x7FF ) { if ( bSig ) return propagateFloat64NaN( a, b ); @@ -2069,7 +2056,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) zExp = aExp; normalizeRoundAndPack: --zExp; - return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); + return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig ); } @@ -2080,17 +2067,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float64 float64_add( float64 a, float64 b ) +float64 float64_add( struct roundingData *roundData, float64 a, float64 b ) { flag aSign, bSign; aSign = extractFloat64Sign( a ); bSign = extractFloat64Sign( b ); if ( aSign == bSign ) { - return addFloat64Sigs( a, b, aSign ); + return addFloat64Sigs( roundData, a, b, aSign ); } else { - return subFloat64Sigs( a, b, aSign ); + return subFloat64Sigs( roundData, a, b, aSign ); } } @@ -2102,17 +2089,17 @@ Returns the result of subtracting the double-precision floating-point values for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float64 float64_sub( float64 a, float64 b ) +float64 float64_sub( struct roundingData *roundData, float64 a, float64 b ) { flag aSign, bSign; aSign = extractFloat64Sign( a ); bSign = extractFloat64Sign( b ); if ( aSign == bSign ) { - return subFloat64Sigs( a, b, aSign ); + return subFloat64Sigs( roundData, a, b, aSign ); } else { - return addFloat64Sigs( a, b, aSign ); + return addFloat64Sigs( roundData, a, b, aSign ); } } @@ -2124,7 +2111,7 @@ Returns the result of multiplying the double-precision floating-point values for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float64 float64_mul( float64 a, float64 b ) +float64 float64_mul( struct roundingData *roundData, float64 a, float64 b ) { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; @@ -2142,7 +2129,7 @@ float64 float64_mul( float64 a, float64 b ) return propagateFloat64NaN( a, b ); } if ( ( bExp | bSig ) == 0 ) { - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } return packFloat64( zSign, 0x7FF, 0 ); @@ -2150,7 +2137,7 @@ float64 float64_mul( float64 a, float64 b ) if ( bExp == 0x7FF ) { if ( bSig ) return propagateFloat64NaN( a, b ); if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } return packFloat64( zSign, 0x7FF, 0 ); @@ -2172,7 +2159,7 @@ float64 float64_mul( float64 a, float64 b ) zSig0 <<= 1; --zExp; } - return roundAndPackFloat64( zSign, zExp, zSig0 ); + return roundAndPackFloat64( roundData, zSign, zExp, zSig0 ); } @@ -2183,7 +2170,7 @@ by the corresponding value `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float64 float64_div( float64 a, float64 b ) +float64 float64_div( struct roundingData *roundData, float64 a, float64 b ) { flag aSign, bSign, zSign; int16 aExp, bExp, zExp; @@ -2202,7 +2189,7 @@ float64 float64_div( float64 a, float64 b ) if ( aSig ) return propagateFloat64NaN( a, b ); if ( bExp == 0x7FF ) { if ( bSig ) return propagateFloat64NaN( a, b ); - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } return packFloat64( zSign, 0x7FF, 0 ); @@ -2214,10 +2201,10 @@ float64 float64_div( float64 a, float64 b ) if ( bExp == 0 ) { if ( bSig == 0 ) { if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } - float_raise( float_flag_divbyzero ); + roundData->exception |= float_flag_divbyzero; return packFloat64( zSign, 0x7FF, 0 ); } normalizeFloat64Subnormal( bSig, &bExp, &bSig ); @@ -2243,7 +2230,7 @@ float64 float64_div( float64 a, float64 b ) } zSig |= ( rem1 != 0 ); } - return roundAndPackFloat64( zSign, zExp, zSig ); + return roundAndPackFloat64( roundData, zSign, zExp, zSig ); } @@ -2254,7 +2241,7 @@ with respect to the corresponding value `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float64 float64_rem( float64 a, float64 b ) +float64 float64_rem( struct roundingData *roundData, float64 a, float64 b ) { flag aSign, bSign, zSign; int16 aExp, bExp, expDiff; @@ -2272,7 +2259,7 @@ float64 float64_rem( float64 a, float64 b ) if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { return propagateFloat64NaN( a, b ); } - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } if ( bExp == 0x7FF ) { @@ -2281,7 +2268,7 @@ float64 float64_rem( float64 a, float64 b ) } if ( bExp == 0 ) { if ( bSig == 0 ) { - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } normalizeFloat64Subnormal( bSig, &bExp, &bSig ); @@ -2329,7 +2316,7 @@ float64 float64_rem( float64 a, float64 b ) } zSign = ( (sbits64) aSig < 0 ); if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); + return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig ); } @@ -2340,7 +2327,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float64 float64_sqrt( float64 a ) +float64 float64_sqrt( struct roundingData *roundData, float64 a ) { flag aSign; int16 aExp, zExp; @@ -2354,12 +2341,12 @@ float64 float64_sqrt( float64 a ) if ( aExp == 0x7FF ) { if ( aSig ) return propagateFloat64NaN( a, a ); if ( ! aSign ) return a; - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } if ( aSign ) { if ( ( aExp | aSig ) == 0 ) return a; - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; return float64_default_nan; } if ( aExp == 0 ) { @@ -2390,7 +2377,7 @@ float64 float64_sqrt( float64 a ) } } shift64RightJamming( zSig, 1, &zSig ); - return roundAndPackFloat64( 0, zExp, zSig ); + return roundAndPackFloat64( roundData, 0, zExp, zSig ); } @@ -2502,9 +2489,7 @@ flag float64_le_quiet( float64 a, float64 b ) if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } + /* Do nothing, even if NaN as we're quiet */ return 0; } aSign = extractFloat64Sign( a ); @@ -2529,9 +2514,7 @@ flag float64_lt_quiet( float64 a, float64 b ) if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } + /* Do nothing, even if NaN as we're quiet */ return 0; } aSign = extractFloat64Sign( a ); @@ -2554,7 +2537,7 @@ largest positive integer is returned. Otherwise, if the conversion overflows, the largest integer with the same sign as `a' is returned. ------------------------------------------------------------------------------- */ -int32 floatx80_to_int32( floatx80 a ) +int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a ) { flag aSign; int32 aExp, shiftCount; @@ -2567,7 +2550,7 @@ int32 floatx80_to_int32( floatx80 a ) shiftCount = 0x4037 - aExp; if ( shiftCount <= 0 ) shiftCount = 1; shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32( aSign, aSig ); + return roundAndPackInt32( roundData, aSign, aSig ); } @@ -2598,7 +2581,7 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a ) goto invalid; } else if ( 63 < shiftCount ) { - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; + if ( aExp || aSig ) float_raise( float_flag_inexact ); return 0; } savedASig = aSig; @@ -2607,11 +2590,11 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a ) if ( aSign ) z = - z; if ( ( z < 0 ) ^ aSign ) { invalid: - float_exception_flags |= float_flag_invalid; + float_raise( float_flag_invalid ); return aSign ? 0x80000000 : 0x7FFFFFFF; } if ( ( aSig<<shiftCount ) != savedASig ) { - float_exception_flags |= float_flag_inexact; + float_raise( float_flag_inexact ); } return z; @@ -2625,7 +2608,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float32 floatx80_to_float32( floatx80 a ) +float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a ) { flag aSign; int32 aExp; @@ -2642,7 +2625,7 @@ float32 floatx80_to_float32( floatx80 a ) } shift64RightJamming( aSig, 33, &aSig ); if ( aExp || aSig ) aExp -= 0x3F81; - return roundAndPackFloat32( aSign, aExp, aSig ); + return roundAndPackFloat32( roundData, aSign, aExp, aSig ); } @@ -2654,7 +2637,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -float64 floatx80_to_float64( floatx80 a ) +float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a ) { flag aSign; int32 aExp; @@ -2671,7 +2654,7 @@ float64 floatx80_to_float64( floatx80 a ) } shift64RightJamming( aSig, 1, &zSig ); if ( aExp || aSig ) aExp -= 0x3C01; - return roundAndPackFloat64( aSign, aExp, zSig ); + return roundAndPackFloat64( roundData, aSign, aExp, zSig ); } @@ -2683,7 +2666,7 @@ value. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -floatx80 floatx80_round_to_int( floatx80 a ) +floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a ) { flag aSign; int32 aExp; @@ -2703,9 +2686,9 @@ floatx80 floatx80_round_to_int( floatx80 a ) && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { return a; } - float_exception_flags |= float_flag_inexact; + roundData->exception |= float_flag_inexact; aSign = extractFloatx80Sign( a ); - switch ( float_rounding_mode ) { + switch ( roundData->mode ) { case float_round_nearest_even: if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { @@ -2729,7 +2712,7 @@ floatx80 floatx80_round_to_int( floatx80 a ) lastBitMask <<= 0x403E - aExp; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = float_rounding_mode; + roundingMode = roundData->mode; if ( roundingMode == float_round_nearest_even ) { z.low += lastBitMask>>1; if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; @@ -2744,7 +2727,7 @@ floatx80 floatx80_round_to_int( floatx80 a ) ++z.high; z.low = LIT64( 0x8000000000000000 ); } - if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; + if ( z.low != a.low ) roundData->exception |= float_flag_inexact; return z; } @@ -2758,7 +2741,7 @@ The addition is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign ) { int32 aExp, bExp, zExp; bits64 aSig, bSig, zSig0, zSig1; @@ -2814,7 +2797,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) roundAndPack: return roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + roundData, zSign, zExp, zSig0, zSig1 ); } @@ -2827,7 +2810,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign ) { int32 aExp, bExp, zExp; bits64 aSig, bSig, zSig0, zSig1; @@ -2845,7 +2828,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) if ( (bits64) ( ( aSig | bSig )<<1 ) ) { return propagateFloatx80NaN( a, b ); } - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; z.low = floatx80_default_nan_low; z.high = floatx80_default_nan_high; return z; @@ -2857,7 +2840,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) zSig1 = 0; if ( bSig < aSig ) goto aBigger; if ( aSig < bSig ) goto bBigger; - return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); + return packFloatx80( roundData->mode == float_round_down, 0, 0 ); bExpBigger: if ( bExp == 0x7FFF ) { if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); @@ -2883,7 +2866,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) normalizeRoundAndPack: return normalizeRoundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + roundData, zSign, zExp, zSig0, zSig1 ); } @@ -2894,17 +2877,17 @@ values `a' and `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -floatx80 floatx80_add( floatx80 a, floatx80 b ) +floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b ) { flag aSign, bSign; aSign = extractFloatx80Sign( a ); bSign = extractFloatx80Sign( b ); if ( aSign == bSign ) { - return addFloatx80Sigs( a, b, aSign ); + return addFloatx80Sigs( roundData, a, b, aSign ); } else { - return subFloatx80Sigs( a, b, aSign ); + return subFloatx80Sigs( roundData, a, b, aSign ); } } @@ -2916,17 +2899,17 @@ point values `a' and `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -floatx80 floatx80_sub( floatx80 a, floatx80 b ) +floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b ) { flag aSign, bSign; aSign = extractFloatx80Sign( a ); bSign = extractFloatx80Sign( b ); if ( aSign == bSign ) { - return subFloatx80Sigs( a, b, aSign ); + return subFloatx80Sigs( roundData, a, b, aSign ); } else { - return addFloatx80Sigs( a, b, aSign ); + return addFloatx80Sigs( roundData, a, b, aSign ); } } @@ -2938,7 +2921,7 @@ point values `a' and `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -floatx80 floatx80_mul( floatx80 a, floatx80 b ) +floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b ) { flag aSign, bSign, zSign; int32 aExp, bExp, zExp; @@ -2964,7 +2947,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b ) if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); if ( ( aExp | aSig ) == 0 ) { invalid: - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; z.low = floatx80_default_nan_low; z.high = floatx80_default_nan_high; return z; @@ -2987,7 +2970,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b ) } return roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + roundData, zSign, zExp, zSig0, zSig1 ); } @@ -2998,7 +2981,7 @@ value `a' by the corresponding value `b'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -floatx80 floatx80_div( floatx80 a, floatx80 b ) +floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b ) { flag aSign, bSign, zSign; int32 aExp, bExp, zExp; @@ -3029,12 +3012,12 @@ floatx80 floatx80_div( floatx80 a, floatx80 b ) if ( bSig == 0 ) { if ( ( aExp | aSig ) == 0 ) { invalid: - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; z.low = floatx80_default_nan_low; z.high = floatx80_default_nan_high; return z; } - float_raise( float_flag_divbyzero ); + roundData->exception |= float_flag_divbyzero; return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); @@ -3068,7 +3051,7 @@ floatx80 floatx80_div( floatx80 a, floatx80 b ) } return roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); + roundData, zSign, zExp, zSig0, zSig1 ); } @@ -3079,7 +3062,7 @@ Returns the remainder of the extended double-precision floating-point value according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -floatx80 floatx80_rem( floatx80 a, floatx80 b ) +floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b ) { flag aSign, bSign, zSign; int32 aExp, bExp, expDiff; @@ -3107,7 +3090,7 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b ) if ( bExp == 0 ) { if ( bSig == 0 ) { invalid: - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; z.low = floatx80_default_nan_low; z.high = floatx80_default_nan_high; return z; @@ -3164,9 +3147,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b ) aSig1 = alternateASig1; zSign = ! zSign; } + return normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1 ); + roundData, zSign, bExp + expDiff, aSig0, aSig1 ); } @@ -3177,7 +3161,7 @@ value `a'. The operation is performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic. ------------------------------------------------------------------------------- */ -floatx80 floatx80_sqrt( floatx80 a ) +floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a ) { flag aSign; int32 aExp, zExp; @@ -3197,7 +3181,7 @@ floatx80 floatx80_sqrt( floatx80 a ) if ( aSign ) { if ( ( aExp | aSig0 ) == 0 ) return a; invalid: - float_raise( float_flag_invalid ); + roundData->exception |= float_flag_invalid; z.low = floatx80_default_nan_low; z.high = floatx80_default_nan_high; return z; @@ -3242,7 +3226,7 @@ floatx80 floatx80_sqrt( floatx80 a ) } return roundAndPackFloatx80( - floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); + roundData, 0, zExp, zSig0, zSig1 ); } @@ -3390,10 +3374,7 @@ flag floatx80_le_quiet( floatx80 a, floatx80 b ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) && (bits64) ( extractFloatx80Frac( b )<<1 ) ) ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } + /* Do nothing, even if NaN as we're quiet */ return 0; } aSign = extractFloatx80Sign( a ); @@ -3427,10 +3408,7 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) && (bits64) ( extractFloatx80Frac( b )<<1 ) ) ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } + /* Do nothing, even if NaN as we're quiet */ return 0; } aSign = extractFloatx80Sign( a ); |