summaryrefslogtreecommitdiffstats
path: root/arch/arm/nwfpe/softfloat.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/nwfpe/softfloat.c')
-rw-r--r--arch/arm/nwfpe/softfloat.c348
1 files changed, 163 insertions, 185 deletions
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c
index e038dd3..f9f0491 100644
--- a/arch/arm/nwfpe/softfloat.c
+++ b/arch/arm/nwfpe/softfloat.c
@@ -36,16 +36,6 @@ this code that are retained.
/*
-------------------------------------------------------------------------------
-Floating-point rounding mode, extended double-precision rounding precision,
-and exception flags.
--------------------------------------------------------------------------------
-*/
-int8 float_rounding_mode = float_round_nearest_even;
-int8 floatx80_rounding_precision = 80;
-int8 float_exception_flags;
-
-/*
--------------------------------------------------------------------------------
Primitive arithmetic functions, including multi-word arithmetic, and
division and square root approximations. (Can be specialized to target if
desired.)
@@ -77,14 +67,14 @@ input is too large, however, the invalid exception is raised and the largest
positive or negative integer is returned.
-------------------------------------------------------------------------------
*/
-static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
{
int8 roundingMode;
flag roundNearestEven;
int8 roundIncrement, roundBits;
int32 z;
- roundingMode = float_rounding_mode;
+ roundingMode = roundData->mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
roundIncrement = 0x40;
if ( ! roundNearestEven ) {
@@ -107,10 +97,10 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ )
z = absZ;
if ( zSign ) z = - z;
if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
- float_exception_flags |= float_flag_invalid;
+ roundData->exception |= float_flag_invalid;
return zSign ? 0x80000000 : 0x7FFFFFFF;
}
- if ( roundBits ) float_exception_flags |= float_flag_inexact;
+ if ( roundBits ) roundData->exception |= float_flag_inexact;
return z;
}
@@ -224,14 +214,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for
Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
{
int8 roundingMode;
flag roundNearestEven;
int8 roundIncrement, roundBits;
flag isTiny;
- roundingMode = float_rounding_mode;
+ roundingMode = roundData->mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
roundIncrement = 0x40;
if ( ! roundNearestEven ) {
@@ -254,7 +244,7 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
|| ( ( zExp == 0xFD )
&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
) {
- float_raise( float_flag_overflow | float_flag_inexact );
+ roundData->exception |= float_flag_overflow | float_flag_inexact;
return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
}
if ( zExp < 0 ) {
@@ -265,10 +255,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
shift32RightJamming( zSig, - zExp, &zSig );
zExp = 0;
roundBits = zSig & 0x7F;
- if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+ if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
}
}
- if ( roundBits ) float_exception_flags |= float_flag_inexact;
+ if ( roundBits ) roundData->exception |= float_flag_inexact;
zSig = ( zSig + roundIncrement )>>7;
zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
if ( zSig == 0 ) zExp = 0;
@@ -287,12 +277,12 @@ point exponent.
-------------------------------------------------------------------------------
*/
static float32
- normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+ normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
{
int8 shiftCount;
shiftCount = countLeadingZeros32( zSig ) - 1;
- return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
+ return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
}
@@ -395,14 +385,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for
Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
{
int8 roundingMode;
flag roundNearestEven;
int16 roundIncrement, roundBits;
flag isTiny;
- roundingMode = float_rounding_mode;
+ roundingMode = roundData->mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
roundIncrement = 0x200;
if ( ! roundNearestEven ) {
@@ -427,7 +417,7 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
) {
//register int lr = __builtin_return_address(0);
//printk("roundAndPackFloat64 called from 0x%08x\n",lr);
- float_raise( float_flag_overflow | float_flag_inexact );
+ roundData->exception |= float_flag_overflow | float_flag_inexact;
return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
}
if ( zExp < 0 ) {
@@ -438,10 +428,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
shift64RightJamming( zSig, - zExp, &zSig );
zExp = 0;
roundBits = zSig & 0x3FF;
- if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+ if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
}
}
- if ( roundBits ) float_exception_flags |= float_flag_inexact;
+ if ( roundBits ) roundData->exception |= float_flag_inexact;
zSig = ( zSig + roundIncrement )>>10;
zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
if ( zSig == 0 ) zExp = 0;
@@ -460,12 +450,12 @@ point exponent.
-------------------------------------------------------------------------------
*/
static float64
- normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+ normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
{
int8 shiftCount;
shiftCount = countLeadingZeros64( zSig ) - 1;
- return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
+ return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
}
@@ -572,14 +562,15 @@ Floating-point Arithmetic.
*/
static floatx80
roundAndPackFloatx80(
- int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
)
{
- int8 roundingMode;
+ int8 roundingMode, roundingPrecision;
flag roundNearestEven, increment, isTiny;
int64 roundIncrement, roundMask, roundBits;
- roundingMode = float_rounding_mode;
+ roundingMode = roundData->mode;
+ roundingPrecision = roundData->precision;
roundNearestEven = ( roundingMode == float_round_nearest_even );
if ( roundingPrecision == 80 ) goto precision80;
if ( roundingPrecision == 64 ) {
@@ -623,8 +614,8 @@ static floatx80
shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
zExp = 0;
roundBits = zSig0 & roundMask;
- if ( isTiny && roundBits ) float_raise( float_flag_underflow );
- if ( roundBits ) float_exception_flags |= float_flag_inexact;
+ if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
+ if ( roundBits ) roundData->exception |= float_flag_inexact;
zSig0 += roundIncrement;
if ( (sbits64) zSig0 < 0 ) zExp = 1;
roundIncrement = roundMask + 1;
@@ -635,7 +626,7 @@ static floatx80
return packFloatx80( zSign, zExp, zSig0 );
}
}
- if ( roundBits ) float_exception_flags |= float_flag_inexact;
+ if ( roundBits ) roundData->exception |= float_flag_inexact;
zSig0 += roundIncrement;
if ( zSig0 < roundIncrement ) {
++zExp;
@@ -672,7 +663,7 @@ static floatx80
) {
roundMask = 0;
overflow:
- float_raise( float_flag_overflow | float_flag_inexact );
+ roundData->exception |= float_flag_overflow | float_flag_inexact;
if ( ( roundingMode == float_round_to_zero )
|| ( zSign && ( roundingMode == float_round_up ) )
|| ( ! zSign && ( roundingMode == float_round_down ) )
@@ -689,8 +680,8 @@ static floatx80
|| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
zExp = 0;
- if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
- if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+ if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
+ if ( zSig1 ) roundData->exception |= float_flag_inexact;
if ( roundNearestEven ) {
increment = ( (sbits64) zSig1 < 0 );
}
@@ -710,7 +701,7 @@ static floatx80
return packFloatx80( zSign, zExp, zSig0 );
}
}
- if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+ if ( zSig1 ) roundData->exception |= float_flag_inexact;
if ( increment ) {
++zSig0;
if ( zSig0 == 0 ) {
@@ -740,7 +731,7 @@ normalized.
*/
static floatx80
normalizeRoundAndPackFloatx80(
- int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
)
{
int8 shiftCount;
@@ -754,7 +745,7 @@ static floatx80
shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
zExp -= shiftCount;
return
- roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+ roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
}
@@ -767,14 +758,14 @@ the single-precision floating-point format. The conversion is performed
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 int32_to_float32( int32 a )
+float32 int32_to_float32(struct roundingData *roundData, int32 a)
{
flag zSign;
if ( a == 0 ) return 0;
if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
zSign = ( a < 0 );
- return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
+ return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
}
@@ -840,7 +831,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the
largest integer with the same sign as `a' is returned.
-------------------------------------------------------------------------------
*/
-int32 float32_to_int32( float32 a )
+int32 float32_to_int32( struct roundingData *roundData, float32 a )
{
flag aSign;
int16 aExp, shiftCount;
@@ -856,7 +847,7 @@ int32 float32_to_int32( float32 a )
zSig = aSig;
zSig <<= 32;
if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
- return roundAndPackInt32( aSign, zSig );
+ return roundAndPackInt32( roundData, aSign, zSig );
}
@@ -889,13 +880,13 @@ int32 float32_to_int32_round_to_zero( float32 a )
return 0x80000000;
}
else if ( aExp <= 0x7E ) {
- if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+ if ( aExp | aSig ) float_raise( float_flag_inexact );
return 0;
}
aSig = ( aSig | 0x00800000 )<<8;
z = aSig>>( - shiftCount );
if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
- float_exception_flags |= float_flag_inexact;
+ float_raise( float_flag_inexact );
}
return aSign ? - z : z;
@@ -973,7 +964,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 float32_round_to_int( float32 a )
+float32 float32_round_to_int( struct roundingData *roundData, float32 a )
{
flag aSign;
int16 aExp;
@@ -988,11 +979,12 @@ float32 float32_round_to_int( float32 a )
}
return a;
}
+ roundingMode = roundData->mode;
if ( aExp <= 0x7E ) {
if ( (bits32) ( a<<1 ) == 0 ) return a;
- float_exception_flags |= float_flag_inexact;
+ roundData->exception |= float_flag_inexact;
aSign = extractFloat32Sign( a );
- switch ( float_rounding_mode ) {
+ switch ( roundingMode ) {
case float_round_nearest_even:
if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
return packFloat32( aSign, 0x7F, 0 );
@@ -1009,7 +1001,6 @@ float32 float32_round_to_int( float32 a )
lastBitMask <<= 0x96 - aExp;
roundBitsMask = lastBitMask - 1;
z = a;
- roundingMode = float_rounding_mode;
if ( roundingMode == float_round_nearest_even ) {
z += lastBitMask>>1;
if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
@@ -1020,7 +1011,7 @@ float32 float32_round_to_int( float32 a )
}
}
z &= ~ roundBitsMask;
- if ( z != a ) float_exception_flags |= float_flag_inexact;
+ if ( z != a ) roundData->exception |= float_flag_inexact;
return z;
}
@@ -1034,7 +1025,7 @@ addition is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits32 aSig, bSig, zSig;
@@ -1093,7 +1084,7 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
++zExp;
}
roundAndPack:
- return roundAndPackFloat32( zSign, zExp, zSig );
+ return roundAndPackFloat32( roundData, zSign, zExp, zSig );
}
@@ -1106,7 +1097,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE
Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits32 aSig, bSig, zSig;
@@ -1123,7 +1114,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
if ( expDiff < 0 ) goto bExpBigger;
if ( aExp == 0xFF ) {
if ( aSig | bSig ) return propagateFloat32NaN( a, b );
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
if ( aExp == 0 ) {
@@ -1132,7 +1123,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
}
if ( bSig < aSig ) goto aBigger;
if ( aSig < bSig ) goto bBigger;
- return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
+ return packFloat32( roundData->mode == float_round_down, 0, 0 );
bExpBigger:
if ( bExp == 0xFF ) {
if ( bSig ) return propagateFloat32NaN( a, b );
@@ -1169,7 +1160,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
zExp = aExp;
normalizeRoundAndPack:
--zExp;
- return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+ return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
}
@@ -1180,17 +1171,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for
Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 float32_add( float32 a, float32 b )
+float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
{
flag aSign, bSign;
aSign = extractFloat32Sign( a );
bSign = extractFloat32Sign( b );
if ( aSign == bSign ) {
- return addFloat32Sigs( a, b, aSign );
+ return addFloat32Sigs( roundData, a, b, aSign );
}
else {
- return subFloat32Sigs( a, b, aSign );
+ return subFloat32Sigs( roundData, a, b, aSign );
}
}
@@ -1202,17 +1193,17 @@ Returns the result of subtracting the single-precision floating-point values
for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 float32_sub( float32 a, float32 b )
+float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
{
flag aSign, bSign;
aSign = extractFloat32Sign( a );
bSign = extractFloat32Sign( b );
if ( aSign == bSign ) {
- return subFloat32Sigs( a, b, aSign );
+ return subFloat32Sigs( roundData, a, b, aSign );
}
else {
- return addFloat32Sigs( a, b, aSign );
+ return addFloat32Sigs( roundData, a, b, aSign );
}
}
@@ -1224,7 +1215,7 @@ Returns the result of multiplying the single-precision floating-point values
for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 float32_mul( float32 a, float32 b )
+float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
{
flag aSign, bSign, zSign;
int16 aExp, bExp, zExp;
@@ -1244,7 +1235,7 @@ float32 float32_mul( float32 a, float32 b )
return propagateFloat32NaN( a, b );
}
if ( ( bExp | bSig ) == 0 ) {
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
return packFloat32( zSign, 0xFF, 0 );
@@ -1252,7 +1243,7 @@ float32 float32_mul( float32 a, float32 b )
if ( bExp == 0xFF ) {
if ( bSig ) return propagateFloat32NaN( a, b );
if ( ( aExp | aSig ) == 0 ) {
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
return packFloat32( zSign, 0xFF, 0 );
@@ -1274,7 +1265,7 @@ float32 float32_mul( float32 a, float32 b )
zSig <<= 1;
--zExp;
}
- return roundAndPackFloat32( zSign, zExp, zSig );
+ return roundAndPackFloat32( roundData, zSign, zExp, zSig );
}
@@ -1285,7 +1276,7 @@ by the corresponding value `b'. The operation is performed according to the
IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 float32_div( float32 a, float32 b )
+float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
{
flag aSign, bSign, zSign;
int16 aExp, bExp, zExp;
@@ -1302,7 +1293,7 @@ float32 float32_div( float32 a, float32 b )
if ( aSig ) return propagateFloat32NaN( a, b );
if ( bExp == 0xFF ) {
if ( bSig ) return propagateFloat32NaN( a, b );
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
return packFloat32( zSign, 0xFF, 0 );
@@ -1314,10 +1305,10 @@ float32 float32_div( float32 a, float32 b )
if ( bExp == 0 ) {
if ( bSig == 0 ) {
if ( ( aExp | aSig ) == 0 ) {
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
- float_raise( float_flag_divbyzero );
+ roundData->exception |= float_flag_divbyzero;
return packFloat32( zSign, 0xFF, 0 );
}
normalizeFloat32Subnormal( bSig, &bExp, &bSig );
@@ -1341,7 +1332,7 @@ float32 float32_div( float32 a, float32 b )
if ( ( zSig & 0x3F ) == 0 ) {
zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
}
- return roundAndPackFloat32( zSign, zExp, zSig );
+ return roundAndPackFloat32( roundData, zSign, zExp, zSig );
}
@@ -1352,7 +1343,7 @@ with respect to the corresponding value `b'. The operation is performed
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 float32_rem( float32 a, float32 b )
+float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
{
flag aSign, bSign, zSign;
int16 aExp, bExp, expDiff;
@@ -1372,7 +1363,7 @@ float32 float32_rem( float32 a, float32 b )
if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
return propagateFloat32NaN( a, b );
}
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
if ( bExp == 0xFF ) {
@@ -1381,7 +1372,7 @@ float32 float32_rem( float32 a, float32 b )
}
if ( bExp == 0 ) {
if ( bSig == 0 ) {
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
normalizeFloat32Subnormal( bSig, &bExp, &bSig );
@@ -1444,7 +1435,7 @@ float32 float32_rem( float32 a, float32 b )
}
zSign = ( (sbits32) aSig < 0 );
if ( zSign ) aSig = - aSig;
- return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+ return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
}
@@ -1455,7 +1446,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 float32_sqrt( float32 a )
+float32 float32_sqrt( struct roundingData *roundData, float32 a )
{
flag aSign;
int16 aExp, zExp;
@@ -1468,12 +1459,12 @@ float32 float32_sqrt( float32 a )
if ( aExp == 0xFF ) {
if ( aSig ) return propagateFloat32NaN( a, 0 );
if ( ! aSign ) return a;
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
if ( aSign ) {
if ( ( aExp | aSig ) == 0 ) return a;
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float32_default_nan;
}
if ( aExp == 0 ) {
@@ -1499,7 +1490,7 @@ float32 float32_sqrt( float32 a )
}
}
shift32RightJamming( zSig, 1, &zSig );
- return roundAndPackFloat32( 0, zExp, zSig );
+ return roundAndPackFloat32( roundData, 0, zExp, zSig );
}
@@ -1611,9 +1602,7 @@ flag float32_le_quiet( float32 a, float32 b )
if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
) {
- if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
- float_raise( float_flag_invalid );
- }
+ /* Do nothing, even if NaN as we're quiet */
return 0;
}
aSign = extractFloat32Sign( a );
@@ -1638,9 +1627,7 @@ flag float32_lt_quiet( float32 a, float32 b )
if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
|| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
) {
- if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
- float_raise( float_flag_invalid );
- }
+ /* Do nothing, even if NaN as we're quiet */
return 0;
}
aSign = extractFloat32Sign( a );
@@ -1661,7 +1648,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the
largest integer with the same sign as `a' is returned.
-------------------------------------------------------------------------------
*/
-int32 float64_to_int32( float64 a )
+int32 float64_to_int32( struct roundingData *roundData, float64 a )
{
flag aSign;
int16 aExp, shiftCount;
@@ -1674,7 +1661,7 @@ int32 float64_to_int32( float64 a )
if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
shiftCount = 0x42C - aExp;
if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
- return roundAndPackInt32( aSign, aSig );
+ return roundAndPackInt32( roundData, aSign, aSig );
}
@@ -1705,7 +1692,7 @@ int32 float64_to_int32_round_to_zero( float64 a )
goto invalid;
}
else if ( 52 < shiftCount ) {
- if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+ if ( aExp || aSig ) float_raise( float_flag_inexact );
return 0;
}
aSig |= LIT64( 0x0010000000000000 );
@@ -1715,11 +1702,11 @@ int32 float64_to_int32_round_to_zero( float64 a )
if ( aSign ) z = - z;
if ( ( z < 0 ) ^ aSign ) {
invalid:
- float_exception_flags |= float_flag_invalid;
+ float_raise( float_flag_invalid );
return aSign ? 0x80000000 : 0x7FFFFFFF;
}
if ( ( aSig<<shiftCount ) != savedASig ) {
- float_exception_flags |= float_flag_inexact;
+ float_raise( float_flag_inexact );
}
return z;
@@ -1736,7 +1723,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the
largest positive integer is returned.
-------------------------------------------------------------------------------
*/
-int32 float64_to_uint32( float64 a )
+int32 float64_to_uint32( struct roundingData *roundData, float64 a )
{
flag aSign;
int16 aExp, shiftCount;
@@ -1749,7 +1736,7 @@ int32 float64_to_uint32( float64 a )
if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
shiftCount = 0x42C - aExp;
if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
- return roundAndPackInt32( aSign, aSig );
+ return roundAndPackInt32( roundData, aSign, aSig );
}
/*
@@ -1778,7 +1765,7 @@ int32 float64_to_uint32_round_to_zero( float64 a )
goto invalid;
}
else if ( 52 < shiftCount ) {
- if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+ if ( aExp || aSig ) float_raise( float_flag_inexact );
return 0;
}
aSig |= LIT64( 0x0010000000000000 );
@@ -1788,11 +1775,11 @@ int32 float64_to_uint32_round_to_zero( float64 a )
if ( aSign ) z = - z;
if ( ( z < 0 ) ^ aSign ) {
invalid:
- float_exception_flags |= float_flag_invalid;
+ float_raise( float_flag_invalid );
return aSign ? 0x80000000 : 0x7FFFFFFF;
}
if ( ( aSig<<shiftCount ) != savedASig ) {
- float_exception_flags |= float_flag_inexact;
+ float_raise( float_flag_inexact );
}
return z;
}
@@ -1805,7 +1792,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 float64_to_float32( float64 a )
+float32 float64_to_float32( struct roundingData *roundData, float64 a )
{
flag aSign;
int16 aExp;
@@ -1825,7 +1812,7 @@ float32 float64_to_float32( float64 a )
zSig |= 0x40000000;
aExp -= 0x381;
}
- return roundAndPackFloat32( aSign, aExp, zSig );
+ return roundAndPackFloat32( roundData, aSign, aExp, zSig );
}
@@ -1872,7 +1859,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float64 float64_round_to_int( float64 a )
+float64 float64_round_to_int( struct roundingData *roundData, float64 a )
{
flag aSign;
int16 aExp;
@@ -1889,9 +1876,9 @@ float64 float64_round_to_int( float64 a )
}
if ( aExp <= 0x3FE ) {
if ( (bits64) ( a<<1 ) == 0 ) return a;
- float_exception_flags |= float_flag_inexact;
+ roundData->exception |= float_flag_inexact;
aSign = extractFloat64Sign( a );
- switch ( float_rounding_mode ) {
+ switch ( roundData->mode ) {
case float_round_nearest_even:
if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
return packFloat64( aSign, 0x3FF, 0 );
@@ -1909,7 +1896,7 @@ float64 float64_round_to_int( float64 a )
lastBitMask <<= 0x433 - aExp;
roundBitsMask = lastBitMask - 1;
z = a;
- roundingMode = float_rounding_mode;
+ roundingMode = roundData->mode;
if ( roundingMode == float_round_nearest_even ) {
z += lastBitMask>>1;
if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
@@ -1920,7 +1907,7 @@ float64 float64_round_to_int( float64 a )
}
}
z &= ~ roundBitsMask;
- if ( z != a ) float_exception_flags |= float_flag_inexact;
+ if ( z != a ) roundData->exception |= float_flag_inexact;
return z;
}
@@ -1934,7 +1921,7 @@ addition is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits64 aSig, bSig, zSig;
@@ -1993,7 +1980,7 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
++zExp;
}
roundAndPack:
- return roundAndPackFloat64( zSign, zExp, zSig );
+ return roundAndPackFloat64( roundData, zSign, zExp, zSig );
}
@@ -2006,7 +1993,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE
Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits64 aSig, bSig, zSig;
@@ -2023,7 +2010,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
if ( expDiff < 0 ) goto bExpBigger;
if ( aExp == 0x7FF ) {
if ( aSig | bSig ) return propagateFloat64NaN( a, b );
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
if ( aExp == 0 ) {
@@ -2032,7 +2019,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
}
if ( bSig < aSig ) goto aBigger;
if ( aSig < bSig ) goto bBigger;
- return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
+ return packFloat64( roundData->mode == float_round_down, 0, 0 );
bExpBigger:
if ( bExp == 0x7FF ) {
if ( bSig ) return propagateFloat64NaN( a, b );
@@ -2069,7 +2056,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
zExp = aExp;
normalizeRoundAndPack:
--zExp;
- return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+ return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
}
@@ -2080,17 +2067,17 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for
Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float64 float64_add( float64 a, float64 b )
+float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
{
flag aSign, bSign;
aSign = extractFloat64Sign( a );
bSign = extractFloat64Sign( b );
if ( aSign == bSign ) {
- return addFloat64Sigs( a, b, aSign );
+ return addFloat64Sigs( roundData, a, b, aSign );
}
else {
- return subFloat64Sigs( a, b, aSign );
+ return subFloat64Sigs( roundData, a, b, aSign );
}
}
@@ -2102,17 +2089,17 @@ Returns the result of subtracting the double-precision floating-point values
for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float64 float64_sub( float64 a, float64 b )
+float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
{
flag aSign, bSign;
aSign = extractFloat64Sign( a );
bSign = extractFloat64Sign( b );
if ( aSign == bSign ) {
- return subFloat64Sigs( a, b, aSign );
+ return subFloat64Sigs( roundData, a, b, aSign );
}
else {
- return addFloat64Sigs( a, b, aSign );
+ return addFloat64Sigs( roundData, a, b, aSign );
}
}
@@ -2124,7 +2111,7 @@ Returns the result of multiplying the double-precision floating-point values
for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float64 float64_mul( float64 a, float64 b )
+float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
{
flag aSign, bSign, zSign;
int16 aExp, bExp, zExp;
@@ -2142,7 +2129,7 @@ float64 float64_mul( float64 a, float64 b )
return propagateFloat64NaN( a, b );
}
if ( ( bExp | bSig ) == 0 ) {
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
return packFloat64( zSign, 0x7FF, 0 );
@@ -2150,7 +2137,7 @@ float64 float64_mul( float64 a, float64 b )
if ( bExp == 0x7FF ) {
if ( bSig ) return propagateFloat64NaN( a, b );
if ( ( aExp | aSig ) == 0 ) {
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
return packFloat64( zSign, 0x7FF, 0 );
@@ -2172,7 +2159,7 @@ float64 float64_mul( float64 a, float64 b )
zSig0 <<= 1;
--zExp;
}
- return roundAndPackFloat64( zSign, zExp, zSig0 );
+ return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
}
@@ -2183,7 +2170,7 @@ by the corresponding value `b'. The operation is performed according to
the IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float64 float64_div( float64 a, float64 b )
+float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
{
flag aSign, bSign, zSign;
int16 aExp, bExp, zExp;
@@ -2202,7 +2189,7 @@ float64 float64_div( float64 a, float64 b )
if ( aSig ) return propagateFloat64NaN( a, b );
if ( bExp == 0x7FF ) {
if ( bSig ) return propagateFloat64NaN( a, b );
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
return packFloat64( zSign, 0x7FF, 0 );
@@ -2214,10 +2201,10 @@ float64 float64_div( float64 a, float64 b )
if ( bExp == 0 ) {
if ( bSig == 0 ) {
if ( ( aExp | aSig ) == 0 ) {
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
- float_raise( float_flag_divbyzero );
+ roundData->exception |= float_flag_divbyzero;
return packFloat64( zSign, 0x7FF, 0 );
}
normalizeFloat64Subnormal( bSig, &bExp, &bSig );
@@ -2243,7 +2230,7 @@ float64 float64_div( float64 a, float64 b )
}
zSig |= ( rem1 != 0 );
}
- return roundAndPackFloat64( zSign, zExp, zSig );
+ return roundAndPackFloat64( roundData, zSign, zExp, zSig );
}
@@ -2254,7 +2241,7 @@ with respect to the corresponding value `b'. The operation is performed
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float64 float64_rem( float64 a, float64 b )
+float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
{
flag aSign, bSign, zSign;
int16 aExp, bExp, expDiff;
@@ -2272,7 +2259,7 @@ float64 float64_rem( float64 a, float64 b )
if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
return propagateFloat64NaN( a, b );
}
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
if ( bExp == 0x7FF ) {
@@ -2281,7 +2268,7 @@ float64 float64_rem( float64 a, float64 b )
}
if ( bExp == 0 ) {
if ( bSig == 0 ) {
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
normalizeFloat64Subnormal( bSig, &bExp, &bSig );
@@ -2329,7 +2316,7 @@ float64 float64_rem( float64 a, float64 b )
}
zSign = ( (sbits64) aSig < 0 );
if ( zSign ) aSig = - aSig;
- return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+ return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
}
@@ -2340,7 +2327,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float64 float64_sqrt( float64 a )
+float64 float64_sqrt( struct roundingData *roundData, float64 a )
{
flag aSign;
int16 aExp, zExp;
@@ -2354,12 +2341,12 @@ float64 float64_sqrt( float64 a )
if ( aExp == 0x7FF ) {
if ( aSig ) return propagateFloat64NaN( a, a );
if ( ! aSign ) return a;
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
if ( aSign ) {
if ( ( aExp | aSig ) == 0 ) return a;
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
return float64_default_nan;
}
if ( aExp == 0 ) {
@@ -2390,7 +2377,7 @@ float64 float64_sqrt( float64 a )
}
}
shift64RightJamming( zSig, 1, &zSig );
- return roundAndPackFloat64( 0, zExp, zSig );
+ return roundAndPackFloat64( roundData, 0, zExp, zSig );
}
@@ -2502,9 +2489,7 @@ flag float64_le_quiet( float64 a, float64 b )
if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
) {
- if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
- float_raise( float_flag_invalid );
- }
+ /* Do nothing, even if NaN as we're quiet */
return 0;
}
aSign = extractFloat64Sign( a );
@@ -2529,9 +2514,7 @@ flag float64_lt_quiet( float64 a, float64 b )
if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
|| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
) {
- if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
- float_raise( float_flag_invalid );
- }
+ /* Do nothing, even if NaN as we're quiet */
return 0;
}
aSign = extractFloat64Sign( a );
@@ -2554,7 +2537,7 @@ largest positive integer is returned. Otherwise, if the conversion
overflows, the largest integer with the same sign as `a' is returned.
-------------------------------------------------------------------------------
*/
-int32 floatx80_to_int32( floatx80 a )
+int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
{
flag aSign;
int32 aExp, shiftCount;
@@ -2567,7 +2550,7 @@ int32 floatx80_to_int32( floatx80 a )
shiftCount = 0x4037 - aExp;
if ( shiftCount <= 0 ) shiftCount = 1;
shift64RightJamming( aSig, shiftCount, &aSig );
- return roundAndPackInt32( aSign, aSig );
+ return roundAndPackInt32( roundData, aSign, aSig );
}
@@ -2598,7 +2581,7 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
goto invalid;
}
else if ( 63 < shiftCount ) {
- if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+ if ( aExp || aSig ) float_raise( float_flag_inexact );
return 0;
}
savedASig = aSig;
@@ -2607,11 +2590,11 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
if ( aSign ) z = - z;
if ( ( z < 0 ) ^ aSign ) {
invalid:
- float_exception_flags |= float_flag_invalid;
+ float_raise( float_flag_invalid );
return aSign ? 0x80000000 : 0x7FFFFFFF;
}
if ( ( aSig<<shiftCount ) != savedASig ) {
- float_exception_flags |= float_flag_inexact;
+ float_raise( float_flag_inexact );
}
return z;
@@ -2625,7 +2608,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float32 floatx80_to_float32( floatx80 a )
+float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
{
flag aSign;
int32 aExp;
@@ -2642,7 +2625,7 @@ float32 floatx80_to_float32( floatx80 a )
}
shift64RightJamming( aSig, 33, &aSig );
if ( aExp || aSig ) aExp -= 0x3F81;
- return roundAndPackFloat32( aSign, aExp, aSig );
+ return roundAndPackFloat32( roundData, aSign, aExp, aSig );
}
@@ -2654,7 +2637,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-float64 floatx80_to_float64( floatx80 a )
+float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
{
flag aSign;
int32 aExp;
@@ -2671,7 +2654,7 @@ float64 floatx80_to_float64( floatx80 a )
}
shift64RightJamming( aSig, 1, &zSig );
if ( aExp || aSig ) aExp -= 0x3C01;
- return roundAndPackFloat64( aSign, aExp, zSig );
+ return roundAndPackFloat64( roundData, aSign, aExp, zSig );
}
@@ -2683,7 +2666,7 @@ value. The operation is performed according to the IEC/IEEE Standard for
Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-floatx80 floatx80_round_to_int( floatx80 a )
+floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
{
flag aSign;
int32 aExp;
@@ -2703,9 +2686,9 @@ floatx80 floatx80_round_to_int( floatx80 a )
&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
return a;
}
- float_exception_flags |= float_flag_inexact;
+ roundData->exception |= float_flag_inexact;
aSign = extractFloatx80Sign( a );
- switch ( float_rounding_mode ) {
+ switch ( roundData->mode ) {
case float_round_nearest_even:
if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
) {
@@ -2729,7 +2712,7 @@ floatx80 floatx80_round_to_int( floatx80 a )
lastBitMask <<= 0x403E - aExp;
roundBitsMask = lastBitMask - 1;
z = a;
- roundingMode = float_rounding_mode;
+ roundingMode = roundData->mode;
if ( roundingMode == float_round_nearest_even ) {
z.low += lastBitMask>>1;
if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
@@ -2744,7 +2727,7 @@ floatx80 floatx80_round_to_int( floatx80 a )
++z.high;
z.low = LIT64( 0x8000000000000000 );
}
- if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+ if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
return z;
}
@@ -2758,7 +2741,7 @@ The addition is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
{
int32 aExp, bExp, zExp;
bits64 aSig, bSig, zSig0, zSig1;
@@ -2814,7 +2797,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
roundAndPack:
return
roundAndPackFloatx80(
- floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+ roundData, zSign, zExp, zSig0, zSig1 );
}
@@ -2827,7 +2810,7 @@ result is a NaN. The subtraction is performed according to the IEC/IEEE
Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
{
int32 aExp, bExp, zExp;
bits64 aSig, bSig, zSig0, zSig1;
@@ -2845,7 +2828,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
return propagateFloatx80NaN( a, b );
}
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
return z;
@@ -2857,7 +2840,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
zSig1 = 0;
if ( bSig < aSig ) goto aBigger;
if ( aSig < bSig ) goto bBigger;
- return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+ return packFloatx80( roundData->mode == float_round_down, 0, 0 );
bExpBigger:
if ( bExp == 0x7FFF ) {
if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
@@ -2883,7 +2866,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
normalizeRoundAndPack:
return
normalizeRoundAndPackFloatx80(
- floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+ roundData, zSign, zExp, zSig0, zSig1 );
}
@@ -2894,17 +2877,17 @@ values `a' and `b'. The operation is performed according to the IEC/IEEE
Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-floatx80 floatx80_add( floatx80 a, floatx80 b )
+floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
{
flag aSign, bSign;
aSign = extractFloatx80Sign( a );
bSign = extractFloatx80Sign( b );
if ( aSign == bSign ) {
- return addFloatx80Sigs( a, b, aSign );
+ return addFloatx80Sigs( roundData, a, b, aSign );
}
else {
- return subFloatx80Sigs( a, b, aSign );
+ return subFloatx80Sigs( roundData, a, b, aSign );
}
}
@@ -2916,17 +2899,17 @@ point values `a' and `b'. The operation is performed according to the
IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-floatx80 floatx80_sub( floatx80 a, floatx80 b )
+floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
{
flag aSign, bSign;
aSign = extractFloatx80Sign( a );
bSign = extractFloatx80Sign( b );
if ( aSign == bSign ) {
- return subFloatx80Sigs( a, b, aSign );
+ return subFloatx80Sigs( roundData, a, b, aSign );
}
else {
- return addFloatx80Sigs( a, b, aSign );
+ return addFloatx80Sigs( roundData, a, b, aSign );
}
}
@@ -2938,7 +2921,7 @@ point values `a' and `b'. The operation is performed according to the
IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-floatx80 floatx80_mul( floatx80 a, floatx80 b )
+floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
{
flag aSign, bSign, zSign;
int32 aExp, bExp, zExp;
@@ -2964,7 +2947,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
if ( ( aExp | aSig ) == 0 ) {
invalid:
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
return z;
@@ -2987,7 +2970,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
}
return
roundAndPackFloatx80(
- floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+ roundData, zSign, zExp, zSig0, zSig1 );
}
@@ -2998,7 +2981,7 @@ value `a' by the corresponding value `b'. The operation is performed
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-floatx80 floatx80_div( floatx80 a, floatx80 b )
+floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
{
flag aSign, bSign, zSign;
int32 aExp, bExp, zExp;
@@ -3029,12 +3012,12 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
if ( bSig == 0 ) {
if ( ( aExp | aSig ) == 0 ) {
invalid:
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
return z;
}
- float_raise( float_flag_divbyzero );
+ roundData->exception |= float_flag_divbyzero;
return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
}
normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
@@ -3068,7 +3051,7 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
}
return
roundAndPackFloatx80(
- floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+ roundData, zSign, zExp, zSig0, zSig1 );
}
@@ -3079,7 +3062,7 @@ Returns the remainder of the extended double-precision floating-point value
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-floatx80 floatx80_rem( floatx80 a, floatx80 b )
+floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
{
flag aSign, bSign, zSign;
int32 aExp, bExp, expDiff;
@@ -3107,7 +3090,7 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
if ( bExp == 0 ) {
if ( bSig == 0 ) {
invalid:
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
return z;
@@ -3164,9 +3147,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
aSig1 = alternateASig1;
zSign = ! zSign;
}
+
return
normalizeRoundAndPackFloatx80(
- 80, zSign, bExp + expDiff, aSig0, aSig1 );
+ roundData, zSign, bExp + expDiff, aSig0, aSig1 );
}
@@ -3177,7 +3161,7 @@ value `a'. The operation is performed according to the IEC/IEEE Standard
for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
-floatx80 floatx80_sqrt( floatx80 a )
+floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
{
flag aSign;
int32 aExp, zExp;
@@ -3197,7 +3181,7 @@ floatx80 floatx80_sqrt( floatx80 a )
if ( aSign ) {
if ( ( aExp | aSig0 ) == 0 ) return a;
invalid:
- float_raise( float_flag_invalid );
+ roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
return z;
@@ -3242,7 +3226,7 @@ floatx80 floatx80_sqrt( floatx80 a )
}
return
roundAndPackFloatx80(
- floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+ roundData, 0, zExp, zSig0, zSig1 );
}
@@ -3390,10 +3374,7 @@ flag floatx80_le_quiet( floatx80 a, floatx80 b )
|| ( ( extractFloatx80Exp( b ) == 0x7FFF )
&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
) {
- if ( floatx80_is_signaling_nan( a )
- || floatx80_is_signaling_nan( b ) ) {
- float_raise( float_flag_invalid );
- }
+ /* Do nothing, even if NaN as we're quiet */
return 0;
}
aSign = extractFloatx80Sign( a );
@@ -3427,10 +3408,7 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b )
|| ( ( extractFloatx80Exp( b ) == 0x7FFF )
&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
) {
- if ( floatx80_is_signaling_nan( a )
- || floatx80_is_signaling_nan( b ) ) {
- float_raise( float_flag_invalid );
- }
+ /* Do nothing, even if NaN as we're quiet */
return 0;
}
aSign = extractFloatx80Sign( a );
OpenPOWER on IntegriCloud