diff options
-rw-r--r-- | floattypes.h | 4 | ||||
-rw-r--r-- | mathfuncs_fabs.h | 19 |
2 files changed, 23 insertions, 0 deletions
diff --git a/floattypes.h b/floattypes.h index 1830d1d..2c22a10 100644 --- a/floattypes.h +++ b/floattypes.h @@ -215,11 +215,15 @@ namespace vecmathlib { struct fp8 { // 1 bit sign, 4 bits exponent, 3 bits mantissa vml_std::uint8_t val; + fp8() {} + fp8(double x) { __builtin_unreachable(); } }; struct fp16 { // 1 bit sign, 5 bits exponent, 10 bits mantissa vml_std::uint16_t val; + fp16() {} + fp16(double x) { __builtin_unreachable(); } }; } // namespace vecmathlib diff --git a/mathfuncs_fabs.h b/mathfuncs_fabs.h index 4baec9b..4f31dec 100644 --- a/mathfuncs_fabs.h +++ b/mathfuncs_fabs.h @@ -166,9 +166,28 @@ namespace vecmathlib { realvec_t mathfuncs<realvec_t>::vml_ldexp(realvec_t x, intvec_t n) { // TODO: Check SLEEF 2.80 algorithm +#if 0 realvec_t r = as_float(as_int(x) + (n << I(FP::mantissa_bits))); r = ifthen((as_int(x) & IV(FP::exponent_mask)) == IV(I(0)), x, r); return r; +#endif + realvec_t r = as_float(as_int(x) + (n << U(FP::mantissa_bits))); + int max_n = FP::max_exponent - FP::min_exponent; + boolvec_t underflow = n < IV(I(-max_n)); + boolvec_t overflow = n > IV(I(max_n)); + intvec_t old_exp = + lsr(as_int(x) & IV(FP::exponent_mask), FP::mantissa_bits); + intvec_t new_exp = old_exp + n; + // TODO: check bit patterns instead + underflow = + underflow || new_exp < IV(I(FP::min_exponent + FP::exponent_offset)); + overflow = + overflow || new_exp > IV(I(FP::max_exponent + FP::exponent_offset)); + r = ifthen(underflow, copysign(RV(R(0.0)), x), r); + r = ifthen(overflow, copysign(RV(FP::infinity()), x), r); + boolvec_t dont_change = x == RV(R(0.0)) || isinf(x) || isnan(x); + r = ifthen(dont_change, x, r); + return r; } template<typename realvec_t> |