summaryrefslogtreecommitdiffstats
path: root/mathfuncs_convert.h
diff options
context:
space:
mode:
Diffstat (limited to 'mathfuncs_convert.h')
-rw-r--r--mathfuncs_convert.h358
1 files changed, 170 insertions, 188 deletions
diff --git a/mathfuncs_convert.h b/mathfuncs_convert.h
index 79befbc..9cb1add 100644
--- a/mathfuncs_convert.h
+++ b/mathfuncs_convert.h
@@ -7,197 +7,179 @@
#include <cmath>
+namespace vecmathlib {
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_convert_float(intvec_t x) {
+ // Convert in two passes. Convert as much as possible during the
+ // first pass (lobits), so that the second pass (hibits) may be
+ // omitted if the high bits are known to be zero.
+ int_t lobits = FP::mantissa_bits;
+ // int_t hibits = FP::bits - lobits;
-namespace vecmathlib {
-
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_convert_float(intvec_t x)
- {
- // Convert in two passes. Convert as much as possible during the
- // first pass (lobits), so that the second pass (hibits) may be
- // omitted if the high bits are known to be zero.
- int_t lobits = FP::mantissa_bits;
- // int_t hibits = FP::bits - lobits;
-
- // Convert lower bits
- intvec_t xlo = x & IV((U(1) << lobits) - 1);
- // exponent for the equivalent floating point number
- int_t exponent_lo = (FP::exponent_offset + lobits) << FP::mantissa_bits;
- xlo |= exponent_lo;
- // subtract hidden mantissa bit
- realvec_t flo = as_float(xlo) - RV(FP::as_float(exponent_lo));
-
- // Convert upper bits
- // make unsigned by subtracting largest negative number
- // (only do this for the high bits, since they have sufficient
- // precision to handle the overflow)
- x ^= FP::signbit_mask;
- intvec_t xhi = lsr(x, lobits);
- // exponent for the equivalent floating point number
- int_t exponent_hi = (FP::exponent_offset + 2*lobits) << FP::mantissa_bits;
- xhi |= exponent_hi;
- // subtract hidden mantissa bit
- realvec_t fhi = as_float(xhi) - RV(FP::as_float(exponent_hi));
- // add largest negative number again
- fhi -= RV(R(FP::signbit_mask));
- // Ensure that the converted low and high bits are calculated
- // separately, since a real_t doesn't have enough precision to
- // hold all the bits of an int_t
- fhi.barrier();
-
- // Combine results
- return flo + fhi;
- }
-
-
-
- template<typename realvec_t>
- typename realvec_t::intvec_t
- mathfuncs<realvec_t>::vml_convert_int(realvec_t x)
- {
- // Handle overflow
- // int_t min_int = FP::signbit_mask;
- // int_t max_int = ~FP::signbit_mask;
- // boolvec_t is_overflow = x < RV(R(min_int)) || x > RV(R(max_int));
- // Handle negative numbers
- boolvec_t is_negative = signbit(x);
- x = fabs(x);
- // Handle small numbers
- boolvec_t issmall = x < RV(1.0);
-
- intvec_t shift = ilogb(x) - IV(FP::mantissa_bits);
- boolvec_t shift_left = x > RV(std::ldexp(R(1.0), FP::mantissa_bits));
- intvec_t ix = as_int(x) & IV(FP::mantissa_mask);
- // add hidden mantissa bit
- ix |= U(1) << FP::mantissa_bits;
- // shift according to exponent (which may truncate)
- ix = ifthen(shift_left, ix << shift, ix >> -shift);
-
- // Handle small numbers
- ix = ifthen(issmall, IV(I(0)), ix);
- // Handle negative numbers
- ix = ifthen(is_negative, -ix, ix);
- // Handle overflow
- // ix = ifthen(is_overflow, IV(min_int), ix);
-
- return ix;
- }
-
-
-
- // Round to nearest integer, breaking ties using prevailing rounding
- // mode (default: round to even)
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_rint(realvec_t x)
- {
- realvec_t r = x;
- // Round by adding a large number, destroying all excess precision
- realvec_t offset = copysign(RV(std::ldexp(R(1.0), FP::mantissa_bits)), x);
- r += offset;
- // Ensure the rounding is not optimised away
- r.barrier();
- r -= offset;
- return r;
- }
-
- // Round to next integer above
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x)
- {
- // boolvec_t iszero = x == RV(0.0);
- // realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits));
- // return ifthen(iszero, x, rint(x + offset));
- return ifthen(x<RV(0.0), trunc(x), vml_antitrunc(x));
- }
-
- // Round to next integer below
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x)
- {
- // boolvec_t iszero = x == RV(0.0);
- // realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits));
- // return ifthen(iszero, x, rint(x - offset));
- return ifthen(x<RV(0.0), vml_antitrunc(x), trunc(x));
- }
-
- // Round to nearest integer, breaking ties using prevailing rounding
- // mode (default: round to even), returning an integer
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_lrint(realvec_t x)
- {
- return convert_int(rint(x));
- }
-
- // Round to nearest integer, breaking ties away from zero
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x)
- {
- // return copysign(floor(fabs(x)+RV(0.5)), x);
- return trunc(x + copysign(RV(0.5), x));
- }
-
- // Round to next integer towards zero
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_trunc(realvec_t x)
- {
- realvec_t x0 = x;
- x = fabs(x);
- boolvec_t istoosmall = x < RV(1.0);
- boolvec_t istoolarge = x >= RV(std::ldexp(R(1.0), FP::mantissa_bits));
- // Number of mantissa bits to keep
- intvec_t nbits = ilogb(x);
- // This is probably faster than a shift operation
- realvec_t mask = ldexp(RV(2.0), nbits) - RV(1.0);
- intvec_t imask = IV(FP::signbit_mask | FP::exponent_mask) | as_int(mask);
- realvec_t y = as_float(as_int(x) & imask);
- realvec_t r =
+ // Convert lower bits
+ intvec_t xlo = x & IV((U(1) << lobits) - 1);
+ // exponent for the equivalent floating point number
+ int_t exponent_lo = (FP::exponent_offset + lobits) << FP::mantissa_bits;
+ xlo |= exponent_lo;
+ // subtract hidden mantissa bit
+ realvec_t flo = as_float(xlo) - RV(FP::as_float(exponent_lo));
+
+ // Convert upper bits
+ // make unsigned by subtracting largest negative number
+ // (only do this for the high bits, since they have sufficient
+ // precision to handle the overflow)
+ x ^= FP::signbit_mask;
+ intvec_t xhi = lsr(x, lobits);
+ // exponent for the equivalent floating point number
+ int_t exponent_hi = (FP::exponent_offset + 2 * lobits) << FP::mantissa_bits;
+ xhi |= exponent_hi;
+ // subtract hidden mantissa bit
+ realvec_t fhi = as_float(xhi) - RV(FP::as_float(exponent_hi));
+ // add largest negative number again
+ fhi -= RV(R(FP::signbit_mask));
+ // Ensure that the converted low and high bits are calculated
+ // separately, since a real_t doesn't have enough precision to
+ // hold all the bits of an int_t
+ fhi.barrier();
+
+ // Combine results
+ return flo + fhi;
+}
+
+template <typename realvec_t>
+typename realvec_t::intvec_t
+mathfuncs<realvec_t>::vml_convert_int(realvec_t x) {
+ // Handle overflow
+ // int_t min_int = FP::signbit_mask;
+ // int_t max_int = ~FP::signbit_mask;
+ // boolvec_t is_overflow = x < RV(R(min_int)) || x > RV(R(max_int));
+ // Handle negative numbers
+ boolvec_t is_negative = signbit(x);
+ x = fabs(x);
+ // Handle small numbers
+ boolvec_t issmall = x < RV(1.0);
+
+ intvec_t shift = ilogb(x) - IV(FP::mantissa_bits);
+ boolvec_t shift_left = x > RV(std::ldexp(R(1.0), FP::mantissa_bits));
+ intvec_t ix = as_int(x) & IV(FP::mantissa_mask);
+ // add hidden mantissa bit
+ ix |= U(1) << FP::mantissa_bits;
+ // shift according to exponent (which may truncate)
+ ix = ifthen(shift_left, ix << shift, ix >> -shift);
+
+ // Handle small numbers
+ ix = ifthen(issmall, IV(I(0)), ix);
+ // Handle negative numbers
+ ix = ifthen(is_negative, -ix, ix);
+ // Handle overflow
+ // ix = ifthen(is_overflow, IV(min_int), ix);
+
+ return ix;
+}
+
+// Round to nearest integer, breaking ties using prevailing rounding
+// mode (default: round to even)
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_rint(realvec_t x) {
+ realvec_t r = x;
+ // Round by adding a large number, destroying all excess precision
+ realvec_t offset = copysign(RV(std::ldexp(R(1.0), FP::mantissa_bits)), x);
+ r += offset;
+ // Ensure the rounding is not optimised away
+ r.barrier();
+ r -= offset;
+ return r;
+}
+
+// Round to next integer above
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x) {
+ // boolvec_t iszero = x == RV(0.0);
+ // realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits));
+ // return ifthen(iszero, x, rint(x + offset));
+ return ifthen(x < RV(0.0), trunc(x), vml_antitrunc(x));
+}
+
+// Round to next integer below
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x) {
+ // boolvec_t iszero = x == RV(0.0);
+ // realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits));
+ // return ifthen(iszero, x, rint(x - offset));
+ return ifthen(x < RV(0.0), vml_antitrunc(x), trunc(x));
+}
+
+// Round to nearest integer, breaking ties using prevailing rounding
+// mode (default: round to even), returning an integer
+template <typename realvec_t>
+typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_lrint(realvec_t x) {
+ return convert_int(rint(x));
+}
+
+// Round to nearest integer, breaking ties away from zero
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x) {
+ // return copysign(floor(fabs(x)+RV(0.5)), x);
+ return trunc(x + copysign(RV(0.5), x));
+}
+
+// Round to next integer towards zero
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_trunc(realvec_t x) {
+ realvec_t x0 = x;
+ x = fabs(x);
+ boolvec_t istoosmall = x < RV(1.0);
+ boolvec_t istoolarge = x >= RV(std::ldexp(R(1.0), FP::mantissa_bits));
+ // Number of mantissa bits to keep
+ intvec_t nbits = ilogb(x);
+ // This is probably faster than a shift operation
+ realvec_t mask = ldexp(RV(2.0), nbits) - RV(1.0);
+ intvec_t imask = IV(FP::signbit_mask | FP::exponent_mask) | as_int(mask);
+ realvec_t y = as_float(as_int(x) & imask);
+ realvec_t r =
copysign(ifthen(istoosmall, RV(0.0), ifthen(istoolarge, x, y)), x0);
- return r;
- }
-
- // Round to next integer away from zero
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_antitrunc(realvec_t x)
- {
- realvec_t x0 = x;
- x = fabs(x);
- boolvec_t iszero = x == RV(0.0);
- boolvec_t issmall = x <= RV(1.0);
- boolvec_t istoolarge =
- x > RV(std::ldexp(R(1.0), FP::mantissa_bits) - R(1.0));
- // Number of mantissa bits to keep
- intvec_t nbits = ilogb(x);
- // This is probably faster than a shift operation
- realvec_t mask = ldexp(RV(2.0), nbits) - RV(1.0);
- intvec_t imask = IV(FP::signbit_mask | FP::exponent_mask) | as_int(mask);
- realvec_t offset = RV(1.0) - ldexp(RV(1.0), nbits - IV(FP::mantissa_bits));
- offset.barrier();
- realvec_t y = as_float(as_int(x + offset) & imask);
- realvec_t r =
+ return r;
+}
+
+// Round to next integer away from zero
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_antitrunc(realvec_t x) {
+ realvec_t x0 = x;
+ x = fabs(x);
+ boolvec_t iszero = x == RV(0.0);
+ boolvec_t issmall = x <= RV(1.0);
+ boolvec_t istoolarge = x > RV(std::ldexp(R(1.0), FP::mantissa_bits) - R(1.0));
+ // Number of mantissa bits to keep
+ intvec_t nbits = ilogb(x);
+ // This is probably faster than a shift operation
+ realvec_t mask = ldexp(RV(2.0), nbits) - RV(1.0);
+ intvec_t imask = IV(FP::signbit_mask | FP::exponent_mask) | as_int(mask);
+ realvec_t offset = RV(1.0) - ldexp(RV(1.0), nbits - IV(FP::mantissa_bits));
+ offset.barrier();
+ realvec_t y = as_float(as_int(x + offset) & imask);
+ realvec_t r =
copysign(ifthen(iszero, RV(0.0),
- ifthen(issmall, RV(1.0),
- ifthen(istoolarge, x, y))), x0);
- return r;
- }
-
- // Next machine representable number from x in direction y
- template<typename realvec_t>
- realvec_t mathfuncs<realvec_t>::vml_nextafter(realvec_t x, realvec_t y)
- {
- realvec_t dir = y - x;
- realvec_t offset = ldexp(RV(FP::epsilon()), ilogb(x));
- offset = copysign(offset, dir);
- offset = ifthen(convert_bool(as_int(x) & IV(FP::mantissa_mask)) ||
- signbit(x) == signbit(offset),
- offset,
- offset * RV(0.5));
- realvec_t r = x + offset;
- real_t smallest_pos = std::ldexp(FP::min(), -FP::mantissa_bits);
- return ifthen(dir==RV(0.0), y,
- ifthen(x==RV(0.0), copysign(RV(smallest_pos), dir), r));
- }
-
+ ifthen(issmall, RV(1.0), ifthen(istoolarge, x, y))),
+ x0);
+ return r;
+}
+
+// Next machine representable number from x in direction y
+template <typename realvec_t>
+realvec_t mathfuncs<realvec_t>::vml_nextafter(realvec_t x, realvec_t y) {
+ realvec_t dir = y - x;
+ realvec_t offset = ldexp(RV(FP::epsilon()), ilogb(x));
+ offset = copysign(offset, dir);
+ offset = ifthen(convert_bool(as_int(x) & IV(FP::mantissa_mask)) ||
+ signbit(x) == signbit(offset),
+ offset, offset * RV(0.5));
+ realvec_t r = x + offset;
+ real_t smallest_pos = std::ldexp(FP::min(), -FP::mantissa_bits);
+ return ifthen(dir == RV(0.0), y,
+ ifthen(x == RV(0.0), copysign(RV(smallest_pos), dir), r));
+}
+
}; // namespace vecmathlib
-#endif // #ifndef MATHFUNCS_CONVERT_H
+#endif // #ifndef MATHFUNCS_CONVERT_H
OpenPOWER on IntegriCloud