summary refs log tree commit diff stats
path: root/floatprops.h
diff options
context:
space:
mode:
Diffstat (limited to 'floatprops.h')
-rw-r--r-- floatprops.h 571
1 files changed, 270 insertions, 301 deletions
diff --git a/floatprops.h b/floatprops.h
index f1c39a2..c7a3b7f 100644
--- a/floatprops.h
+++ b/floatprops.h
@@ -10,310 +10,279 @@
#include <cstring>
#include <limits>
+namespace vecmathlib {
+// A structure describing various properties of a floating-point
+// type. Most properties are already described in numeric_limits, so
+// the float and double specializations inherit from it.
+template <typename real_t> struct floatprops {
+ // Some interesting properties are:
+ // min
+ // max
+ // digits
+ // epsilon
+ // min_exponent
+ // max_exponent
+ // infinity
+ // quiet_NaN
+};
-namespace vecmathlib {
-
- // A structure describing various properties of a floating point
- // type. Most properties are already described in numeric_limits, so
- // we inherit it.
- template<typename real_t>
- struct floatprops {
- // Some interesting properties are:
- // min
- // max
- // digits
- // epsilon
- // min_exponent
- // max_exponent
- // infinity
- // quiet_NaN
- };
-
-
-
- // Properties of fp8
- template<>
- struct floatprops<fp8> {
- typedef fp8 real_t;
- typedef vml_std::int8_t int_t;
- typedef vml_std::uint8_t uint_t;
-
- static char const* name() { return "fp8"; }
-
- // Definitions that might come from numeric_limits<> instead:
- static real_t min() { __builtin_unreachable(); }
- static real_t max() { __builtin_unreachable(); }
- static int const digits = 4;
- static real_t epsilon() { __builtin_unreachable(); }
- static int const min_exponent = -6;
- static int const max_exponent = 7;
- static real_t infinity() { __builtin_unreachable(); }
- static real_t quiet_NaN() { __builtin_unreachable(); }
-
- // Ensure the sizes match
- static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size");
- static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size");
-
- // Number of bits in internal representation
- static int const bits = 8 * sizeof(real_t);
- static int const mantissa_bits = digits - 1;
- static int const signbit_bits = 1;
- static int const exponent_bits = bits - mantissa_bits - signbit_bits;
- static int const exponent_offset = 2 - min_exponent;
- static_assert(mantissa_bits + exponent_bits + signbit_bits == bits,
- "error in bit counts");
- static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1;
- static uint_t const exponent_mask =
- ((uint_t(1) << exponent_bits) - 1) << mantissa_bits;
- static uint_t const signbit_mask = uint_t(1) << (bits-1);
- static_assert((mantissa_mask & exponent_mask & signbit_mask) == uint_t(0),
- "error in masks");
- static_assert((mantissa_mask | exponent_mask | signbit_mask) ==
- uint_t(~uint_t(0)),
- "error in masks");
-
- // Re-interpret bit patterns
- static real_t as_float(int_t x)
- {
- real_t res;
- std::memcpy(&res, &x, sizeof res);
- return res;
- }
- static int_t as_int(real_t x)
- {
- int_t res;
- std::memcpy(&res, &x, sizeof res);
- return res;
- }
- static int_t replicate_byte(unsigned char byte)
- {
- int_t res;
- std::memset(&res, byte, sizeof res);
- return res;
- }
-
- // Convert values (truncate)
- static real_t convert_float(int_t x) { __builtin_unreachable(); }
- static int_t convert_int(real_t x) { __builtin_unreachable(); }
- };
-
-
-
- // Properties of fp16
- template<>
- struct floatprops<fp16> {
- typedef fp16 real_t;
- typedef vml_std::int16_t int_t;
- typedef vml_std::uint16_t uint_t;
-
- static char const* name() { return "fp16"; }
-
- // Definitions that might come from numeric_limits<> instead:
- static real_t min() { __builtin_unreachable(); }
- static real_t max() { __builtin_unreachable(); }
- static int const digits = 11;
- static real_t epsilon() { __builtin_unreachable(); }
- static int const min_exponent = -14;
- static int const max_exponent = 15;
- static real_t infinity() { __builtin_unreachable(); }
- static real_t quiet_NaN() { __builtin_unreachable(); }
-
- // Ensure the sizes match
- static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size");
- static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size");
-
- // Number of bits in internal representation
- static int const bits = 8 * sizeof(real_t);
- static int const mantissa_bits = digits - 1;
- static int const signbit_bits = 1;
- static int const exponent_bits = bits - mantissa_bits - signbit_bits;
- static int const exponent_offset = 2 - min_exponent;
- static_assert(mantissa_bits + exponent_bits + signbit_bits == bits,
- "error in bit counts");
- static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1;
- static uint_t const exponent_mask =
- ((uint_t(1) << exponent_bits) - 1) << mantissa_bits;
- static uint_t const signbit_mask = uint_t(1) << (bits-1);
- static_assert((mantissa_mask & exponent_mask & signbit_mask) == uint_t(0),
- "error in masks");
- static_assert((mantissa_mask | exponent_mask | signbit_mask) ==
- uint_t(~uint_t(0)),
- "error in masks");
-
- // Re-interpret bit patterns
- static real_t as_float(int_t x)
- {
- real_t res;
- std::memcpy(&res, &x, sizeof res);
- return res;
- }
- static int_t as_int(real_t x)
- {
- int_t res;
- std::memcpy(&res, &x, sizeof res);
- return res;
- }
- static int_t replicate_byte(unsigned char byte)
- {
- int_t res;
- std::memset(&res, byte, sizeof res);
- return res;
- }
-
- // Convert values (truncate)
- static real_t convert_float(int_t x) { __builtin_unreachable(); }
- static int_t convert_int(real_t x) { __builtin_unreachable(); }
- };
-
-
-
- // Properties of float
- template<>
- struct floatprops<float>: std::numeric_limits<float> {
- typedef float real_t;
- typedef vml_std::int32_t int_t;
- typedef vml_std::uint32_t uint_t;
-
- static char const* name() { return "float"; }
-
- // Ensure the internal representation is what we expect
- static_assert(is_signed, "real_t is not signed");
- static_assert(radix==2, "real_t is not binary");
-
- // Ensure the sizes match
- static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size");
- static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size");
-
- // Number of bits in internal representation
- static int const bits = 8 * sizeof(real_t);
- static int const mantissa_bits = digits - 1;
- static int const signbit_bits = 1;
- static int const exponent_bits = bits - mantissa_bits - signbit_bits;
- static int const exponent_offset = 2 - min_exponent;
- static_assert(mantissa_bits + exponent_bits + signbit_bits == bits,
- "error in bit counts");
- static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1;
- static uint_t const exponent_mask =
- ((uint_t(1) << exponent_bits) - 1) << mantissa_bits;
- static uint_t const signbit_mask = uint_t(1) << (bits-1);
- static_assert((mantissa_mask & exponent_mask & signbit_mask) == uint_t(0),
- "error in masks");
- static_assert((mantissa_mask | exponent_mask | signbit_mask) ==
- uint_t(~uint_t(0)),
- "error in masks");
-
- // Re-interpret bit patterns
- static real_t as_float(int_t x)
- {
- real_t res;
- std::memcpy(&res, &x, sizeof res);
- return res;
- }
- static int_t as_int(real_t x)
- {
- int_t res;
- std::memcpy(&res, &x, sizeof res);
- return res;
- }
- static int_t replicate_byte(unsigned char byte)
- {
- int_t res;
- std::memset(&res, byte, sizeof res);
- return res;
- }
-
- // Convert values (truncate)
- static real_t convert_float(int_t x) { return real_t(x); }
- static int_t convert_int(real_t x) { return int_t(x); }
- };
-
-
-
- // Properties of double
- template<>
- struct floatprops<double>: std::numeric_limits<double> {
- typedef double real_t;
- typedef vml_std::int64_t int_t;
- typedef vml_std::uint64_t uint_t;
-
- static char const* name() { return "double"; }
-
- // Ensure the internal representation is what we expect
- static_assert(is_signed, "real_t is not signed");
- static_assert(radix==2, "real_t is not binary");
-
- // Ensure the sizes match
- static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size");
- static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size");
-
- // Number of bits in internal representation
- static int const bits = 8 * sizeof(real_t);
- static int const mantissa_bits = digits - 1;
- static int const signbit_bits = 1;
- static int const exponent_bits = bits - mantissa_bits - signbit_bits;
- static int const exponent_offset = 2 - min_exponent;
- static_assert(mantissa_bits + exponent_bits + signbit_bits == bits,
- "error in bit counts");
- static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1;
- static uint_t const exponent_mask =
- ((uint_t(1) << exponent_bits) - 1) << mantissa_bits;
- static uint_t const signbit_mask = uint_t(1) << (bits-1);
- static_assert((mantissa_mask & exponent_mask & signbit_mask) == uint_t(0),
- "error in masks");
- static_assert((mantissa_mask | exponent_mask | signbit_mask) ==
- uint_t(~uint_t(0)),
- "error in masks");
-
- // Re-interpret bit patterns
- static real_t as_float(int_t x)
- {
- real_t res;
- std::memcpy(&res, &x, sizeof res);
- return res;
- }
- static int_t as_int(real_t x)
- {
- int_t res;
- std::memcpy(&res, &x, sizeof res);
- return res;
- }
- static int_t replicate_byte(unsigned char byte)
- {
- int_t res;
- std::memset(&res, byte, sizeof res);
- return res;
- }
-
- // Convert values (truncate)
- static real_t convert_float(int_t x) { return real_t(x); }
- static int_t convert_int(real_t x) { return int_t(x); }
- };
-
-
-
- // We are adding the (unused) type RV here to avoid name mangling
- // problems. On some systems, the vector size does not enter into
- // the mangled name (!), leading to duplicate function definitions.
- template<typename RV, typename V, typename E>
- E get_elt(const V& v, const int n)
- {
- const size_t s = sizeof(E);
- E e;
- // assert(n>=0 and s*n<sizeof(V));
- std::memcpy(&e, &((const char*)&v)[s*n], s);
- return e;
+// Properties of fp8
+template <> struct floatprops<fp8> {
+ typedef fp8 real_t;
+ typedef vml_std::int8_t int_t;
+ typedef vml_std::uint8_t uint_t;
+
+ static char const *name() { return "fp8"; }
+
+ // Stand-ins for what numeric_limits<> would provide (the functions are unreachable stubs):
+ static real_t min() { __builtin_unreachable(); }
+ static real_t max() { __builtin_unreachable(); }
+ static int const digits = 4;
+ static real_t epsilon() { __builtin_unreachable(); }
+ static int const min_exponent = -6;
+ static int const max_exponent = 7;
+ static real_t infinity() { __builtin_unreachable(); }
+ static real_t quiet_NaN() { __builtin_unreachable(); }
+
+ // Ensure the sizes match
+ static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size");
+ static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size");
+
+ // Number of bits in internal representation
+ static int const bits = 8 * sizeof(real_t);
+ static int const mantissa_bits = digits - 1;
+ static int const signbit_bits = 1;
+ static int const exponent_bits = bits - mantissa_bits - signbit_bits;
+ static int const exponent_offset = 2 - min_exponent;
+ static_assert(mantissa_bits + exponent_bits + signbit_bits == bits,
+ "error in bit counts");
+ static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1;
+ static uint_t const exponent_mask = ((uint_t(1) << exponent_bits) - 1)
+ << mantissa_bits;
+ static uint_t const signbit_mask = uint_t(1) << (bits - 1);
+ static_assert((mantissa_mask & exponent_mask & signbit_mask) == uint_t(0),
+ "error in masks");
+ static_assert((mantissa_mask | exponent_mask | signbit_mask) ==
+ uint_t(~uint_t(0)),
+ "error in masks");
+
+ // Re-interpret bit patterns
+ static real_t as_float(int_t x) {
+ real_t res;
+ std::memcpy(&res, &x, sizeof res);
+ return res;
+ }
+ static int_t as_int(real_t x) {
+ int_t res;
+ std::memcpy(&res, &x, sizeof res);
+ return res;
+ }
+ static int_t replicate_byte(unsigned char byte) {
+ int_t res;
+ std::memset(&res, byte, sizeof res);
+ return res;
+ }
+
+ // Convert values (unimplemented stubs for fp8; must never be called)
+ static real_t convert_float(int_t x) { __builtin_unreachable(); }
+ static int_t convert_int(real_t x) { __builtin_unreachable(); }
+};
+
+// Properties of fp16
+template <> struct floatprops<fp16> {
+ typedef fp16 real_t;
+ typedef vml_std::int16_t int_t;
+ typedef vml_std::uint16_t uint_t;
+
+ static char const *name() { return "fp16"; }
+
+ // Stand-ins for what numeric_limits<> would provide (the functions are unreachable stubs):
+ static real_t min() { __builtin_unreachable(); }
+ static real_t max() { __builtin_unreachable(); }
+ static int const digits = 11;
+ static real_t epsilon() { __builtin_unreachable(); }
+ static int const min_exponent = -14;
+ static int const max_exponent = 15;
+ static real_t infinity() { __builtin_unreachable(); }
+ static real_t quiet_NaN() { __builtin_unreachable(); }
+
+ // Ensure the sizes match
+ static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size");
+ static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size");
+
+ // Number of bits in internal representation
+ static int const bits = 8 * sizeof(real_t);
+ static int const mantissa_bits = digits - 1;
+ static int const signbit_bits = 1;
+ static int const exponent_bits = bits - mantissa_bits - signbit_bits;
+ static int const exponent_offset = 2 - min_exponent;
+ static_assert(mantissa_bits + exponent_bits + signbit_bits == bits,
+ "error in bit counts");
+ static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1;
+ static uint_t const exponent_mask = ((uint_t(1) << exponent_bits) - 1)
+ << mantissa_bits;
+ static uint_t const signbit_mask = uint_t(1) << (bits - 1);
+ static_assert((mantissa_mask & exponent_mask & signbit_mask) == uint_t(0),
+ "error in masks");
+ static_assert((mantissa_mask | exponent_mask | signbit_mask) ==
+ uint_t(~uint_t(0)),
+ "error in masks");
+
+ // Re-interpret bit patterns
+ static real_t as_float(int_t x) {
+ real_t res;
+ std::memcpy(&res, &x, sizeof res);
+ return res;
+ }
+ static int_t as_int(real_t x) {
+ int_t res;
+ std::memcpy(&res, &x, sizeof res);
+ return res;
+ }
+ static int_t replicate_byte(unsigned char byte) {
+ int_t res;
+ std::memset(&res, byte, sizeof res);
+ return res;
+ }
+
+ // Convert values (unimplemented stubs for fp16; must never be called)
+ static real_t convert_float(int_t x) { __builtin_unreachable(); }
+ static int_t convert_int(real_t x) { __builtin_unreachable(); }
+};
+
+// Properties of float
+template <> struct floatprops<float> : std::numeric_limits<float> {
+ typedef float real_t;
+ typedef vml_std::int32_t int_t;
+ typedef vml_std::uint32_t uint_t;
+
+ static char const *name() { return "float"; }
+
+ // Ensure the internal representation is what we expect
+ static_assert(is_signed, "real_t is not signed");
+ static_assert(radix == 2, "real_t is not binary");
+
+ // Ensure the sizes match
+ static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size");
+ static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size");
+
+ // Number of bits in internal representation
+ static int const bits = 8 * sizeof(real_t);
+ static int const mantissa_bits = digits - 1;
+ static int const signbit_bits = 1;
+ static int const exponent_bits = bits - mantissa_bits - signbit_bits;
+ static int const exponent_offset = 2 - min_exponent;
+ static_assert(mantissa_bits + exponent_bits + signbit_bits == bits,
+ "error in bit counts");
+ static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1;
+ static uint_t const exponent_mask = ((uint_t(1) << exponent_bits) - 1)
+ << mantissa_bits;
+ static uint_t const signbit_mask = uint_t(1) << (bits - 1);
+ static_assert((mantissa_mask & exponent_mask & signbit_mask) == uint_t(0),
+ "error in masks");
+ static_assert((mantissa_mask | exponent_mask | signbit_mask) ==
+ uint_t(~uint_t(0)),
+ "error in masks");
+
+ // Re-interpret bit patterns
+ static real_t as_float(int_t x) {
+ real_t res;
+ std::memcpy(&res, &x, sizeof res);
+ return res;
}
-
- template<typename RV, typename V, typename E>
- V& set_elt(V& v, const int n, const E e)
- {
- const size_t s = sizeof(E);
- // assert(n>=0 and s*n<sizeof(V));
- std::memcpy(&((char*)&v)[s*n], &e, s);
- return v;
+ static int_t as_int(real_t x) {
+ int_t res;
+ std::memcpy(&res, &x, sizeof res);
+ return res;
}
-
+ static int_t replicate_byte(unsigned char byte) {
+ int_t res;
+ std::memset(&res, byte, sizeof res);
+ return res;
+ }
+
+ // Convert values (convert_int truncates toward zero)
+ static real_t convert_float(int_t x) { return real_t(x); }
+ static int_t convert_int(real_t x) { return int_t(x); }
+};
+
+// Properties of double
+template <> struct floatprops<double> : std::numeric_limits<double> {
+ typedef double real_t;
+ typedef vml_std::int64_t int_t;
+ typedef vml_std::uint64_t uint_t;
+
+ static char const *name() { return "double"; }
+
+ // Ensure the internal representation is what we expect
+ static_assert(is_signed, "real_t is not signed");
+ static_assert(radix == 2, "real_t is not binary");
+
+ // Ensure the sizes match
+ static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size");
+ static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size");
+
+ // Number of bits in internal representation
+ static int const bits = 8 * sizeof(real_t);
+ static int const mantissa_bits = digits - 1;
+ static int const signbit_bits = 1;
+ static int const exponent_bits = bits - mantissa_bits - signbit_bits;
+ static int const exponent_offset = 2 - min_exponent;
+ static_assert(mantissa_bits + exponent_bits + signbit_bits == bits,
+ "error in bit counts");
+ static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1;
+ static uint_t const exponent_mask = ((uint_t(1) << exponent_bits) - 1)
+ << mantissa_bits;
+ static uint_t const signbit_mask = uint_t(1) << (bits - 1);
+ static_assert((mantissa_mask & exponent_mask & signbit_mask) == uint_t(0),
+ "error in masks");
+ static_assert((mantissa_mask | exponent_mask | signbit_mask) ==
+ uint_t(~uint_t(0)),
+ "error in masks");
+
+ // Re-interpret bit patterns
+ static real_t as_float(int_t x) {
+ real_t res;
+ std::memcpy(&res, &x, sizeof res);
+ return res;
+ }
+ static int_t as_int(real_t x) {
+ int_t res;
+ std::memcpy(&res, &x, sizeof res);
+ return res;
+ }
+ static int_t replicate_byte(unsigned char byte) {
+ int_t res;
+ std::memset(&res, byte, sizeof res);
+ return res;
+ }
+
+ // Convert values (convert_int truncates toward zero)
+ static real_t convert_float(int_t x) { return real_t(x); }
+ static int_t convert_int(real_t x) { return int_t(x); }
+};
+
+// The otherwise unused template parameter RV is present only to avoid
+// name-mangling problems: on some systems the vector size does not enter
+// into the mangled name (!), which leads to duplicate function definitions.
+template <typename RV, typename V, typename E>
+E get_elt(const V &v, const int n) {
+ const size_t s = sizeof(E);
+ E e;
+ // assert(n>=0 and s*n<sizeof(V));
+ std::memcpy(&e, &((const char *)&v)[s * n], s);
+ return e;
+}
+
+template <typename RV, typename V, typename E>
+V &set_elt(V &v, const int n, const E e) {
+ const size_t s = sizeof(E);
+ // assert(n>=0 and s*n<sizeof(V));
+ std::memcpy(&((char *)&v)[s * n], &e, s);
+ return v;
+}
+
} // namespace vecmathlib
-#endif // #ifndef FLOATPROPS_H
+#endif // #ifndef FLOATPROPS_H
OpenPOWER on IntegriCloud