diff options
Diffstat (limited to 'vec_pseudo.h')
-rw-r--r-- | vec_pseudo.h | 3052 |
1 files changed, 1438 insertions, 1614 deletions
diff --git a/vec_pseudo.h b/vec_pseudo.h index 2aafc23..c4cbbc1 100644 --- a/vec_pseudo.h +++ b/vec_pseudo.h @@ -12,1668 +12,1492 @@ #include <climits> #include <cstdlib> #ifndef VML_NO_IOSTREAM -# include <sstream> +#include <sstream> #endif #include <string> +namespace vecmathlib { +template <typename T, int N> struct boolpseudovec; +template <typename T, int N> struct intpseudovec; +template <typename T, int N> struct realpseudovec; -namespace vecmathlib { - - template<typename T, int N> struct boolpseudovec; - template<typename T, int N> struct intpseudovec; - template<typename T, int N> struct realpseudovec; - - - - template<typename T, int N> - struct boolpseudovec: floatprops<T> - { - typedef typename floatprops<T>::int_t int_t; - typedef typename floatprops<T>::uint_t uint_t; - typedef typename floatprops<T>::real_t real_t; - - static int const size = N; - typedef bool scalar_t; - typedef bool bvector_t[size]; - static int const alignment = sizeof(bool); - - typedef boolpseudovec boolvec_t; - typedef intpseudovec<real_t, size> intvec_t; - typedef realpseudovec<real_t, size> realvec_t; - - // Short names for type casts - typedef real_t R; - typedef int_t I; - typedef uint_t U; - typedef realvec_t RV; - typedef intvec_t IV; - typedef boolvec_t BV; - typedef floatprops<real_t> FP; - typedef mathfuncs<realvec_t> MF; - - - - bvector_t v; - - boolpseudovec() {} - // Can't have a non-trivial copy constructor; if so, objects won't - // be passed in registers - // boolpseudovec(boolpseudovec const& x): v(x.v) {} - // boolpseudovec& operator=(boolpseudovec const& x) { return v=x.v, *this; } - boolpseudovec(bool a) { for (int d=0; d<size; ++d) v[d]=a; } - boolpseudovec(bool const* as) { for (int d=0; d<size; ++d) v[d]=as[d]; } - - bool operator[](int n) const { return v[n]; } - boolvec_t& set_elt(int n, bool a) { return v[n]=a, *this; } - - - - intvec_t as_int() const; // defined after intpseudovec - intvec_t convert_int() const; // defined after intpseudovec - - - - boolvec_t operator!() const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = !v[d]; - return res; - } - - boolvec_t operator&&(boolvec_t x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] && x.v[d]; - return res; - } - boolvec_t operator||(boolvec_t x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] || x.v[d]; - return res; - } - boolvec_t operator==(boolvec_t x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] == x.v[d]; - return res; - } - boolvec_t operator!=(boolvec_t x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] != x.v[d]; - return res; - } - - bool all() const - { - bool res = v[0]; - for (int d=1; d<size; ++d) res = res && v[d]; - return res; - } - bool any() const - { - bool res = v[0]; - for (int d=1; d<size; ++d) res = res || v[d]; - return res; - } - - - - // ifthen(condition, then-value, else-value) - boolvec_t ifthen(boolvec_t x, boolvec_t y) const; - intvec_t ifthen(intvec_t x, intvec_t y) const; // defined after intpseudovec - realvec_t ifthen(realvec_t x, realvec_t y) const; // defined after realpseudovec - }; - - - - template<typename T, int N> - struct intpseudovec: floatprops<T> - { - typedef typename floatprops<T>::int_t int_t; - typedef typename floatprops<T>::uint_t uint_t; - typedef typename floatprops<T>::real_t real_t; - - static int const size = N; - typedef int_t scalar_t; - typedef int_t ivector_t[size]; - static int const alignment = sizeof(int_t); - - typedef boolpseudovec<real_t, size> boolvec_t; - typedef intpseudovec intvec_t; - typedef realpseudovec<real_t, size> realvec_t; - - // Short names for type casts - typedef real_t R; - typedef int_t I; - typedef uint_t U; - typedef realvec_t RV; - typedef intvec_t IV; - typedef boolvec_t BV; - typedef floatprops<real_t> FP; - typedef mathfuncs<realvec_t> MF; - - - - ivector_t v; - - intpseudovec() {} - // Can't have a non-trivial copy constructor; if so, objects won't - // be passed in registers - // intpseudovec(intpseudovec const& x): v(x.v) {} - // intpseudovec& operator=(intpseudovec const& x) { return v=x.v, *this; } - intpseudovec(int_t a) { for (int d=0; d<size; ++d) v[d]=a; } - intpseudovec(int_t const* as) { for (int d=0; d<size; ++d) v[d]=as[d]; } - static intvec_t iota() - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d]=d; - return res; - } - - int_t operator[](int n) const { return v[n]; } - intvec_t& set_elt(int n, int_t a) { return v[n]=a, *this; } - - - - boolvec_t as_bool() const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d]=v[d]; - return res; - } - boolvec_t convert_bool() const - { - // Result: convert_bool(0)=false, convert_bool(else)=true - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d]; - return res; - } - realvec_t as_float() const; // defined after realpseudovec - realvec_t convert_float() const; // defined after realpseudovec - - - - intvec_t operator+() const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = + v[d]; - return res; - } - intvec_t operator-() const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = - v[d]; - return res; - } - - intvec_t& operator+=(intvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] += x.v[d]; - return *this; - } - intvec_t& operator-=(intvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] -= x.v[d]; - return *this; - } - intvec_t& operator*=(intvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] *= x.v[d]; - return *this; - } - intvec_t& operator/=(intvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] /= x.v[d]; - return *this; - } - intvec_t& operator%=(intvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] %= x.v[d]; - return *this; - } - - intvec_t operator+(intvec_t x) const - { - intvec_t res = *this; - return res += x; - } - intvec_t operator-(intvec_t x) const - { - intvec_t res = *this; - return res -= x; - } - intvec_t operator*(intvec_t x) const - { - intvec_t res = *this; - return res *= x; - } - intvec_t operator/(intvec_t x) const - { - intvec_t res = *this; - return res /= x; - } - intvec_t operator%(intvec_t x) const - { - intvec_t res = *this; - return res %= x; - } - - - - intvec_t operator~() const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = ~ v[d]; - return res; - } - - intvec_t& operator&=(intvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] &= x.v[d]; - return *this; - } - intvec_t& operator|=(intvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] |= x.v[d]; - return *this; - } - intvec_t& operator^=(intvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] ^= x.v[d]; - return *this; - } - - intvec_t operator&(intvec_t x) const - { - intvec_t res = *this; - return res &= x; - } - intvec_t operator|(intvec_t x) const - { - intvec_t res = *this; - return res |= x; - } - intvec_t operator^(intvec_t x) const - { - intvec_t res = *this; - return res ^= x; - } - - intvec_t bitifthen(intvec_t x, intvec_t y) const; - - - - intvec_t lsr(int_t n) const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = I(U(v[d]) >> U(n)); - return res; - } - intvec_t rotate(int_t n) const; - intvec_t& operator>>=(int_t n) - { - for (int d=0; d<size; ++d) v[d] >>= n; - return *this; - } - intvec_t& operator<<=(int_t n) - { - for (int d=0; d<size; ++d) v[d] <<= n; - return *this; - } - intvec_t operator>>(int_t n) const - { - intvec_t res = *this; - return res >>= n; - } - intvec_t operator<<(int_t n) const - { - intvec_t res = *this; - return res <<= n; - } - - intvec_t lsr(intvec_t n) const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = I(U(v[d]) >> U(n.v[d])); - return res; - } - intvec_t rotate(intvec_t n) const; - intvec_t& operator>>=(intvec_t n) - { - for (int d=0; d<size; ++d) v[d] >>= n.v[d]; - return *this; - } - intvec_t& operator<<=(intvec_t n) - { - for (int d=0; d<size; ++d) v[d] <<= n.v[d]; - return *this; - } - intvec_t operator>>(intvec_t n) const - { - intvec_t res = *this; - return res >>= n; - } - intvec_t operator<<(intvec_t n) const - { - intvec_t res = *this; - return res <<= n; - } - - intvec_t clz() const - { - intvec_t res; +template <typename T, int N> struct boolpseudovec : floatprops<T> { + typedef typename floatprops<T>::int_t int_t; + typedef typename floatprops<T>::uint_t uint_t; + typedef typename floatprops<T>::real_t real_t; + + static int const size = N; + typedef bool scalar_t; + typedef bool bvector_t[size]; + static int const alignment = sizeof(bool); + + typedef boolpseudovec boolvec_t; + typedef intpseudovec<real_t, size> intvec_t; + typedef realpseudovec<real_t, size> realvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + bvector_t v; + + boolpseudovec() {} + // Can't have a non-trivial copy constructor; if so, objects won't + // be passed in registers + // boolpseudovec(boolpseudovec const& x): v(x.v) {} + // boolpseudovec& operator=(boolpseudovec const& x) { return v=x.v, *this; } + boolpseudovec(bool a) { + for (int d = 0; d < size; ++d) + v[d] = a; + } + boolpseudovec(bool const *as) { + for (int d = 0; d < size; ++d) + v[d] = as[d]; + } + + bool operator[](int n) const { return v[n]; } + boolvec_t &set_elt(int n, bool a) { return v[n] = a, *this; } + + intvec_t as_int() const; // defined after intpseudovec + intvec_t convert_int() const; // defined after intpseudovec + + boolvec_t operator!() const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = !v[d]; + return res; + } + + boolvec_t operator&&(boolvec_t x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] && x.v[d]; + return res; + } + boolvec_t operator||(boolvec_t x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] || x.v[d]; + return res; + } + boolvec_t operator==(boolvec_t x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] == x.v[d]; + return res; + } + boolvec_t operator!=(boolvec_t x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] != x.v[d]; + return res; + } + + bool all() const { + bool res = v[0]; + for (int d = 1; d < size; ++d) + res = res && v[d]; + return res; + } + bool any() const { + bool res = v[0]; + for (int d = 1; d < size; ++d) + res = res || v[d]; + return res; + } + + // ifthen(condition, then-value, else-value) + boolvec_t ifthen(boolvec_t x, boolvec_t y) const; + intvec_t ifthen(intvec_t x, intvec_t y) const; // defined after intpseudovec + realvec_t ifthen(realvec_t x, + realvec_t y) const; // defined after realpseudovec +}; + +template <typename T, int N> struct intpseudovec : floatprops<T> { + typedef typename floatprops<T>::int_t int_t; + typedef typename floatprops<T>::uint_t uint_t; + typedef typename floatprops<T>::real_t real_t; + + static int const size = N; + typedef int_t scalar_t; + typedef int_t ivector_t[size]; + static int const alignment = sizeof(int_t); + + typedef boolpseudovec<real_t, size> boolvec_t; + typedef intpseudovec intvec_t; + typedef realpseudovec<real_t, size> realvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + ivector_t v; + + intpseudovec() {} + // Can't have a non-trivial copy constructor; if so, objects won't + // be passed in registers + // intpseudovec(intpseudovec const& x): v(x.v) {} + // intpseudovec& operator=(intpseudovec const& x) { return v=x.v, *this; } + intpseudovec(int_t a) { + for (int d = 0; d < size; ++d) + v[d] = a; + } + intpseudovec(int_t const *as) { + for (int d = 0; d < size; ++d) + v[d] = as[d]; + } + static intvec_t iota() { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = d; + return res; + } + + int_t operator[](int n) const { return v[n]; } + intvec_t &set_elt(int n, int_t a) { return v[n] = a, *this; } + + boolvec_t as_bool() const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d]; + return res; + } + boolvec_t convert_bool() const { + // Result: convert_bool(0)=false, convert_bool(else)=true + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d]; + return res; + } + realvec_t as_float() const; // defined after realpseudovec + realvec_t convert_float() const; // defined after realpseudovec + + intvec_t operator+() const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = +v[d]; + return res; + } + intvec_t operator-() const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = -v[d]; + return res; + } + + intvec_t &operator+=(intvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] += x.v[d]; + return *this; + } + intvec_t &operator-=(intvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] -= x.v[d]; + return *this; + } + intvec_t &operator*=(intvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] *= x.v[d]; + return *this; + } + intvec_t &operator/=(intvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] /= x.v[d]; + return *this; + } + intvec_t &operator%=(intvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] %= x.v[d]; + return *this; + } + + intvec_t operator+(intvec_t x) const { + intvec_t res = *this; + return res += x; + } + intvec_t operator-(intvec_t x) const { + intvec_t res = *this; + return res -= x; + } + intvec_t operator*(intvec_t x) const { + intvec_t res = *this; + return res *= x; + } + intvec_t operator/(intvec_t x) const { + intvec_t res = *this; + return res /= x; + } + intvec_t operator%(intvec_t x) const { + intvec_t res = *this; + return res %= x; + } + + intvec_t operator~() const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = ~v[d]; + return res; + } + + intvec_t &operator&=(intvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] &= x.v[d]; + return *this; + } + intvec_t &operator|=(intvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] |= x.v[d]; + return *this; + } + intvec_t &operator^=(intvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] ^= x.v[d]; + return *this; + } + + intvec_t operator&(intvec_t x) const { + intvec_t res = *this; + return res &= x; + } + intvec_t operator|(intvec_t x) const { + intvec_t res = *this; + return res |= x; + } + intvec_t operator^(intvec_t x) const { + intvec_t res = *this; + return res ^= x; + } + + intvec_t bitifthen(intvec_t x, intvec_t y) const; + + intvec_t lsr(int_t n) const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = I(U(v[d]) >> U(n)); + return res; + } + intvec_t rotate(int_t n) const; + intvec_t &operator>>=(int_t n) { + for (int d = 0; d < size; ++d) + v[d] >>= n; + return *this; + } + intvec_t &operator<<=(int_t n) { + for (int d = 0; d < size; ++d) + v[d] <<= n; + return *this; + } + intvec_t operator>>(int_t n) const { + intvec_t res = *this; + return res >>= n; + } + intvec_t operator<<(int_t n) const { + intvec_t res = *this; + return res <<= n; + } + + intvec_t lsr(intvec_t n) const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = I(U(v[d]) >> U(n.v[d])); + return res; + } + intvec_t rotate(intvec_t n) const; + intvec_t &operator>>=(intvec_t n) { + for (int d = 0; d < size; ++d) + v[d] >>= n.v[d]; + return *this; + } + intvec_t &operator<<=(intvec_t n) { + for (int d = 0; d < size; ++d) + v[d] <<= n.v[d]; + return *this; + } + intvec_t operator>>(intvec_t n) const { + intvec_t res = *this; + return res >>= n; + } + intvec_t operator<<(intvec_t n) const { + intvec_t res = *this; + return res <<= n; + } + + intvec_t clz() const { + intvec_t res; #if defined __clang__ || defined __gcc__ - for (int d=0; d<size; ++d) { - if (v[d] == 0) { - res.v[d] = CHAR_BIT * sizeof v[d]; + for (int d = 0; d < size; ++d) { + if (v[d] == 0) { + res.v[d] = CHAR_BIT * sizeof v[d]; + } else { + if (sizeof v[d] == sizeof(long long)) { + res.v[d] = __builtin_clzll(v[d]); + } else if (sizeof v[d] == sizeof(long)) { + res.v[d] = __builtin_clzl(v[d]); + } else if (sizeof v[d] == sizeof(int)) { + res.v[d] = __builtin_clz(v[d]); + } else if (sizeof v[d] == sizeof(short)) { + res.v[d] = __builtin_clzs(v[d]); + } else if (sizeof v[d] == sizeof(char)) { + res.v[d] = __builtin_clzs((unsigned short)(unsigned char)v[d]) - + CHAR_BIT * (sizeof(short) - sizeof(char)); } else { - if (sizeof v[d] == sizeof(long long)) { - res.v[d] = __builtin_clzll(v[d]); - } else if (sizeof v[d] == sizeof(long)) { - res.v[d] = __builtin_clzl(v[d]); - } else if (sizeof v[d] == sizeof(int)) { - res.v[d] = __builtin_clz(v[d]); - } else if (sizeof v[d] == sizeof(short)) { - res.v[d] = __builtin_clzs(v[d]); - } else if (sizeof v[d] == sizeof(char)) { - res.v[d] = - __builtin_clzs((unsigned short)(unsigned char)v[d]) - - CHAR_BIT * (sizeof(short) - sizeof(char)); - } else { - __builtin_unreachable(); - } + __builtin_unreachable(); } } + } #else - res = MF::vml_clz(*this); + res = MF::vml_clz(*this); #endif - return res; - } - intvec_t popcount() const - { - intvec_t res; + return res; + } + intvec_t popcount() const { + intvec_t res; #if defined __clang__ || defined __gcc__ - if (sizeof(int_t) == sizeof(long long)) { - for (int d=0; d<size; ++d) res.v[d] = __builtin_popcountll(v[d]); - } else if (sizeof(int_t) == sizeof(long)) { - for (int d=0; d<size; ++d) res.v[d] = __builtin_popcountl(v[d]); - } else if (sizeof(int_t) <= sizeof(int)) { - for (int d=0; d<size; ++d) res.v[d] = __builtin_popcount(v[d]); - } else { - __builtin_unreachable(); - } + if (sizeof(int_t) == sizeof(long long)) { + for (int d = 0; d < size; ++d) + res.v[d] = __builtin_popcountll(v[d]); + } else if (sizeof(int_t) == sizeof(long)) { + for (int d = 0; d < size; ++d) + res.v[d] = __builtin_popcountl(v[d]); + } else if (sizeof(int_t) <= sizeof(int)) { + for (int d = 0; d < size; ++d) + res.v[d] = __builtin_popcount(v[d]); + } else { + __builtin_unreachable(); + } #else - res = MF::vml_popcount(*this); + res = MF::vml_popcount(*this); #endif - return res; - } - - - - boolvec_t operator==(intvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] == x.v[d]; - return res; - } - boolvec_t operator!=(intvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] != x.v[d]; - return res; - } - boolvec_t operator<(intvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] < x.v[d]; - return res; - } - boolvec_t operator<=(intvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] <= x.v[d]; - return res; - } - boolvec_t operator>(intvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] > x.v[d]; - return res; - } - boolvec_t operator>=(intvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] >= x.v[d]; - return res; - } - - intvec_t abs() const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = std::abs(v[d]); - return res; - } - - boolvec_t isignbit() const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] < 0; - return res; - } - - intvec_t max(intvec_t x) const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = std::max(v[d], x.v[d]); - return res; - } - - intvec_t min(intvec_t x) const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = std::min(v[d], x.v[d]); - return res; - } - }; - - - - template<typename T, int N> - struct realpseudovec: floatprops<T> - { - typedef typename floatprops<T>::int_t int_t; - typedef typename floatprops<T>::uint_t uint_t; - typedef typename floatprops<T>::real_t real_t; - - static int const size = N; - typedef real_t scalar_t; - typedef real_t vector_t[size]; - static int const alignment = sizeof(real_t); - + return res; + } + + boolvec_t operator==(intvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] == x.v[d]; + return res; + } + boolvec_t operator!=(intvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] != x.v[d]; + return res; + } + boolvec_t operator<(intvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] < x.v[d]; + return res; + } + boolvec_t operator<=(intvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] <= x.v[d]; + return res; + } + boolvec_t operator>(intvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] > x.v[d]; + return res; + } + boolvec_t operator>=(intvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] >= x.v[d]; + return res; + } + + intvec_t abs() const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = std::abs(v[d]); + return res; + } + + boolvec_t isignbit() const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] < 0; + return res; + } + + intvec_t max(intvec_t x) const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = std::max(v[d], x.v[d]); + return res; + } + + intvec_t min(intvec_t x) const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = std::min(v[d], x.v[d]); + return res; + } +}; + +template <typename T, int N> struct realpseudovec : floatprops<T> { + typedef typename floatprops<T>::int_t int_t; + typedef typename floatprops<T>::uint_t uint_t; + typedef typename floatprops<T>::real_t real_t; + + static int const size = N; + typedef real_t scalar_t; + typedef real_t vector_t[size]; + static int const alignment = sizeof(real_t); + #ifndef VML_NO_IOSTREAM - static char const* name() - { - static std::string name_; - if (name_.empty()) { - std::stringstream buf; - buf << "<libm:" << N << "*" << FP::name() << ">"; - name_ = buf.str(); - } - return name_.c_str(); + static char const *name() { + static std::string name_; + if (name_.empty()) { + std::stringstream buf; + buf << "<libm:" << N << "*" << FP::name() << ">"; + name_ = buf.str(); } + return name_.c_str(); + } #endif - void barrier() - { + void barrier() { #if defined __GNUC__ && !defined __clang__ && !defined __ICC - // GCC crashes when +X is used as constraint -# if defined __SSE2__ - for (int d=0; d<size; ++d) __asm__("": "+x"(v[d])); -# elif defined __PPC64__ // maybe also __PPC__ - for (int d=0; d<size; ++d) __asm__("": "+f"(v[d])); -# elif defined __arm__ - for (int d=0; d<size; ++d) __asm__("": "+w"(v[d])); -# else -# error "Floating point barrier undefined on this architecture" -# endif +// GCC crashes when +X is used as constraint +#if defined __SSE2__ + for (int d = 0; d < size; ++d) + __asm__("" : "+x"(v[d])); +#elif defined __PPC64__ // maybe also __PPC__ + for (int d = 0; d < size; ++d) + __asm__("" : "+f"(v[d])); +#elif defined __arm__ + for (int d = 0; d < size; ++d) + __asm__("" : "+w"(v[d])); +#else +#error "Floating point barrier undefined on this architecture" +#endif #elif defined __clang__ - for (int d=0; d<size; ++d) __asm__("": "+X"(v[d])); + for (int d = 0; d < size; ++d) + __asm__("" : "+X"(v[d])); #elif defined __ICC - for (int d=0; d<size; ++d) { - real_t tmp = v[d]; - __asm__("": "+X"(tmp)); - v[d] = tmp; - } + for (int d = 0; d < size; ++d) { + real_t tmp = v[d]; + __asm__("" : "+X"(tmp)); + v[d] = tmp; + } #elif defined __IBMCPP__ - for (int d=0; d<size; ++d) __asm__("": "+f"(v[d])); + for (int d = 0; d < size; ++d) + __asm__("" : "+f"(v[d])); #else -# error "Floating point barrier undefined on this architecture" +#error "Floating point barrier undefined on this architecture" #endif - } - - typedef boolpseudovec<real_t, size> boolvec_t; - typedef intpseudovec<real_t, size> intvec_t; - typedef realpseudovec realvec_t; - - private: - boolvec_t mapb(bool f(real_t)) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = f(v[d]); - return res; - } - intvec_t map(int_t f(real_t)) const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = f(v[d]); - return res; - } - realvec_t map(real_t f(real_t)) const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = f(v[d]); - return res; - } - realvec_t map(real_t f(real_t, int_t), intvec_t x) const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d]); - return res; - } - realvec_t map(real_t f(real_t, real_t), realvec_t x) const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d]); - return res; - } - realvec_t map(real_t f(real_t, real_t, real_t), - realvec_t x, realvec_t y) const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d], y.v[d]); - return res; - } - public: - - // Short names for type casts - typedef real_t R; - typedef int_t I; - typedef uint_t U; - typedef realvec_t RV; - typedef intvec_t IV; - typedef boolvec_t BV; - typedef floatprops<real_t> FP; - typedef mathfuncs<realvec_t> MF; - - - - vector_t v; - - realpseudovec() {} - // Can't have a non-trivial copy constructor; if so, objects won't - // be passed in registers - // realpseudovec(realpseudovec const& x): v(x.v) {} - // realpseudovec& operator=(realpseudovec const& x) { return v=x.v, *this; } - realpseudovec(real_t a) { for (int d=0; d<size; ++d) v[d]=a; } - realpseudovec(real_t const* as) { for (int d=0; d<size; ++d) v[d]=as[d]; } - - real_t operator[](int n) const { return v[n]; } - realvec_t& set_elt(int n, real_t a) { return v[n]=a, *this; } - - - - typedef vecmathlib::mask_t<realvec_t> mask_t; - - static realvec_t loada(real_t const* p) - { - VML_ASSERT(intptr_t(p) % alignment == 0); - return loadu(p); - } - static realvec_t loadu(real_t const* p) - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = p[d]; - return res; - } - static realvec_t loadu(real_t const* p, size_t ioff) - { - VML_ASSERT(intptr_t(p) % alignment == 0); - return loadu(p+ioff); - } - realvec_t loada(real_t const* p, mask_t const& m) const - { - return m.m.ifthen(loada(p), *this); - } - realvec_t loadu(real_t const* p, mask_t const& m) const - { - return m.m.ifthen(loadu(p), *this); - } - realvec_t loadu(real_t const* p, size_t ioff, mask_t const& m) const - { - return m.m.ifthen(loadu(p, ioff), *this); - } - - void storea(real_t* p) const - { - VML_ASSERT(intptr_t(p) % alignment == 0); - storeu(p); - } - void storeu(real_t* p) const - { - for (int d=0; d<size; ++d) p[d] = v[d]; - } - void storeu(real_t* p, size_t ioff) const - { - VML_ASSERT(intptr_t(p) % alignment == 0); - storeu(p+ioff); - } - void storea(real_t* p, mask_t const& m) const - { - VML_ASSERT(intptr_t(p) % alignment == 0); - storeu(p, m); - } - void storeu(real_t* p, mask_t const& m) const - { - for (int d=0; d<size; ++d) if (m.m[d]) p[d] = v[d]; - } - void storeu(real_t* p, size_t ioff, mask_t const& m) const - { - VML_ASSERT(intptr_t(p) % alignment == 0); - storeu(p+ioff, m); - } - - - - intvec_t as_int() const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = FP::as_int(v[d]); - return res; - } - intvec_t convert_int() const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = FP::convert_int(v[d]); - return res; - } - - - - realvec_t operator+() const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = + v[d]; - return res; - } - realvec_t operator-() const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = - v[d]; - return res; - } - - realvec_t& operator+=(realvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] += x.v[d]; - return *this; - } - realvec_t& operator-=(realvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] -= x.v[d]; - return *this; - } - realvec_t& operator*=(realvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] *= x.v[d]; - return *this; - } - realvec_t& operator/=(realvec_t const& x) - { - for (int d=0; d<size; ++d) v[d] /= x.v[d]; - return *this; - } - - realvec_t operator+(realvec_t x) const - { - realvec_t res = *this; - return res += x; - } - realvec_t operator-(realvec_t x) const - { - realvec_t res = *this; - return res -= x; - } - realvec_t operator*(realvec_t x) const - { - realvec_t res = *this; - return res *= x; - } - realvec_t operator/(realvec_t x) const - { - realvec_t res = *this; - return res /= x; - } - - real_t maxval() const - { - real_t res = v[0]; - for (int d=1; d<size; ++d) res = vml_std::fmax(res, v[d]); - return res; - } - real_t minval() const - { - real_t res = v[0]; - for (int d=1; d<size; ++d) res = vml_std::fmin(res, v[d]); - return res; - } - real_t prod() const - { - real_t res = v[0]; - for (int d=1; d<size; ++d) res *= v[d]; - return res; - } - real_t sum() const - { - real_t res = v[0]; - for (int d=1; d<size; ++d) res += v[d]; - return res; - } - - - - boolvec_t operator==(realvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] == x.v[d]; - return res; - } - boolvec_t operator!=(realvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] != x.v[d]; - return res; - } - boolvec_t operator<(realvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] < x.v[d]; - return res; - } - boolvec_t operator<=(realvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] <= x.v[d]; - return res; - } - boolvec_t operator>(realvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] > x.v[d]; - return res; - } - boolvec_t operator>=(realvec_t const& x) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] >= x.v[d]; - return res; - } - - - - realvec_t acos() const { return map(vml_std::acos); } - realvec_t acosh() const { return map(vml_std::acosh); } - realvec_t asin() const { return map(vml_std::asin); } - realvec_t asinh() const { return map(vml_std::asinh); } - realvec_t atan() const { return map(vml_std::atan); } - realvec_t atan2(realvec_t y) const - { - return MF::vml_atan2(*this, y); - } - realvec_t atanh() const { return map(vml_std::atanh); } - realvec_t cbrt() const { return map(vml_std::cbrt); } - realvec_t ceil() const { return map(vml_std::ceil); } - realvec_t copysign(realvec_t y) const - { - return map(vml_std::copysign, y); - } - realvec_t cos() const { return map(vml_std::cos); } - realvec_t cosh() const { return map(vml_std::cosh); } - realvec_t exp() const { return map(vml_std::exp); } - realvec_t exp10() const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = vml_std::exp(R(M_LN10) * v[d]); - return res; - } - realvec_t exp2() const { return map(vml_std::exp2); } - realvec_t expm1() const { return map(vml_std::expm1); } - realvec_t fabs() const { return map(vml_std::fabs); } - realvec_t fdim(realvec_t y) const { return map(vml_std::fdim, y); } - realvec_t floor() const { return map(vml_std::floor); } - realvec_t fma(realvec_t y, realvec_t z) const - { - return map(vml_std::fma, y, z); - } - realvec_t fmax(realvec_t y) const { return map(vml_std::fmax, y); } - realvec_t fmin(realvec_t y) const { return map(vml_std::fmin, y); } - realvec_t fmod(realvec_t y) const { return map(vml_std::fmod, y); } - realvec_t frexp(intvec_t* ires) const - { - realvec_t res; - for (int d=0; d<size; ++d) { - int iri; - real_t r = vml_std::frexp(v[d], &iri); - int_t ir = iri; + } + + typedef boolpseudovec<real_t, size> boolvec_t; + typedef intpseudovec<real_t, size> intvec_t; + typedef realpseudovec realvec_t; + +private: + boolvec_t mapb(bool f(real_t)) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = f(v[d]); + return res; + } + intvec_t map(int_t f(real_t)) const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = f(v[d]); + return res; + } + realvec_t map(real_t f(real_t)) const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = f(v[d]); + return res; + } + realvec_t map(real_t f(real_t, int_t), intvec_t x) const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = f(v[d], x.v[d]); + return res; + } + realvec_t map(real_t f(real_t, real_t), realvec_t x) const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = f(v[d], x.v[d]); + return res; + } + realvec_t map(real_t f(real_t, real_t, real_t), realvec_t x, + realvec_t y) const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = f(v[d], x.v[d], y.v[d]); + return res; + } + +public: + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + vector_t v; + + realpseudovec() {} + // Can't have a non-trivial copy constructor; if so, objects won't + // be passed in registers + // realpseudovec(realpseudovec const& x): v(x.v) {} + // realpseudovec& operator=(realpseudovec const& x) { return v=x.v, *this; } + realpseudovec(real_t a) { + for (int d = 0; d < size; ++d) + v[d] = a; + } + realpseudovec(real_t const *as) { + for (int d = 0; d < size; ++d) + v[d] = as[d]; + } + + real_t operator[](int n) const { return v[n]; } + realvec_t &set_elt(int n, real_t a) { return v[n] = a, *this; } + + typedef vecmathlib::mask_t<realvec_t> mask_t; + + static realvec_t loada(real_t const *p) { + VML_ASSERT(intptr_t(p) % alignment == 0); + return loadu(p); + } + static realvec_t loadu(real_t const *p) { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = p[d]; + return res; + } + static realvec_t loadu(real_t const *p, size_t ioff) { + VML_ASSERT(intptr_t(p) % alignment == 0); + return loadu(p + ioff); + } + realvec_t loada(real_t const *p, mask_t const &m) const { + return m.m.ifthen(loada(p), *this); + } + realvec_t loadu(real_t const *p, mask_t const &m) const { + return m.m.ifthen(loadu(p), *this); + } + realvec_t loadu(real_t const *p, size_t ioff, mask_t const &m) const { + return m.m.ifthen(loadu(p, ioff), *this); + } + + void storea(real_t *p) const { + VML_ASSERT(intptr_t(p) % alignment == 0); + storeu(p); + } + void storeu(real_t *p) const { + for (int d = 0; d < size; ++d) + p[d] = v[d]; + } + void storeu(real_t *p, size_t ioff) const { + VML_ASSERT(intptr_t(p) % alignment == 0); + storeu(p + ioff); + } + void storea(real_t *p, mask_t const &m) const { + VML_ASSERT(intptr_t(p) % alignment == 0); + storeu(p, m); + } + void storeu(real_t *p, mask_t const &m) const { + for (int d = 0; d < size; ++d) + if (m.m[d]) + p[d] = v[d]; + } + void storeu(real_t *p, size_t ioff, mask_t const &m) const { + VML_ASSERT(intptr_t(p) % alignment == 0); + storeu(p + ioff, m); + } + + intvec_t as_int() const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = FP::as_int(v[d]); + return res; + } + intvec_t convert_int() const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = FP::convert_int(v[d]); + return res; + } + + realvec_t operator+() const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = +v[d]; + return res; + } + realvec_t operator-() const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = -v[d]; + return res; + } + + realvec_t &operator+=(realvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] += x.v[d]; + return *this; + } + realvec_t &operator-=(realvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] -= x.v[d]; + return *this; + } + realvec_t &operator*=(realvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] *= x.v[d]; + return *this; + } + realvec_t &operator/=(realvec_t const &x) { + for (int d = 0; d < size; ++d) + v[d] /= x.v[d]; + return *this; + } + + realvec_t operator+(realvec_t x) const { + realvec_t res = *this; + return res += x; + } + realvec_t operator-(realvec_t x) const { + realvec_t res = *this; + return res -= x; + } + realvec_t operator*(realvec_t x) const { + realvec_t res = *this; + return res *= x; + } + realvec_t operator/(realvec_t x) const { + realvec_t res = *this; + return res /= x; + } + + real_t maxval() const { + real_t res = v[0]; + for (int d = 1; d < size; ++d) + res = vml_std::fmax(res, v[d]); + return res; + } + real_t minval() const { + real_t res = v[0]; + for (int d = 1; d < size; ++d) + res = vml_std::fmin(res, v[d]); + return res; + } + real_t prod() const { + real_t res = v[0]; + for (int d = 1; d < size; ++d) + res *= v[d]; + return res; + } + real_t sum() const { + real_t res = v[0]; + for (int d = 1; d < size; ++d) + res += v[d]; + return res; + } + + boolvec_t operator==(realvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] == x.v[d]; + return res; + } + boolvec_t operator!=(realvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] != x.v[d]; + return res; + } + boolvec_t operator<(realvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] < x.v[d]; + return res; + } + boolvec_t operator<=(realvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] <= x.v[d]; + return res; + } + boolvec_t operator>(realvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] > x.v[d]; + return res; + } + boolvec_t operator>=(realvec_t const &x) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] >= x.v[d]; + return res; + } + + realvec_t acos() const { return map(vml_std::acos); } + realvec_t acosh() const { return map(vml_std::acosh); } + realvec_t asin() const { return map(vml_std::asin); } + realvec_t asinh() const { return map(vml_std::asinh); } + realvec_t atan() const { return map(vml_std::atan); } + realvec_t atan2(realvec_t y) const { return MF::vml_atan2(*this, y); } + realvec_t atanh() const { return map(vml_std::atanh); } + realvec_t cbrt() const { return map(vml_std::cbrt); } + realvec_t ceil() const { return map(vml_std::ceil); } + realvec_t copysign(realvec_t y) const { return map(vml_std::copysign, y); } + realvec_t cos() const { return map(vml_std::cos); } + realvec_t cosh() const { return map(vml_std::cosh); } + realvec_t exp() const { return map(vml_std::exp); } + realvec_t exp10() const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = vml_std::exp(R(M_LN10) * v[d]); + return res; + } + realvec_t exp2() const { return map(vml_std::exp2); } + realvec_t expm1() const { return map(vml_std::expm1); } + realvec_t fabs() const { return map(vml_std::fabs); } + realvec_t fdim(realvec_t y) const { return map(vml_std::fdim, y); } + realvec_t floor() const { return map(vml_std::floor); } + realvec_t fma(realvec_t y, realvec_t z) const { + return map(vml_std::fma, y, z); + } + realvec_t fmax(realvec_t y) const { return map(vml_std::fmax, y); } + realvec_t fmin(realvec_t y) const { return map(vml_std::fmin, y); } + realvec_t fmod(realvec_t y) const { return map(vml_std::fmod, y); } + realvec_t frexp(intvec_t *ires) const { + realvec_t res; + for (int d = 0; d < size; ++d) { + int iri; + real_t r = vml_std::frexp(v[d], &iri); + int_t ir = iri; #if defined VML_HAVE_INF - if (vml_std::isinf(v[d])) ir = std::numeric_limits<int_t>::max(); + if (vml_std::isinf(v[d])) + ir = std::numeric_limits<int_t>::max(); #endif #if defined VML_HAVE_NAN - if (vml_std::isnan(v[d])) ir = std::numeric_limits<int_t>::min(); + if (vml_std::isnan(v[d])) + ir = std::numeric_limits<int_t>::min(); #endif - res.v[d] = r; - ires->v[d] = ir; - } - return res; + res.v[d] = r; + ires->v[d] = ir; } - realvec_t hypot(realvec_t y) const { return map(vml_std::hypot, y); } - intvec_t ilogb() const - { - intvec_t res; - for (int d=0; d<size; ++d) { - int_t r = vml_std::ilogb(v[d]); - typedef std::numeric_limits<int_t> NL; - if (FP_ILOGB0 != NL::min() and v[d] == R(0.0)) { - r = NL::min(); + return res; + } + realvec_t hypot(realvec_t y) const { return map(vml_std::hypot, y); } + intvec_t ilogb() const { + intvec_t res; + for (int d = 0; d < size; ++d) { + int_t r = vml_std::ilogb(v[d]); + typedef std::numeric_limits<int_t> NL; + if (FP_ILOGB0 != NL::min() and v[d] == R(0.0)) { + r = NL::min(); #if defined VML_HAVE_INF - } else if (INT_MAX != NL::max() and vml_std::isinf(v[d])) { - r = NL::max(); + } else if (INT_MAX != NL::max() and vml_std::isinf(v[d])) { + r = NL::max(); #endif #if defined VML_HAVE_NAN - } else if (FP_ILOGBNAN != NL::min() and vml_std::isnan(v[d])) { - r = NL::min(); + } else if (FP_ILOGBNAN != NL::min() and vml_std::isnan(v[d])) { + r = NL::min(); #endif - } - res.v[d] = r; } - return res; + res.v[d] = r; } - boolvec_t isfinite() const { return mapb(vml_std::isfinite); } - boolvec_t isinf() const { return mapb(vml_std::isinf); } - boolvec_t isnan() const { return mapb(vml_std::isnan); } - boolvec_t isnormal() const { return mapb(vml_std::isnormal); } - realvec_t ldexp(int_t n) const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = vml_std::ldexp(v[d], n); - return res; - } - realvec_t ldexp(intvec_t n) const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = vml_std::ldexp(v[d], n.v[d]); - return res; - } - realvec_t log() const { return map(vml_std::log); } - realvec_t log10() const { return map(vml_std::log10); } - realvec_t log1p() const { return map(vml_std::log1p); } - realvec_t log2() const { return map(vml_std::log2); } - intvec_t lrint() const - { - realvec_t res; - if (sizeof(int_t) <= sizeof(long)) { - for (int d=0; d<size; ++d) res.v[d] = vml_std::lrint(v[d]); - } else if (sizeof(int_t) <= sizeof(long long)) { - for (int d=0; d<size; ++d) res.v[d] = vml_std::llrint(v[d]); - } else { - __builtin_unreachable(); - } - return res; - } - realvec_t mad(realvec_t y, realvec_t z) const - { - return MF::vml_mad(*this, y, z); - } - realvec_t nextafter(realvec_t y) const - { - return map(vml_std::nextafter, y); - } - realvec_t pow(realvec_t y) const { return map(vml_std::pow, y); } - realvec_t rcp() const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = R(1.0) / v[d]; - return res; - } - realvec_t remainder(realvec_t y) const - { - return map(vml_std::remainder, y); - } - realvec_t rint() const { return map(vml_std::rint); } - realvec_t round() const { return map(vml_std::round); } - realvec_t rsqrt() const { return sqrt().rcp(); } - boolvec_t signbit() const { return mapb(vml_std::signbit); } - realvec_t sin() const { return map(vml_std::sin); } - realvec_t sinh() const { return map(vml_std::sinh); } - realvec_t sqrt() const { return map(vml_std::sqrt); } - realvec_t tan() const { return map(vml_std::tan); } - realvec_t tanh() const { return map(vml_std::tanh); } - realvec_t trunc() const { return map(vml_std::trunc); } - }; - - - - // boolpseudovec definitions - - template<typename T, int N> - inline - typename boolpseudovec<T,N>::intvec_t boolpseudovec<T,N>::as_int() const - { - return convert_int(); - } - - template<typename T, int N> - inline - typename boolpseudovec<T,N>::intvec_t boolpseudovec<T,N>::convert_int() const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d]; return res; } - - template<typename T, int N> - inline - typename boolpseudovec<T,N>::boolvec_t - boolpseudovec<T,N>::ifthen(boolvec_t x, boolvec_t y) const - { - boolvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] ? x.v[d] : y.v[d]; - return res; - } - - template<typename T, int N> - inline - typename boolpseudovec<T,N>::intvec_t - boolpseudovec<T,N>::ifthen(intvec_t x, intvec_t y) const - { - intvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] ? x.v[d] : y.v[d]; + boolvec_t isfinite() const { return mapb(vml_std::isfinite); } + boolvec_t isinf() const { return mapb(vml_std::isinf); } + boolvec_t isnan() const { return mapb(vml_std::isnan); } + boolvec_t isnormal() const { return mapb(vml_std::isnormal); } + realvec_t ldexp(int_t n) const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = vml_std::ldexp(v[d], n); return res; } - - template<typename T, int N> - inline - typename boolpseudovec<T,N>::realvec_t - boolpseudovec<T,N>::ifthen(realvec_t x, realvec_t y) const - { + realvec_t ldexp(intvec_t n) const { realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = v[d] ? x.v[d] : y.v[d]; + for (int d = 0; d < size; ++d) + res.v[d] = vml_std::ldexp(v[d], n.v[d]); return res; } - - - - // intpseudovec definitions - - template<typename T, int N> - inline - typename intpseudovec<T,N>::realvec_t intpseudovec<T,N>::as_float() const - { + realvec_t log() const { return map(vml_std::log); } + realvec_t log10() const { return map(vml_std::log10); } + realvec_t log1p() const { return map(vml_std::log1p); } + realvec_t log2() const { return map(vml_std::log2); } + intvec_t lrint() const { realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = FP::as_float(v[d]); + if (sizeof(int_t) <= sizeof(long)) { + for (int d = 0; d < size; ++d) + res.v[d] = vml_std::lrint(v[d]); + } else if (sizeof(int_t) <= sizeof(long long)) { + for (int d = 0; d < size; ++d) + res.v[d] = vml_std::llrint(v[d]); + } else { + __builtin_unreachable(); + } return res; } - - template<typename T, int N> - inline - intpseudovec<T,N> intpseudovec<T,N>::bitifthen(intvec_t x, intvec_t y) const - { - return MF::vml_bitifthen(*this, x, y); - } - - template<typename T, int N> - inline - typename intpseudovec<T,N>::realvec_t intpseudovec<T,N>::convert_float() const - { + realvec_t mad(realvec_t y, realvec_t z) const { + return MF::vml_mad(*this, y, z); + } + realvec_t nextafter(realvec_t y) const { return map(vml_std::nextafter, y); } + realvec_t pow(realvec_t y) const { return map(vml_std::pow, y); } + realvec_t rcp() const { realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = FP::convert_float(v[d]); + for (int d = 0; d < size; ++d) + res.v[d] = R(1.0) / v[d]; return res; } - - template<typename T, int N> - inline intpseudovec<T,N> intpseudovec<T,N>::rotate(int_t n) const - { - return MF::vml_rotate(*this, n); - } - - template<typename T, int N> - inline intpseudovec<T,N> intpseudovec<T,N>::rotate(intvec_t n) const - { - return MF::vml_rotate(*this, n); - } - - - - // Wrappers - - // boolpseudovec wrappers - - template<typename real_t, int size> - inline intpseudovec<real_t, size> as_int(boolpseudovec<real_t, size> x) - { - return x.as_int(); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> convert_int(boolpseudovec<real_t, size> x) - { - return x.convert_int(); - } - - template<typename real_t, int size> - inline bool all(boolpseudovec<real_t, size> x) { return x.all(); } - - template<typename real_t, int size> - inline bool any(boolpseudovec<real_t, size> x) { return x.any(); } - - template<typename real_t, int size> - inline - boolpseudovec<real_t, size> ifthen(boolpseudovec<real_t, size> c, - boolpseudovec<real_t, size> x, - boolpseudovec<real_t, size> y) - { - return c.ifthen(x, y); - } - - template<typename real_t, int size> - inline - intpseudovec<real_t, size> ifthen(boolpseudovec<real_t, size> c, - intpseudovec<real_t, size> x, - intpseudovec<real_t, size> y) - { - return c.ifthen(x, y); - } - - template<typename real_t, int size> - inline - realpseudovec<real_t, size> ifthen(boolpseudovec<real_t, size> c, - realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return c.ifthen(x, y); - } - - - - // intpseudovec wrappers - - template<typename real_t, int size> - inline intpseudovec<real_t, size> abs(intpseudovec<real_t, size> x) - { - return x.abs(); - } - - template<typename real_t, int size> - inline boolpseudovec<real_t, size> as_bool(intpseudovec<real_t, size> x) - { - return x.as_bool(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> as_float(intpseudovec<real_t, size> x) - { - return x.as_float(); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> bitifthen(intpseudovec<real_t, size> x, - intpseudovec<real_t, size> y, - intpseudovec<real_t, size> z) - { - return x.bitifthen(y, z); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> clz(intpseudovec<real_t, size> x) - { - return x.clz(); - } - - template<typename real_t, int size> - inline boolpseudovec<real_t, size> convert_bool(intpseudovec<real_t, size> x) - { - return x.convert_bool(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> convert_float(intpseudovec<real_t, size> x) - { - return x.convert_float(); - } - - template<typename real_t, int size> - inline boolpseudovec<real_t, size> isignbit(intpseudovec<real_t, size> x) - { - return x.isignbit(); - } - - template<typename real_t, int size> - inline - intpseudovec<real_t, size> lsr(intpseudovec<real_t, size> x, - typename intpseudovec<real_t, size>::int_t n) - { - return x.lsr(n); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> lsr(intpseudovec<real_t, size> x, - intpseudovec<real_t, size> n) - { - return x.lsr(n); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> max(intpseudovec<real_t, size> x, - intpseudovec<real_t, size> y) - { - return x.max(y); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> min(intpseudovec<real_t, size> x, - intpseudovec<real_t, size> y) - { - return x.min(y); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> popcount(intpseudovec<real_t, size> x) - { - return x.popcount(); - } - - template<typename real_t, int size> - inline - intpseudovec<real_t, size> rotate(intpseudovec<real_t, size> x, - typename - intpseudovec<real_t, size>::int_t n) - { - return x.rotate(n); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> rotate(intpseudovec<real_t, size> x, - intpseudovec<real_t, size> n) - { - return x.rotate(n); - } - - - - // realpseudovec wrappers - - template<typename real_t, int size> - inline realpseudovec<real_t, size> - loada(real_t const* p, - realpseudovec<real_t, size> x, - typename realpseudovec<real_t, size>::mask_t const& m) - { - return x.loada(p, m); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> - loadu(real_t const* p, - realpseudovec<real_t, size> x, - typename realpseudovec<real_t, size>::mask_t const& m) - { - return x.loadu(p, m); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> - loadu(real_t const* p, size_t ioff, - realpseudovec<real_t, size> x, - typename realpseudovec<real_t, size>::mask_t const& m) - { - return x.loadu(p, ioff, m); - } - - template<typename real_t, int size> - inline void storea(realpseudovec<real_t, size> x, real_t* p) - { - return x.storea(p); - } - - template<typename real_t, int size> - inline void storeu(realpseudovec<real_t, size> x, real_t* p) - { - return x.storeu(p); - } - - template<typename real_t, int size> - inline void storeu(realpseudovec<real_t, size> x, real_t* p, size_t ioff) - { - return x.storeu(p, ioff); - } - - template<typename real_t, int size> - inline void storea(realpseudovec<real_t, size> x, real_t* p, - typename realpseudovec<real_t, size>::mask_t const& m) - { - return x.storea(p, m); - } - - template<typename real_t, int size> - inline void storeu(realpseudovec<real_t, size> x, real_t* p, - typename realpseudovec<real_t, size>::mask_t const& m) - { - return x.storeu(p, m); - } - - template<typename real_t, int size> - inline void storeu(realpseudovec<real_t, size> x, real_t* p, size_t ioff, - typename realpseudovec<real_t, size>::mask_t const& m) - { - return x.storeu(p, ioff, m); - } - - - - template<typename real_t, int size> - inline intpseudovec<real_t, size> as_int(realpseudovec<real_t, size> x) - { - return x.as_int(); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> convert_int(realpseudovec<real_t, size> x) - { - return x.convert_int(); - } - - template<typename real_t, int size> - inline real_t maxval(realpseudovec<real_t, size> x) - { - return x.maxval(); - } - - template<typename real_t, int size> - inline real_t minval(realpseudovec<real_t, size> x) - { - return x.minval(); - } - - template<typename real_t, int size> - inline real_t prod(realpseudovec<real_t, size> x) - { - return x.prod(); - } - - template<typename real_t, int size> - inline real_t sum(realpseudovec<real_t, size> x) - { - return x.sum(); - } - - - - template<typename real_t, int size> - inline realpseudovec<real_t, size> acos(realpseudovec<real_t, size> x) - { - return x.acos(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> acosh(realpseudovec<real_t, size> x) - { - return x.acosh(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> asin(realpseudovec<real_t, size> x) - { - return x.asin(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> asinh(realpseudovec<real_t, size> x) - { - return x.asinh(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> atan(realpseudovec<real_t, size> x) - { - return x.atan(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> atan2(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.atan2(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> atanh(realpseudovec<real_t, size> x) - { - return x.atanh(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> cbrt(realpseudovec<real_t, size> x) - { - return x.cbrt(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> ceil(realpseudovec<real_t, size> x) - { - return x.ceil(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> copysign(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.copysign(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> cos(realpseudovec<real_t, size> x) - { - return x.cos(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> cosh(realpseudovec<real_t, size> x) - { - return x.cosh(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> exp(realpseudovec<real_t, size> x) - { - return x.exp(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> exp10(realpseudovec<real_t, size> x) - { - return x.exp10(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> exp2(realpseudovec<real_t, size> x) - { - return x.exp2(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> expm1(realpseudovec<real_t, size> x) - { - return x.expm1(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> fabs(realpseudovec<real_t, size> x) - { - return x.fabs(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> floor(realpseudovec<real_t, size> x) - { - return x.floor(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> fdim(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.fdim(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> fma(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y, - realpseudovec<real_t, size> z) - { - return x.fma(y, z); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> fmax(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.fmax(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> fmin(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.fmin(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> fmod(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.fmod(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> frexp(realpseudovec<real_t, size> x, - intpseudovec<real_t, size>* r) - { - return x.frexp(r); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> hypot(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.hypot(y); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> ilogb(realpseudovec<real_t, size> x) - { - return x.ilogb(); - } - - template<typename real_t, int size> - inline boolpseudovec<real_t, size> isfinite(realpseudovec<real_t, size> x) - { - return x.isfinite(); - } - - template<typename real_t, int size> - inline boolpseudovec<real_t, size> isinf(realpseudovec<real_t, size> x) - { - return x.isinf(); - } - - template<typename real_t, int size> - inline boolpseudovec<real_t, size> isnan(realpseudovec<real_t, size> x) - { - return x.isnan(); - } - - template<typename real_t, int size> - inline boolpseudovec<real_t, size> isnormal(realpseudovec<real_t, size> x) - { - return x.isnormal(); - } - - template<typename real_t, int size> - inline - realpseudovec<real_t, size> ldexp(realpseudovec<real_t, size> x, - typename intpseudovec<real_t, size>::int_t - n) - { - return x.ldexp(n); - } - - template<typename real_t, int size> - inline - realpseudovec<real_t, size> ldexp(realpseudovec<real_t, size> x, - intpseudovec<real_t, size> n) - { - return x.ldexp(n); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> log(realpseudovec<real_t, size> x) - { - return x.log(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> log10(realpseudovec<real_t, size> x) - { - return x.log10(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> log1p(realpseudovec<real_t, size> x) - { - return x.log1p(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> log2(realpseudovec<real_t, size> x) - { - return x.log2(); - } - - template<typename real_t, int size> - inline intpseudovec<real_t, size> lrint(realpseudovec<real_t, size> x) - { - return x.lrint(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> mad(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y, - realpseudovec<real_t, size> z) - { - return x.mad(y, z); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> nextafter(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.nextafter(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> pow(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.pow(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> rcp(realpseudovec<real_t, size> x) - { - return x.rcp(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> remainder(realpseudovec<real_t, size> x, - realpseudovec<real_t, size> y) - { - return x.remainder(y); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> rint(realpseudovec<real_t, size> x) - { - return x.rint(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> round(realpseudovec<real_t, size> x) - { - return x.round(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> rsqrt(realpseudovec<real_t, size> x) - { - return x.rsqrt(); - } - - template<typename real_t, int size> - inline boolpseudovec<real_t, size> signbit(realpseudovec<real_t, size> x) - { - return x.signbit(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> sin(realpseudovec<real_t, size> x) - { - return x.sin(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> sinh(realpseudovec<real_t, size> x) - { - return x.sinh(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> sqrt(realpseudovec<real_t, size> x) - { - return x.sqrt(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> tan(realpseudovec<real_t, size> x) - { - return x.tan(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> tanh(realpseudovec<real_t, size> x) - { - return x.tanh(); - } - - template<typename real_t, int size> - inline realpseudovec<real_t, size> trunc(realpseudovec<real_t, size> x) - { - return x.trunc(); - } - - - + realvec_t remainder(realvec_t y) const { return map(vml_std::remainder, y); } + realvec_t rint() const { return map(vml_std::rint); } + realvec_t round() const { return map(vml_std::round); } + realvec_t rsqrt() const { return sqrt().rcp(); } + boolvec_t signbit() const { return mapb(vml_std::signbit); } + realvec_t sin() const { return map(vml_std::sin); } + realvec_t sinh() const { return map(vml_std::sinh); } + realvec_t sqrt() const { return map(vml_std::sqrt); } + realvec_t tan() const { return map(vml_std::tan); } + realvec_t tanh() const { return map(vml_std::tanh); } + realvec_t trunc() const { return map(vml_std::trunc); } +}; + +// boolpseudovec definitions + +template <typename T, int N> +inline typename boolpseudovec<T, N>::intvec_t +boolpseudovec<T, N>::as_int() const { + return convert_int(); +} + +template <typename T, int N> +inline typename boolpseudovec<T, N>::intvec_t +boolpseudovec<T, N>::convert_int() const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d]; + return res; +} + +template <typename T, int N> +inline typename boolpseudovec<T, N>::boolvec_t +boolpseudovec<T, N>::ifthen(boolvec_t x, boolvec_t y) const { + boolvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] ? x.v[d] : y.v[d]; + return res; +} + +template <typename T, int N> +inline typename boolpseudovec<T, N>::intvec_t +boolpseudovec<T, N>::ifthen(intvec_t x, intvec_t y) const { + intvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] ? x.v[d] : y.v[d]; + return res; +} + +template <typename T, int N> +inline typename boolpseudovec<T, N>::realvec_t +boolpseudovec<T, N>::ifthen(realvec_t x, realvec_t y) const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = v[d] ? x.v[d] : y.v[d]; + return res; +} + +// intpseudovec definitions + +template <typename T, int N> +inline typename intpseudovec<T, N>::realvec_t +intpseudovec<T, N>::as_float() const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = FP::as_float(v[d]); + return res; +} + +template <typename T, int N> +inline intpseudovec<T, N> intpseudovec<T, N>::bitifthen(intvec_t x, + intvec_t y) const { + return MF::vml_bitifthen(*this, x, y); +} + +template <typename T, int N> +inline typename intpseudovec<T, N>::realvec_t +intpseudovec<T, N>::convert_float() const { + realvec_t res; + for (int d = 0; d < size; ++d) + res.v[d] = FP::convert_float(v[d]); + return res; +} + +template <typename T, int N> +inline intpseudovec<T, N> intpseudovec<T, N>::rotate(int_t n) const { + return MF::vml_rotate(*this, n); +} + +template <typename T, int N> +inline intpseudovec<T, N> intpseudovec<T, N>::rotate(intvec_t n) const { + return MF::vml_rotate(*this, n); +} + +// Wrappers + +// boolpseudovec wrappers + +template <typename real_t, int size> +inline intpseudovec<real_t, size> as_int(boolpseudovec<real_t, size> x) { + return x.as_int(); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> convert_int(boolpseudovec<real_t, size> x) { + return x.convert_int(); +} + +template <typename real_t, int size> +inline bool all(boolpseudovec<real_t, size> x) { + return x.all(); +} + +template <typename real_t, int size> +inline bool any(boolpseudovec<real_t, size> x) { + return x.any(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> ifthen(boolpseudovec<real_t, size> c, + boolpseudovec<real_t, size> x, + boolpseudovec<real_t, size> y) { + return c.ifthen(x, y); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> ifthen(boolpseudovec<real_t, size> c, + intpseudovec<real_t, size> x, + intpseudovec<real_t, size> y) { + return c.ifthen(x, y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> ifthen(boolpseudovec<real_t, size> c, + realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return c.ifthen(x, y); +} + +// intpseudovec wrappers + +template <typename real_t, int size> +inline intpseudovec<real_t, size> abs(intpseudovec<real_t, size> x) { + return x.abs(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> as_bool(intpseudovec<real_t, size> x) { + return x.as_bool(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> as_float(intpseudovec<real_t, size> x) { + return x.as_float(); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> bitifthen(intpseudovec<real_t, size> x, + intpseudovec<real_t, size> y, + intpseudovec<real_t, size> z) { + return x.bitifthen(y, z); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> clz(intpseudovec<real_t, size> x) { + return x.clz(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> convert_bool(intpseudovec<real_t, size> x) { + return x.convert_bool(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> convert_float(intpseudovec<real_t, size> x) { + return x.convert_float(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> isignbit(intpseudovec<real_t, size> x) { + return x.isignbit(); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> +lsr(intpseudovec<real_t, size> x, + typename intpseudovec<real_t, size>::int_t n) { + return x.lsr(n); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> lsr(intpseudovec<real_t, size> x, + intpseudovec<real_t, size> n) { + return x.lsr(n); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> max(intpseudovec<real_t, size> x, + intpseudovec<real_t, size> y) { + return x.max(y); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> min(intpseudovec<real_t, size> x, + intpseudovec<real_t, size> y) { + return x.min(y); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> popcount(intpseudovec<real_t, size> x) { + return x.popcount(); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> +rotate(intpseudovec<real_t, size> x, + typename intpseudovec<real_t, size>::int_t n) { + return x.rotate(n); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> rotate(intpseudovec<real_t, size> x, + intpseudovec<real_t, size> n) { + return x.rotate(n); +} + +// realpseudovec wrappers + +template <typename real_t, int size> +inline realpseudovec<real_t, size> +loada(real_t const *p, realpseudovec<real_t, size> x, + typename realpseudovec<real_t, size>::mask_t const &m) { + return x.loada(p, m); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> +loadu(real_t const *p, realpseudovec<real_t, size> x, + typename realpseudovec<real_t, size>::mask_t const &m) { + return x.loadu(p, m); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> +loadu(real_t const *p, size_t ioff, realpseudovec<real_t, size> x, + typename realpseudovec<real_t, size>::mask_t const &m) { + return x.loadu(p, ioff, m); +} + +template <typename real_t, int size> +inline void storea(realpseudovec<real_t, size> x, real_t *p) { + return x.storea(p); +} + +template <typename real_t, int size> +inline void storeu(realpseudovec<real_t, size> x, real_t *p) { + return x.storeu(p); +} + +template <typename real_t, int size> +inline void storeu(realpseudovec<real_t, size> x, real_t *p, size_t ioff) { + return x.storeu(p, ioff); +} + +template <typename real_t, int size> +inline void storea(realpseudovec<real_t, size> x, real_t *p, + typename realpseudovec<real_t, size>::mask_t const &m) { + return x.storea(p, m); +} + +template <typename real_t, int size> +inline void storeu(realpseudovec<real_t, size> x, real_t *p, + typename realpseudovec<real_t, size>::mask_t const &m) { + return x.storeu(p, m); +} + +template <typename real_t, int size> +inline void storeu(realpseudovec<real_t, size> x, real_t *p, size_t ioff, + typename realpseudovec<real_t, size>::mask_t const &m) { + return x.storeu(p, ioff, m); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> as_int(realpseudovec<real_t, size> x) { + return x.as_int(); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> convert_int(realpseudovec<real_t, size> x) { + return x.convert_int(); +} + +template <typename real_t, int size> +inline real_t maxval(realpseudovec<real_t, size> x) { + return x.maxval(); +} + +template <typename real_t, int size> +inline real_t minval(realpseudovec<real_t, size> x) { + return x.minval(); +} + +template <typename real_t, int size> +inline real_t prod(realpseudovec<real_t, size> x) { + return x.prod(); +} + +template <typename real_t, int size> +inline real_t sum(realpseudovec<real_t, size> x) { + return x.sum(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> acos(realpseudovec<real_t, size> x) { + return x.acos(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> acosh(realpseudovec<real_t, size> x) { + return x.acosh(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> asin(realpseudovec<real_t, size> x) { + return x.asin(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> asinh(realpseudovec<real_t, size> x) { + return x.asinh(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> atan(realpseudovec<real_t, size> x) { + return x.atan(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> atan2(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.atan2(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> atanh(realpseudovec<real_t, size> x) { + return x.atanh(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> cbrt(realpseudovec<real_t, size> x) { + return x.cbrt(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> ceil(realpseudovec<real_t, size> x) { + return x.ceil(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> copysign(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.copysign(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> cos(realpseudovec<real_t, size> x) { + return x.cos(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> cosh(realpseudovec<real_t, size> x) { + return x.cosh(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> exp(realpseudovec<real_t, size> x) { + return x.exp(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> exp10(realpseudovec<real_t, size> x) { + return x.exp10(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> exp2(realpseudovec<real_t, size> x) { + return x.exp2(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> expm1(realpseudovec<real_t, size> x) { + return x.expm1(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> fabs(realpseudovec<real_t, size> x) { + return x.fabs(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> floor(realpseudovec<real_t, size> x) { + return x.floor(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> fdim(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.fdim(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> fma(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y, + realpseudovec<real_t, size> z) { + return x.fma(y, z); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> fmax(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.fmax(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> fmin(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.fmin(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> fmod(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.fmod(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> frexp(realpseudovec<real_t, size> x, + intpseudovec<real_t, size> *r) { + return x.frexp(r); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> hypot(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.hypot(y); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> ilogb(realpseudovec<real_t, size> x) { + return x.ilogb(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> isfinite(realpseudovec<real_t, size> x) { + return x.isfinite(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> isinf(realpseudovec<real_t, size> x) { + return x.isinf(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> isnan(realpseudovec<real_t, size> x) { + return x.isnan(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> isnormal(realpseudovec<real_t, size> x) { + return x.isnormal(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> +ldexp(realpseudovec<real_t, size> x, + typename intpseudovec<real_t, size>::int_t n) { + return x.ldexp(n); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> ldexp(realpseudovec<real_t, size> x, + intpseudovec<real_t, size> n) { + return x.ldexp(n); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> log(realpseudovec<real_t, size> x) { + return x.log(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> log10(realpseudovec<real_t, size> x) { + return x.log10(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> log1p(realpseudovec<real_t, size> x) { + return x.log1p(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> log2(realpseudovec<real_t, size> x) { + return x.log2(); +} + +template <typename real_t, int size> +inline intpseudovec<real_t, size> lrint(realpseudovec<real_t, size> x) { + return x.lrint(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> mad(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y, + realpseudovec<real_t, size> z) { + return x.mad(y, z); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> nextafter(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.nextafter(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> pow(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.pow(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> rcp(realpseudovec<real_t, size> x) { + return x.rcp(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> remainder(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) { + return x.remainder(y); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> rint(realpseudovec<real_t, size> x) { + return x.rint(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> round(realpseudovec<real_t, size> x) { + return x.round(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> rsqrt(realpseudovec<real_t, size> x) { + return x.rsqrt(); +} + +template <typename real_t, int size> +inline boolpseudovec<real_t, size> signbit(realpseudovec<real_t, size> x) { + return x.signbit(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> sin(realpseudovec<real_t, size> x) { + return x.sin(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> sinh(realpseudovec<real_t, size> x) { + return x.sinh(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> sqrt(realpseudovec<real_t, size> x) { + return x.sqrt(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> tan(realpseudovec<real_t, size> x) { + return x.tan(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> tanh(realpseudovec<real_t, size> x) { + return x.tanh(); +} + +template <typename real_t, int size> +inline realpseudovec<real_t, size> trunc(realpseudovec<real_t, size> x) { + return x.trunc(); +} + #ifndef VML_NO_IOSTREAM - template<typename real_t, int size> - std::ostream& operator<<(std::ostream& os, - boolpseudovec<real_t, size> const& x) - { - os << "["; - for (int i=0; i<size; ++i) { - if (i!=0) os << ","; - os << x[i]; - } - os << "]"; - return os; - } - - template<typename real_t, int size> - std::ostream& operator<<(std::ostream& os, - intpseudovec<real_t, size> const& x) - { - os << "["; - for (int i=0; i<size; ++i) { - if (i!=0) os << ","; - os << x[i]; - } - os << "]"; - return os; - } - - template<typename real_t, int size> - std::ostream& operator<<(std::ostream& os, - realpseudovec<real_t, size> const& x) - { - os << "["; - for (int i=0; i<size; ++i) { - if (i!=0) os << ","; - os << x[i]; - } - os << "]"; - return os; - } +template <typename real_t, int size> +std::ostream &operator<<(std::ostream &os, + boolpseudovec<real_t, size> const &x) { + os << "["; + for (int i = 0; i < size; ++i) { + if (i != 0) + os << ","; + os << x[i]; + } + os << "]"; + return os; +} + +template <typename real_t, int size> +std::ostream &operator<<(std::ostream &os, + intpseudovec<real_t, size> const &x) { + os << "["; + for (int i = 0; i < size; ++i) { + if (i != 0) + os << ","; + os << x[i]; + } + os << "]"; + return os; +} + +template <typename real_t, int size> +std::ostream &operator<<(std::ostream &os, + realpseudovec<real_t, size> const &x) { + os << "["; + for (int i = 0; i < size; ++i) { + if (i != 0) + os << ","; + os << x[i]; + } + os << "]"; + return os; +} #endif - + } // namespace vecmathlib -#endif // #ifndef VEC_PSEUDO_H +#endif // #ifndef VEC_PSEUDO_H |