// -*-C++-*- #ifndef VEC_PSEUDO_H #define VEC_PSEUDO_H #include "floatprops.h" #include "mathfuncs.h" #include "vec_base.h" #include #include #include #include #ifndef VML_NO_IOSTREAM #include #endif #include namespace vecmathlib { template struct boolpseudovec; template struct intpseudovec; template struct realpseudovec; template struct boolpseudovec : floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static int const size = N; typedef bool scalar_t; typedef bool bvector_t[size]; static int const alignment = sizeof(bool); typedef boolpseudovec boolvec_t; typedef intpseudovec intvec_t; typedef realpseudovec realvec_t; // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef floatprops FP; typedef mathfuncs MF; bvector_t v; boolpseudovec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers // boolpseudovec(boolpseudovec const& x): v(x.v) {} // boolpseudovec& operator=(boolpseudovec const& x) { return v=x.v, *this; } boolpseudovec(bool a) { for (int d = 0; d < size; ++d) v[d] = a; } boolpseudovec(bool const *as) { for (int d = 0; d < size; ++d) v[d] = as[d]; } bool operator[](int n) const { return v[n]; } boolvec_t &set_elt(int n, bool a) { return v[n] = a, *this; } intvec_t as_int() const; // defined after intpseudovec intvec_t convert_int() const; // defined after intpseudovec boolvec_t operator!() const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = !v[d]; return res; } boolvec_t operator&&(boolvec_t x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] && x.v[d]; return res; } boolvec_t operator||(boolvec_t x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] || x.v[d]; return res; } boolvec_t operator==(boolvec_t x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] == x.v[d]; return res; } boolvec_t operator!=(boolvec_t x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] != x.v[d]; return res; } bool all() const { bool res = v[0]; for (int d = 1; d < size; ++d) res = res && v[d]; return res; } bool any() const { bool res = v[0]; for (int d = 1; d < size; ++d) res = res || v[d]; return res; } // ifthen(condition, then-value, else-value) boolvec_t ifthen(boolvec_t x, boolvec_t y) const; intvec_t ifthen(intvec_t x, intvec_t y) const; // defined after intpseudovec realvec_t ifthen(realvec_t x, realvec_t y) const; // defined after realpseudovec }; template struct intpseudovec : floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static int const size = N; typedef int_t scalar_t; typedef int_t ivector_t[size]; static int const alignment = sizeof(int_t); typedef boolpseudovec boolvec_t; typedef intpseudovec intvec_t; typedef realpseudovec realvec_t; // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef floatprops FP; typedef mathfuncs MF; ivector_t v; intpseudovec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers // intpseudovec(intpseudovec const& x): v(x.v) {} // intpseudovec& operator=(intpseudovec const& x) { return v=x.v, *this; } intpseudovec(int_t a) { for (int d = 0; d < size; ++d) v[d] = a; } intpseudovec(int_t const *as) { for (int d = 0; d < size; ++d) v[d] = as[d]; } static intvec_t iota() { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = d; return res; } int_t operator[](int n) const { return v[n]; } intvec_t &set_elt(int n, int_t a) { return v[n] = a, *this; } boolvec_t as_bool() const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d]; return res; } boolvec_t convert_bool() const { // Result: convert_bool(0)=false, convert_bool(else)=true boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d]; return res; } realvec_t as_float() const; // defined after realpseudovec realvec_t convert_float() const; // defined after realpseudovec intvec_t operator+() const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = +v[d]; return res; } intvec_t operator-() const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = -v[d]; return res; } intvec_t &operator+=(intvec_t const &x) { for (int d = 0; d < size; ++d) v[d] += x.v[d]; return *this; } intvec_t &operator-=(intvec_t const &x) { for (int d = 0; d < size; ++d) v[d] -= x.v[d]; return *this; } intvec_t &operator*=(intvec_t const &x) { for (int d = 0; d < size; ++d) v[d] *= x.v[d]; return *this; } intvec_t &operator/=(intvec_t const &x) { for (int d = 0; d < size; ++d) v[d] /= x.v[d]; return *this; } intvec_t &operator%=(intvec_t const &x) { for (int d = 0; d < size; ++d) v[d] %= x.v[d]; return *this; } intvec_t operator+(intvec_t x) const { intvec_t res = *this; return res += x; } intvec_t operator-(intvec_t x) const { intvec_t res = *this; return res -= x; } intvec_t operator*(intvec_t x) const { intvec_t res = *this; return res *= x; } intvec_t operator/(intvec_t x) const { intvec_t res = *this; return res /= x; } intvec_t operator%(intvec_t x) const { intvec_t res = *this; return res %= x; } intvec_t operator~() const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = ~v[d]; return res; } intvec_t &operator&=(intvec_t const &x) { for (int d = 0; d < size; ++d) v[d] &= x.v[d]; return *this; } intvec_t &operator|=(intvec_t const &x) { for (int d = 0; d < size; ++d) v[d] |= x.v[d]; return *this; } intvec_t &operator^=(intvec_t const &x) { for (int d = 0; d < size; ++d) v[d] ^= x.v[d]; return *this; } intvec_t operator&(intvec_t x) const { intvec_t res = *this; return res &= x; } intvec_t operator|(intvec_t x) const { intvec_t res = *this; return res |= x; } intvec_t operator^(intvec_t x) const { intvec_t res = *this; return res ^= x; } intvec_t bitifthen(intvec_t x, intvec_t y) const; intvec_t lsr(int_t n) const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = I(U(v[d]) >> U(n)); return res; } intvec_t rotate(int_t n) const; intvec_t &operator>>=(int_t n) { for (int d = 0; d < size; ++d) v[d] >>= n; return *this; } intvec_t &operator<<=(int_t n) { for (int d = 0; d < size; ++d) v[d] <<= n; return *this; } intvec_t operator>>(int_t n) const { intvec_t res = *this; return res >>= n; } intvec_t operator<<(int_t n) const { intvec_t res = *this; return res <<= n; } intvec_t lsr(intvec_t n) const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = I(U(v[d]) >> U(n.v[d])); return res; } intvec_t rotate(intvec_t n) const; intvec_t &operator>>=(intvec_t n) { for (int d = 0; d < size; ++d) v[d] >>= n.v[d]; return *this; } intvec_t &operator<<=(intvec_t n) { for (int d = 0; d < size; ++d) v[d] <<= n.v[d]; return *this; } intvec_t operator>>(intvec_t n) const { intvec_t res = *this; return res >>= n; } intvec_t operator<<(intvec_t n) const { intvec_t res = *this; return res <<= n; } intvec_t clz() const { intvec_t res; #if defined __clang__ for (int d = 0; d < size; ++d) { if (v[d] == 0) { res.v[d] = CHAR_BIT * sizeof v[d]; } else { if (sizeof v[d] == sizeof(long long)) { res.v[d] = __builtin_clzll(v[d]); } else if (sizeof v[d] == sizeof(long)) { res.v[d] = __builtin_clzl(v[d]); } else if (sizeof v[d] == sizeof(int)) { res.v[d] = __builtin_clz(v[d]); } else if (sizeof v[d] == sizeof(short)) { res.v[d] = __builtin_clzs(v[d]); } else if (sizeof v[d] == sizeof(char)) { res.v[d] = __builtin_clzs((unsigned short)(unsigned char)v[d]) - CHAR_BIT * (sizeof(short) - sizeof(char)); } else { __builtin_unreachable(); } } } #else res = MF::vml_clz(*this); #endif return res; } intvec_t popcount() const { intvec_t res; #if defined __clang__ || defined __GNUC__ || defined __GNUG__ if (sizeof(int_t) == sizeof(long long)) { for (int d = 0; d < size; ++d) res.v[d] = __builtin_popcountll(v[d]); } else if (sizeof(int_t) == sizeof(long)) { for (int d = 0; d < size; ++d) res.v[d] = __builtin_popcountl(v[d]); } else if (sizeof(int_t) <= sizeof(int)) { for (int d = 0; d < size; ++d) res.v[d] = __builtin_popcount(v[d]); } else { __builtin_unreachable(); } #else res = MF::vml_popcount(*this); #endif return res; } boolvec_t operator==(intvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] == x.v[d]; return res; } boolvec_t operator!=(intvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] != x.v[d]; return res; } boolvec_t operator<(intvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] < x.v[d]; return res; } boolvec_t operator<=(intvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] <= x.v[d]; return res; } boolvec_t operator>(intvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] > x.v[d]; return res; } boolvec_t operator>=(intvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] >= x.v[d]; return res; } intvec_t abs() const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = std::abs(v[d]); return res; } boolvec_t isignbit() const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] < 0; return res; } intvec_t max(intvec_t x) const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = std::max(v[d], x.v[d]); return res; } intvec_t min(intvec_t x) const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = std::min(v[d], x.v[d]); return res; } }; template struct realpseudovec : floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static int const size = N; typedef real_t scalar_t; typedef real_t vector_t[size]; static int const alignment = sizeof(real_t); #ifndef VML_NO_IOSTREAM static char const *name() { static std::string name_; if (name_.empty()) { std::stringstream buf; buf << ""; name_ = buf.str(); } return name_.c_str(); } #endif void barrier() { for (int d = 0; d < size; ++d) v[d] = vecmathlib::barrier(v[d]); } typedef boolpseudovec boolvec_t; typedef intpseudovec intvec_t; typedef realpseudovec realvec_t; private: boolvec_t mapb(bool f(real_t)) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = f(v[d]); return res; } intvec_t map(int_t f(real_t)) const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = f(v[d]); return res; } realvec_t map(real_t f(real_t)) const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = f(v[d]); return res; } realvec_t map(real_t f(real_t, int_t), intvec_t x) const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = f(v[d], x.v[d]); return res; } realvec_t map(real_t f(real_t, real_t), realvec_t x) const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = f(v[d], x.v[d]); return res; } realvec_t map(real_t f(real_t, real_t, real_t), realvec_t x, realvec_t y) const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = f(v[d], x.v[d], y.v[d]); return res; } public: // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef floatprops FP; typedef mathfuncs MF; vector_t v; realpseudovec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers // realpseudovec(realpseudovec const& x): v(x.v) {} // realpseudovec& operator=(realpseudovec const& x) { return v=x.v, *this; } realpseudovec(real_t a) { for (int d = 0; d < size; ++d) v[d] = a; } realpseudovec(real_t const *as) { for (int d = 0; d < size; ++d) v[d] = as[d]; } real_t operator[](int n) const { return v[n]; } realvec_t &set_elt(int n, real_t a) { return v[n] = a, *this; } typedef vecmathlib::mask_t mask_t; static realvec_t loada(real_t const *p) { VML_ASSERT(intptr_t(p) % alignment == 0); return loadu(p); } static realvec_t loadu(real_t const *p) { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = p[d]; return res; } static realvec_t loadu(real_t const *p, size_t ioff) { VML_ASSERT(intptr_t(p) % alignment == 0); return loadu(p + ioff); } realvec_t loada(real_t const *p, mask_t const &m) const { return m.m.ifthen(loada(p), *this); } realvec_t loadu(real_t const *p, mask_t const &m) const { return m.m.ifthen(loadu(p), *this); } realvec_t loadu(real_t const *p, size_t ioff, mask_t const &m) const { return m.m.ifthen(loadu(p, ioff), *this); } void storea(real_t *p) const { VML_ASSERT(intptr_t(p) % alignment == 0); storeu(p); } void storeu(real_t *p) const { for (int d = 0; d < size; ++d) p[d] = v[d]; } void storeu(real_t *p, size_t ioff) const { VML_ASSERT(intptr_t(p) % alignment == 0); storeu(p + ioff); } void storea(real_t *p, mask_t const &m) const { VML_ASSERT(intptr_t(p) % alignment == 0); storeu(p, m); } void storeu(real_t *p, mask_t const &m) const { for (int d = 0; d < size; ++d) if (m.m[d]) p[d] = v[d]; } void storeu(real_t *p, size_t ioff, mask_t const &m) const { VML_ASSERT(intptr_t(p) % alignment == 0); storeu(p + ioff, m); } intvec_t as_int() const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = FP::as_int(v[d]); return res; } intvec_t convert_int() const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = FP::convert_int(v[d]); return res; } realvec_t operator+() const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = +v[d]; return res; } realvec_t operator-() const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = -v[d]; return res; } realvec_t &operator+=(realvec_t const &x) { for (int d = 0; d < size; ++d) v[d] += x.v[d]; return *this; } realvec_t &operator-=(realvec_t const &x) { for (int d = 0; d < size; ++d) v[d] -= x.v[d]; return *this; } realvec_t &operator*=(realvec_t const &x) { for (int d = 0; d < size; ++d) v[d] *= x.v[d]; return *this; } realvec_t &operator/=(realvec_t const &x) { for (int d = 0; d < size; ++d) v[d] /= x.v[d]; return *this; } realvec_t operator+(realvec_t x) const { realvec_t res = *this; return res += x; } realvec_t operator-(realvec_t x) const { realvec_t res = *this; return res -= x; } realvec_t operator*(realvec_t x) const { realvec_t res = *this; return res *= x; } realvec_t operator/(realvec_t x) const { realvec_t res = *this; return res /= x; } real_t maxval() const { real_t res = v[0]; for (int d = 1; d < size; ++d) res = vml_std::fmax(res, v[d]); return res; } real_t minval() const { real_t res = v[0]; for (int d = 1; d < size; ++d) res = vml_std::fmin(res, v[d]); return res; } real_t prod() const { real_t res = v[0]; for (int d = 1; d < size; ++d) res *= v[d]; return res; } real_t sum() const { real_t res = v[0]; for (int d = 1; d < size; ++d) res += v[d]; return res; } boolvec_t operator==(realvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] == x.v[d]; return res; } boolvec_t operator!=(realvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] != x.v[d]; return res; } boolvec_t operator<(realvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] < x.v[d]; return res; } boolvec_t operator<=(realvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] <= x.v[d]; return res; } boolvec_t operator>(realvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] > x.v[d]; return res; } boolvec_t operator>=(realvec_t const &x) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] >= x.v[d]; return res; } realvec_t acos() const { return map(vml_std::acos); } realvec_t acosh() const { return map(vml_std::acosh); } realvec_t asin() const { return map(vml_std::asin); } realvec_t asinh() const { return map(vml_std::asinh); } realvec_t atan() const { return map(vml_std::atan); } realvec_t atan2(realvec_t y) const { return MF::vml_atan2(*this, y); } realvec_t atanh() const { return map(vml_std::atanh); } realvec_t cbrt() const { return map(vml_std::cbrt); } realvec_t ceil() const { return map(vml_std::ceil); } realvec_t copysign(realvec_t y) const { return map(vml_std::copysign, y); } realvec_t cos() const { return map(vml_std::cos); } realvec_t cosh() const { return map(vml_std::cosh); } realvec_t exp() const { return map(vml_std::exp); } realvec_t exp10() const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = vml_std::exp(R(M_LN10) * v[d]); return res; } realvec_t exp2() const { return map(vml_std::exp2); } realvec_t expm1() const { return map(vml_std::expm1); } realvec_t fabs() const { return map(vml_std::fabs); } realvec_t fdim(realvec_t y) const { return map(vml_std::fdim, y); } realvec_t floor() const { return map(vml_std::floor); } realvec_t fma(realvec_t y, realvec_t z) const { return map(vml_std::fma, y, z); } realvec_t fmax(realvec_t y) const { return map(vml_std::fmax, y); } realvec_t fmin(realvec_t y) const { return map(vml_std::fmin, y); } realvec_t fmod(realvec_t y) const { return map(vml_std::fmod, y); } realvec_t frexp(intvec_t *ires) const { realvec_t res; for (int d = 0; d < size; ++d) { int iri; real_t r = vml_std::frexp(v[d], &iri); int_t ir = iri; #if defined VML_HAVE_INF if (vml_std::isinf(v[d])) ir = std::numeric_limits::max(); #endif #if defined VML_HAVE_NAN if (vml_std::isnan(v[d])) ir = std::numeric_limits::min(); #endif res.v[d] = r; ires->v[d] = ir; } return res; } realvec_t hypot(realvec_t y) const { return map(vml_std::hypot, y); } intvec_t ilogb() const { intvec_t res; for (int d = 0; d < size; ++d) { int_t r = vml_std::ilogb(v[d]); typedef std::numeric_limits NL; if (FP_ILOGB0 != NL::min() and v[d] == R(0.0)) { r = NL::min(); #if defined VML_HAVE_INF } else if (INT_MAX != NL::max() and vml_std::isinf(v[d])) { r = NL::max(); #endif #if defined VML_HAVE_NAN } else if (FP_ILOGBNAN != NL::min() and vml_std::isnan(v[d])) { r = NL::min(); #endif } res.v[d] = r; } return res; } boolvec_t isfinite() const { return mapb(vml_std::isfinite); } boolvec_t isinf() const { return mapb(vml_std::isinf); } boolvec_t isnan() const { return mapb(vml_std::isnan); } boolvec_t isnormal() const { return mapb(vml_std::isnormal); } realvec_t ldexp(int_t n) const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = vml_std::ldexp(v[d], n); return res; } realvec_t ldexp(intvec_t n) const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = vml_std::ldexp(v[d], n.v[d]); return res; } realvec_t log() const { return map(vml_std::log); } realvec_t log10() const { return map(vml_std::log10); } realvec_t log1p() const { return map(vml_std::log1p); } realvec_t log2() const { return map(vml_std::log2); } intvec_t lrint() const { realvec_t res; if (sizeof(int_t) <= sizeof(long)) { for (int d = 0; d < size; ++d) res.v[d] = vml_std::lrint(v[d]); } else if (sizeof(int_t) <= sizeof(long long)) { for (int d = 0; d < size; ++d) res.v[d] = vml_std::llrint(v[d]); } else { __builtin_unreachable(); } return res; } realvec_t mad(realvec_t y, realvec_t z) const { return MF::vml_mad(*this, y, z); } realvec_t nextafter(realvec_t y) const { return map(vml_std::nextafter, y); } realvec_t pow(realvec_t y) const { return map(vml_std::pow, y); } realvec_t rcp() const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = R(1.0) / v[d]; return res; } realvec_t remainder(realvec_t y) const { return map(vml_std::remainder, y); } realvec_t rint() const { return map(vml_std::rint); } realvec_t round() const { return map(vml_std::round); } realvec_t rsqrt() const { return sqrt().rcp(); } boolvec_t signbit() const { return mapb(vml_std::signbit); } realvec_t sin() const { return map(vml_std::sin); } realvec_t sinh() const { return map(vml_std::sinh); } realvec_t sqrt() const { return map(vml_std::sqrt); } realvec_t tan() const { return map(vml_std::tan); } realvec_t tanh() const { return map(vml_std::tanh); } realvec_t trunc() const { return map(vml_std::trunc); } }; // boolpseudovec definitions template inline typename boolpseudovec::intvec_t boolpseudovec::as_int() const { return convert_int(); } template inline typename boolpseudovec::intvec_t boolpseudovec::convert_int() const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d]; return res; } template inline typename boolpseudovec::boolvec_t boolpseudovec::ifthen(boolvec_t x, boolvec_t y) const { boolvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] ? x.v[d] : y.v[d]; return res; } template inline typename boolpseudovec::intvec_t boolpseudovec::ifthen(intvec_t x, intvec_t y) const { intvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] ? x.v[d] : y.v[d]; return res; } template inline typename boolpseudovec::realvec_t boolpseudovec::ifthen(realvec_t x, realvec_t y) const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = v[d] ? x.v[d] : y.v[d]; return res; } // intpseudovec definitions template inline typename intpseudovec::realvec_t intpseudovec::as_float() const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = FP::as_float(v[d]); return res; } template inline intpseudovec intpseudovec::bitifthen(intvec_t x, intvec_t y) const { return MF::vml_bitifthen(*this, x, y); } template inline typename intpseudovec::realvec_t intpseudovec::convert_float() const { realvec_t res; for (int d = 0; d < size; ++d) res.v[d] = FP::convert_float(v[d]); return res; } template inline intpseudovec intpseudovec::rotate(int_t n) const { return MF::vml_rotate(*this, n); } template inline intpseudovec intpseudovec::rotate(intvec_t n) const { return MF::vml_rotate(*this, n); } // Wrappers // boolpseudovec wrappers template inline intpseudovec as_int(boolpseudovec x) { return x.as_int(); } template inline intpseudovec convert_int(boolpseudovec x) { return x.convert_int(); } template inline bool all(boolpseudovec x) { return x.all(); } template inline bool any(boolpseudovec x) { return x.any(); } template inline boolpseudovec ifthen(boolpseudovec c, boolpseudovec x, boolpseudovec y) { return c.ifthen(x, y); } template inline intpseudovec ifthen(boolpseudovec c, intpseudovec x, intpseudovec y) { return c.ifthen(x, y); } template inline realpseudovec ifthen(boolpseudovec c, realpseudovec x, realpseudovec y) { return c.ifthen(x, y); } // intpseudovec wrappers template inline intpseudovec abs(intpseudovec x) { return x.abs(); } template inline boolpseudovec as_bool(intpseudovec x) { return x.as_bool(); } template inline realpseudovec as_float(intpseudovec x) { return x.as_float(); } template inline intpseudovec bitifthen(intpseudovec x, intpseudovec y, intpseudovec z) { return x.bitifthen(y, z); } template inline intpseudovec clz(intpseudovec x) { return x.clz(); } template inline boolpseudovec convert_bool(intpseudovec x) { return x.convert_bool(); } template inline realpseudovec convert_float(intpseudovec x) { return x.convert_float(); } template inline boolpseudovec isignbit(intpseudovec x) { return x.isignbit(); } template inline intpseudovec lsr(intpseudovec x, typename intpseudovec::int_t n) { return x.lsr(n); } template inline intpseudovec lsr(intpseudovec x, intpseudovec n) { return x.lsr(n); } template inline intpseudovec max(intpseudovec x, intpseudovec y) { return x.max(y); } template inline intpseudovec min(intpseudovec x, intpseudovec y) { return x.min(y); } template inline intpseudovec popcount(intpseudovec x) { return x.popcount(); } template inline intpseudovec rotate(intpseudovec x, typename intpseudovec::int_t n) { return x.rotate(n); } template inline intpseudovec rotate(intpseudovec x, intpseudovec n) { return x.rotate(n); } // realpseudovec wrappers template inline realpseudovec loada(real_t const *p, realpseudovec x, typename realpseudovec::mask_t const &m) { return x.loada(p, m); } template inline realpseudovec loadu(real_t const *p, realpseudovec x, typename realpseudovec::mask_t const &m) { return x.loadu(p, m); } template inline realpseudovec loadu(real_t const *p, size_t ioff, realpseudovec x, typename realpseudovec::mask_t const &m) { return x.loadu(p, ioff, m); } template inline void storea(realpseudovec x, real_t *p) { return x.storea(p); } template inline void storeu(realpseudovec x, real_t *p) { return x.storeu(p); } template inline void storeu(realpseudovec x, real_t *p, size_t ioff) { return x.storeu(p, ioff); } template inline void storea(realpseudovec x, real_t *p, typename realpseudovec::mask_t const &m) { return x.storea(p, m); } template inline void storeu(realpseudovec x, real_t *p, typename realpseudovec::mask_t const &m) { return x.storeu(p, m); } template inline void storeu(realpseudovec x, real_t *p, size_t ioff, typename realpseudovec::mask_t const &m) { return x.storeu(p, ioff, m); } template inline intpseudovec as_int(realpseudovec x) { return x.as_int(); } template inline intpseudovec convert_int(realpseudovec x) { return x.convert_int(); } template inline real_t maxval(realpseudovec x) { return x.maxval(); } template inline real_t minval(realpseudovec x) { return x.minval(); } template inline real_t prod(realpseudovec x) { return x.prod(); } template inline real_t sum(realpseudovec x) { return x.sum(); } template inline realpseudovec acos(realpseudovec x) { return x.acos(); } template inline realpseudovec acosh(realpseudovec x) { return x.acosh(); } template inline realpseudovec asin(realpseudovec x) { return x.asin(); } template inline realpseudovec asinh(realpseudovec x) { return x.asinh(); } template inline realpseudovec atan(realpseudovec x) { return x.atan(); } template inline realpseudovec atan2(realpseudovec x, realpseudovec y) { return x.atan2(y); } template inline realpseudovec atanh(realpseudovec x) { return x.atanh(); } template inline realpseudovec cbrt(realpseudovec x) { return x.cbrt(); } template inline realpseudovec ceil(realpseudovec x) { return x.ceil(); } template inline realpseudovec copysign(realpseudovec x, realpseudovec y) { return x.copysign(y); } template inline realpseudovec cos(realpseudovec x) { return x.cos(); } template inline realpseudovec cosh(realpseudovec x) { return x.cosh(); } template inline realpseudovec exp(realpseudovec x) { return x.exp(); } template inline realpseudovec exp10(realpseudovec x) { return x.exp10(); } template inline realpseudovec exp2(realpseudovec x) { return x.exp2(); } template inline realpseudovec expm1(realpseudovec x) { return x.expm1(); } template inline realpseudovec fabs(realpseudovec x) { return x.fabs(); } template inline realpseudovec floor(realpseudovec x) { return x.floor(); } template inline realpseudovec fdim(realpseudovec x, realpseudovec y) { return x.fdim(y); } template inline realpseudovec fma(realpseudovec x, realpseudovec y, realpseudovec z) { return x.fma(y, z); } template inline realpseudovec fmax(realpseudovec x, realpseudovec y) { return x.fmax(y); } template inline realpseudovec fmin(realpseudovec x, realpseudovec y) { return x.fmin(y); } template inline realpseudovec fmod(realpseudovec x, realpseudovec y) { return x.fmod(y); } template inline realpseudovec frexp(realpseudovec x, intpseudovec *r) { return x.frexp(r); } template inline realpseudovec hypot(realpseudovec x, realpseudovec y) { return x.hypot(y); } template inline intpseudovec ilogb(realpseudovec x) { return x.ilogb(); } template inline boolpseudovec isfinite(realpseudovec x) { return x.isfinite(); } template inline boolpseudovec isinf(realpseudovec x) { return x.isinf(); } template inline boolpseudovec isnan(realpseudovec x) { return x.isnan(); } template inline boolpseudovec isnormal(realpseudovec x) { return x.isnormal(); } template inline realpseudovec ldexp(realpseudovec x, typename intpseudovec::int_t n) { return x.ldexp(n); } template inline realpseudovec ldexp(realpseudovec x, intpseudovec n) { return x.ldexp(n); } template inline realpseudovec log(realpseudovec x) { return x.log(); } template inline realpseudovec log10(realpseudovec x) { return x.log10(); } template inline realpseudovec log1p(realpseudovec x) { return x.log1p(); } template inline realpseudovec log2(realpseudovec x) { return x.log2(); } template inline intpseudovec lrint(realpseudovec x) { return x.lrint(); } template inline realpseudovec mad(realpseudovec x, realpseudovec y, realpseudovec z) { return x.mad(y, z); } template inline realpseudovec nextafter(realpseudovec x, realpseudovec y) { return x.nextafter(y); } template inline realpseudovec pow(realpseudovec x, realpseudovec y) { return x.pow(y); } template inline realpseudovec rcp(realpseudovec x) { return x.rcp(); } template inline realpseudovec remainder(realpseudovec x, realpseudovec y) { return x.remainder(y); } template inline realpseudovec rint(realpseudovec x) { return x.rint(); } template inline realpseudovec round(realpseudovec x) { return x.round(); } template inline realpseudovec rsqrt(realpseudovec x) { return x.rsqrt(); } template inline boolpseudovec signbit(realpseudovec x) { return x.signbit(); } template inline realpseudovec sin(realpseudovec x) { return x.sin(); } template inline realpseudovec sinh(realpseudovec x) { return x.sinh(); } template inline realpseudovec sqrt(realpseudovec x) { return x.sqrt(); } template inline realpseudovec tan(realpseudovec x) { return x.tan(); } template inline realpseudovec tanh(realpseudovec x) { return x.tanh(); } template inline realpseudovec trunc(realpseudovec x) { return x.trunc(); } #ifndef VML_NO_IOSTREAM template std::ostream &operator<<(std::ostream &os, boolpseudovec const &x) { os << "["; for (int i = 0; i < size; ++i) { if (i != 0) os << ","; os << x[i]; } os << "]"; return os; } template std::ostream &operator<<(std::ostream &os, intpseudovec const &x) { os << "["; for (int i = 0; i < size; ++i) { if (i != 0) os << ","; os << x[i]; } os << "]"; return os; } template std::ostream &operator<<(std::ostream &os, realpseudovec const &x) { os << "["; for (int i = 0; i < size; ++i) { if (i != 0) os << ","; os << x[i]; } os << "]"; return os; } #endif } // namespace vecmathlib #endif // #ifndef VEC_PSEUDO_H