// -*-C++-*- #ifndef VEC_BUILTIN_H #define VEC_BUILTIN_H #include "floatprops.h" #include "floatbuiltins.h" #include "mathfuncs.h" #include "vec_base.h" #include #include #include #ifndef VML_NO_IOSTREAM # include #endif #include namespace vecmathlib { template struct boolbuiltinvec; template struct intbuiltinvec; template struct realbuiltinvec; template struct boolbuiltinvec: floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static const int size = N; typedef bool scalar_t; typedef int_t bvector_t __attribute__((__ext_vector_type__(N))); static const int alignment = sizeof(bvector_t); static_assert(size * sizeof(real_t) == sizeof(bvector_t), "vector size is wrong"); private: // true is -1, false is 0 static int_t from_bool(bool a) { return -uint_t(a); } static bool to_bool(int_t a) { return a; } public: typedef boolbuiltinvec boolvec_t; typedef intbuiltinvec intvec_t; typedef realbuiltinvec realvec_t; // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef floatprops FP; typedef mathfuncs MF; bvector_t v; boolbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers // boolbuiltinvec(const boolbuiltinvec& x): v(x.v) {} // boolbuiltinvec& operator=(const boolbuiltinvec& x) { return v=x.v, *this; } // Can't have a constructor from bvector_t, since this would // conflict with the constructor from bool // boolbuiltinvec(bvector_t x): v(x) {} static boolvec_t mkvec(bvector_t x) { boolvec_t res; res.v=x; return res; } boolbuiltinvec(bool a): v(from_bool(a)) {} boolbuiltinvec(const bool* as) { for (int d=0; d struct intbuiltinvec: floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static const int size = N; typedef int_t scalar_t; typedef int_t ivector_t __attribute__((__ext_vector_type__(N))); typedef uint_t uvector_t __attribute__((__ext_vector_type__(N))); static const int alignment = sizeof(ivector_t); static_assert(size * sizeof(real_t) == sizeof(ivector_t), "vector size is wrong"); static_assert(size * sizeof(real_t) == sizeof(uvector_t), "vector size is wrong"); typedef boolbuiltinvec boolvec_t; typedef intbuiltinvec intvec_t; typedef realbuiltinvec realvec_t; // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef floatprops FP; typedef mathfuncs MF; ivector_t v; intbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers // intbuiltinvec(const intbuiltinvec& x): v(x.v) {} // intbuiltinvec& operator=(const intbuiltinvec& x) { return v=x.v, *this; } // Can't have a constructor from ivector_t, since this would // conflict with the constructor from int_t // intbuiltinvec(ivector_t x): v(x) {} static intvec_t mkvec(ivector_t x) { intvec_t res; res.v=x; return res; } intbuiltinvec(int_t a): v(a) {} intbuiltinvec(const int_t* as) { std::memcpy(&v, as, sizeof v); } static intvec_t iota() { intvec_t res; for (int d=0; d> U(n))); } intvec_t rotate(int_t n) const { return MF::vml_rotate(*this, n); } intvec_t operator>>(int_t n) const { return mkvec(v >> n); } intvec_t operator<<(int_t n) const { return mkvec(v << n); } intvec_t& operator>>=(int_t n) { return *this=*this>>n; } intvec_t& operator<<=(int_t n) { return *this=*this<>uvector_t(n.v))); } intvec_t rotate(intvec_t n) const { return MF::vml_rotate(*this, n); } intvec_t operator>>(intvec_t n) const { return mkvec(v >> n.v); } intvec_t operator<<(intvec_t n) const { return mkvec(v << n.v); } intvec_t& operator>>=(intvec_t n) { return *this=*this>>n; } intvec_t& operator<<=(intvec_t n) { return *this=*this<(const intvec_t& x) const { return boolvec_t::mkvec(v > x.v); } boolvec_t operator>=(const intvec_t& x) const { return boolvec_t::mkvec(v >= x.v); } intvec_t abs() const { intvec_t res; for (int d=0; d struct realbuiltinvec: floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static const int size = N; typedef real_t scalar_t; typedef real_t vector_t __attribute__((__ext_vector_type__(N))); static const int alignment = sizeof(vector_t); static_assert(size * sizeof(real_t) == sizeof(vector_t), "vector size is wrong"); #ifndef VML_NO_IOSTREAM static const char* name() { static std::string name_; if (name_.empty()) { std::stringstream buf; buf << ""; name_ = buf.str(); } return name_.c_str(); } #endif void barrier() { volatile vector_t x __attribute__((__unused__)) = v; } typedef boolbuiltinvec boolvec_t; typedef intbuiltinvec intvec_t; typedef realbuiltinvec realvec_t; private: boolvec_t mapb(bool f(real_t)) const { boolvec_t res; for (int d=0; dset_elt(d, ix); } return res; } realvec_t map(real_t f(real_t, real_t), realvec_t x) const { realvec_t res; for (int d=0; d FP; typedef mathfuncs MF; vector_t v; realbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers // realbuiltinvec(const realbuiltinvec& x): v(x.v) {} // realbuiltinvec& operator=(const realbuiltinvec& x) { return v=x.v, *this; } // Can't have a constructor from vector_t, since this would // conflict with the constructor from real_t // realbuiltinvec(vector_t x): v(x) {} static realvec_t mkvec(vector_t x) { realvec_t res; res.v=x; return res; } realbuiltinvec(real_t a): v(a) {} realbuiltinvec(const real_t* as) { std::memcpy(&v, as, sizeof v); } real_t operator[](int n) const { return v[n]; } realvec_t& set_elt(int n, real_t a) { return v[n]=a, *this; } typedef vecmathlib::mask_t mask_t; static realvec_t loada(const real_t* p) { VML_ASSERT(intptr_t(p) % alignment == 0); #if __has_builtin(__builtin_assume_aligned) p = (const real_t*)__builtin_assume_aligned(p, sizeof(realvec_t)); #endif return mkvec(*(const vector_t*)p); } static realvec_t loadu(const real_t* p) { // return mkvec(*(const vector_t*)p); realvec_t res; for (int d=0; d(const realvec_t& x) const { return boolvec_t::mkvec(v > x.v); } boolvec_t operator>=(const realvec_t& x) const { return boolvec_t::mkvec(v >= x.v); } realvec_t acos() const { return map(builtin_acos); } realvec_t acosh() const { return map(builtin_acosh); } realvec_t asin() const { return map(builtin_asin); } realvec_t asinh() const { return map(builtin_asinh); } realvec_t atan() const { return map(builtin_atan); } realvec_t atan2(realvec_t y) const { return map(builtin_atan2, y); } realvec_t atanh() const { return map(builtin_atanh); } realvec_t cbrt() const { return map(builtin_cbrt); } realvec_t ceil() const { return map(builtin_ceil); } realvec_t copysign(realvec_t y) const { return map(builtin_copysign, y); } realvec_t cos() const { return map(builtin_cos); } realvec_t cosh() const { return map(builtin_cosh); } realvec_t exp() const { return map(builtin_exp); } realvec_t exp10() const { return MF::vml_exp10(*this); } realvec_t exp2() const { return map(builtin_exp2); } realvec_t expm1() const { return map(builtin_expm1); } realvec_t fabs() const { return map(builtin_fabs); } realvec_t fdim(realvec_t y) const { return map(builtin_fdim, y); } realvec_t floor() const { return map(builtin_floor); } realvec_t fma(realvec_t y, realvec_t z) const { return map(builtin_fma, y, z); } realvec_t fmax(realvec_t y) const { return map(builtin_fmax, y); } realvec_t fmin(realvec_t y) const { return map(builtin_fmin, y); } realvec_t fmod(realvec_t y) const { return map(builtin_fmod, y); } realvec_t frexp(intvec_t* r) const { realvec_t res; intvec_t exp; for (int d=0; d::max(); if (__builtin_isnan(val)) jexp = std::numeric_limits::min(); exp.set_elt(d, jexp); } *r = exp; return res; } realvec_t hypot(realvec_t y) const { return map(builtin_hypot, y); } intvec_t ilogb() const { intvec_t res; for (int d=0; d::min(); if (__builtin_isinf(val)) jexp = std::numeric_limits::max(); if (__builtin_isnan(val)) jexp = std::numeric_limits::min(); res.set_elt(d, jexp); } return res; } boolvec_t isfinite() const { boolvec_t res; for (int d=0; d inline typename boolbuiltinvec::intvec_t boolbuiltinvec::as_int() const { intvec_t res; std::memcpy(&res.v, &v, sizeof res.v); return res; } template inline typename boolbuiltinvec::intvec_t boolbuiltinvec::convert_int() const { return - as_int(); } template inline typename boolbuiltinvec::boolvec_t boolbuiltinvec::ifthen(boolvec_t x, boolvec_t y) const { // return v ? x.v : y.v; boolvec_t res; for (int d=0; d inline typename boolbuiltinvec::intvec_t boolbuiltinvec::ifthen(intvec_t x, intvec_t y) const { // return v ? x.v : y.v; intvec_t res; for (int d=0; d inline typename boolbuiltinvec::realvec_t boolbuiltinvec::ifthen(realvec_t x, realvec_t y) const { // return v ? x.v : y.v; realvec_t res; for (int d=0; d inline typename intbuiltinvec::realvec_t intbuiltinvec::as_float() const { realvec_t res; std::memcpy(&res.v, &v, sizeof res.v); return res; } template inline typename intbuiltinvec::realvec_t intbuiltinvec::convert_float() const { realvec_t res; for (int d=0; d inline intbuiltinvec as_int(boolbuiltinvec x) { return x.as_int(); } template inline intbuiltinvec convert_int(boolbuiltinvec x) { return x.convert_int(); } template inline bool all(boolbuiltinvec x) { return x.all(); } template inline bool any(boolbuiltinvec x) { return x.any(); } template inline boolbuiltinvec ifthen(boolbuiltinvec c, boolbuiltinvec x, boolbuiltinvec y) { return c.ifthen(x, y); } template inline intbuiltinvec ifthen(boolbuiltinvec c, intbuiltinvec x, intbuiltinvec y) { return c.ifthen(x, y); } template inline realbuiltinvec ifthen(boolbuiltinvec c, realbuiltinvec x, realbuiltinvec y) { return c.ifthen(x, y); } // intbuiltinvec wrappers template inline intbuiltinvec abs(intbuiltinvec x) { return x.abs(); } template inline boolbuiltinvec as_bool(intbuiltinvec x) { return x.as_bool(); } template inline realbuiltinvec as_float(intbuiltinvec x) { return x.as_float(); } template inline intbuiltinvec bitifthen(intbuiltinvec x, intbuiltinvec y, intbuiltinvec z) { return x.bitifthen(y, z); } template inline intbuiltinvec clz(intbuiltinvec x) { return x.clz(); } template inline boolbuiltinvec convert_bool(intbuiltinvec x) { return x.convert_bool(); } template inline realbuiltinvec convert_float(intbuiltinvec x) { return x.convert_float(); } template inline boolbuiltinvec isignbit(intbuiltinvec x) { return x.isignbit(); } template inline intbuiltinvec lsr(intbuiltinvec x, typename intbuiltinvec::int_t n) { return x.lsr(n); } template inline intbuiltinvec lsr(intbuiltinvec x, intbuiltinvec n) { return x.lsr(n); } template inline intbuiltinvec max(intbuiltinvec x, intbuiltinvec y) { return x.max(y); } template inline intbuiltinvec min(intbuiltinvec x, intbuiltinvec y) { return x.min(y); } template inline intbuiltinvec popcount(intbuiltinvec x) { return x.popcount(); } template inline intbuiltinvec rotate(intbuiltinvec x, typename intbuiltinvec::int_t n) { return x.rotate(n); } template inline intbuiltinvec rotate(intbuiltinvec x, intbuiltinvec n) { return x.rotate(n); } // realbuiltinvec wrappers template inline realbuiltinvec loada(real_t const* p, realbuiltinvec x, typename realbuiltinvec::mask_t const& m) { return x.loada(p, m); } template inline realbuiltinvec loadu(real_t const* p, realbuiltinvec x, typename realbuiltinvec::mask_t const& m) { return x.loadu(p, m); } template inline realbuiltinvec loadu(real_t const* p, size_t ioff, realbuiltinvec x, typename realbuiltinvec::mask_t const& m) { return x.loadu(p, ioff, m); } template inline void storea(realbuiltinvec x, real_t* p) { return x.storea(p); } template inline void storeu(realbuiltinvec x, real_t* p) { return x.storeu(p); } template inline void storeu(realbuiltinvec x, real_t* p, size_t ioff) { return x.storeu(p, ioff); } template inline void storea(realbuiltinvec x, real_t* p, typename realbuiltinvec::mask_t const& m) { return x.storea(p, m); } template inline void storeu(realbuiltinvec x, real_t* p, typename realbuiltinvec::mask_t const& m) { return x.storeu(p, m); } template inline void storeu(realbuiltinvec x, real_t* p, size_t ioff, typename realbuiltinvec::mask_t const& m) { return x.storeu(p, ioff, m); } template inline intbuiltinvec as_int(realbuiltinvec x) { return x.as_int(); } template inline intbuiltinvec convert_int(realbuiltinvec x) { return x.convert_int(); } template inline real_t maxval(realbuiltinvec x) { return x.maxval(); } template inline real_t minval(realbuiltinvec x) { return x.minval(); } template inline real_t prod(realbuiltinvec x) { return x.prod(); } template inline real_t sum(realbuiltinvec x) { return x.sum(); } template inline realbuiltinvec acos(realbuiltinvec x) { return x.acos(); } template inline realbuiltinvec acosh(realbuiltinvec x) { return x.acosh(); } template inline realbuiltinvec asin(realbuiltinvec x) { return x.asin(); } template inline realbuiltinvec asinh(realbuiltinvec x) { return x.asinh(); } template inline realbuiltinvec atan(realbuiltinvec x) { return x.atan(); } template inline realbuiltinvec atan2(realbuiltinvec x, realbuiltinvec y) { return x.atan2(y); } template inline realbuiltinvec atanh(realbuiltinvec x) { return x.atanh(); } template inline realbuiltinvec cbrt(realbuiltinvec x) { return x.cbrt(); } template inline realbuiltinvec ceil(realbuiltinvec x) { return x.ceil(); } template inline realbuiltinvec copysign(realbuiltinvec x, realbuiltinvec y) { return x.copysign(y); } template inline realbuiltinvec cos(realbuiltinvec x) { return x.cos(); } template inline realbuiltinvec cosh(realbuiltinvec x) { return x.cosh(); } template inline realbuiltinvec exp(realbuiltinvec x) { return x.exp(); } template inline realbuiltinvec exp10(realbuiltinvec x) { return x.exp10(); } template inline realbuiltinvec exp2(realbuiltinvec x) { return x.exp2(); } template inline realbuiltinvec expm1(realbuiltinvec x) { return x.expm1(); } template inline realbuiltinvec fabs(realbuiltinvec x) { return x.fabs(); } template inline realbuiltinvec floor(realbuiltinvec x) { return x.floor(); } template inline realbuiltinvec fdim(realbuiltinvec x, realbuiltinvec y) { return x.fdim(y); } template inline realbuiltinvec fma(realbuiltinvec x, realbuiltinvec y, realbuiltinvec z) { return x.fma(y, z); } template inline realbuiltinvec fmax(realbuiltinvec x, realbuiltinvec y) { return x.fmax(y); } template inline realbuiltinvec fmin(realbuiltinvec x, realbuiltinvec y) { return x.fmin(y); } template inline realbuiltinvec fmod(realbuiltinvec x, realbuiltinvec y) { return x.fmod(y); } template inline realbuiltinvec frexp(realbuiltinvec x, intbuiltinvec* r) { return x.frexp(r); } template inline realbuiltinvec hypot(realbuiltinvec x, realbuiltinvec y) { return x.hypot(y); } template inline intbuiltinvec ilogb(realbuiltinvec x) { return x.ilogb(); } template inline boolbuiltinvec isfinite(realbuiltinvec x) { return x.isfinite(); } template inline boolbuiltinvec isinf(realbuiltinvec x) { return x.isinf(); } template inline boolbuiltinvec isnan(realbuiltinvec x) { return x.isnan(); } template inline boolbuiltinvec isnormal(realbuiltinvec x) { return x.isnormal(); } template inline realbuiltinvec ldexp(realbuiltinvec x, typename intbuiltinvec::int_t n) { return x.ldexp(n); } template inline realbuiltinvec ldexp(realbuiltinvec x, intbuiltinvec n) { return x.ldexp(n); } template inline realbuiltinvec log(realbuiltinvec x) { return x.log(); } template inline realbuiltinvec log10(realbuiltinvec x) { return x.log10(); } template inline realbuiltinvec log1p(realbuiltinvec x) { return x.log1p(); } template inline realbuiltinvec log2(realbuiltinvec x) { return x.log2(); } template inline intbuiltinvec lrint(realbuiltinvec x) { return x.lrint(); } template inline realbuiltinvec mad(realbuiltinvec x, realbuiltinvec y, realbuiltinvec z) { return x.mad(y, z); } template inline realbuiltinvec nextafter(realbuiltinvec x, realbuiltinvec y) { return x.nextafter(y); } template inline realbuiltinvec pow(realbuiltinvec x, realbuiltinvec y) { return x.pow(y); } template inline realbuiltinvec rcp(realbuiltinvec x) { return x.rcp(); } template inline realbuiltinvec remainder(realbuiltinvec x, realbuiltinvec y) { return x.remainder(y); } template inline realbuiltinvec rint(realbuiltinvec x) { return x.rint(); } template inline realbuiltinvec round(realbuiltinvec x) { return x.round(); } template inline realbuiltinvec rsqrt(realbuiltinvec x) { return x.rsqrt(); } template inline boolbuiltinvec signbit(realbuiltinvec x) { return x.signbit(); } template inline realbuiltinvec sin(realbuiltinvec x) { return x.sin(); } template inline realbuiltinvec sinh(realbuiltinvec x) { return x.sinh(); } template inline realbuiltinvec sqrt(realbuiltinvec x) { return x.sqrt(); } template inline realbuiltinvec tan(realbuiltinvec x) { return x.tan(); } template inline realbuiltinvec tanh(realbuiltinvec x) { return x.tanh(); } template inline realbuiltinvec trunc(realbuiltinvec x) { return x.trunc(); } #ifndef VML_NO_IOSTREAM template std::ostream& operator<<(std::ostream& os, boolbuiltinvec const& x) { os << "["; for (int i=0; i std::ostream& operator<<(std::ostream& os, intbuiltinvec const& x) { os << "["; for (int i=0; i std::ostream& operator<<(std::ostream& os, realbuiltinvec const& x) { os << "["; for (int i=0; i