// -*-C++-*- #ifndef VEC_BUILTIN_H #define VEC_BUILTIN_H #include "floatprops.h" #include "mathfuncs.h" #include "vec_base.h" #include #include #include #include #include namespace vecmathlib { template struct boolbuiltinvec; template struct intbuiltinvec; template struct realbuiltinvec; template struct boolbuiltinvec: floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static int const size = N; typedef bool scalar_t; // true values are -1, false values are 0 #ifdef __clang__ typedef int_t bvector_t __attribute__((__ext_vector_type__(N))); #else typedef int_t bvector_t __attribute__((__vector_size__(N*sizeof(int_t)))); #endif static int const alignment = sizeof(bvector_t); static_assert(size * sizeof(real_t) == sizeof(bvector_t), "vector size is wrong"); typedef boolbuiltinvec boolvec_t; typedef intbuiltinvec intvec_t; typedef realbuiltinvec realvec_t; // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef floatprops FP; typedef mathfuncs MF; static boolvec_t wrap(bvector_t x) { boolvec_t res; res.v = x; return res; } bvector_t v; boolbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers // boolbuiltinvec(boolbuiltinvec const& x): v(x.v) {} // boolbuiltinvec& operator=(boolbuiltinvec const& x) { return v=x.v, *this; } boolbuiltinvec(bool a): v(-(int_t)a) {} boolbuiltinvec(bool const* as) { for (int d=0; d struct intbuiltinvec: floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static int const size = N; typedef int_t scalar_t; #ifdef __clang__ typedef int_t ivector_t __attribute__((__ext_vector_type__(N))); typedef uint_t uvector_t __attribute__((__ext_vector_type__(N))); #else typedef int_t ivector_t __attribute__((__vector_size__(N*sizeof(int_t)))); typedef uint_t uvector_t __attribute__((__vector_size__(N*sizeof(uint_t)))); #endif static int const alignment = sizeof(ivector_t); static_assert(size * sizeof(real_t) == sizeof(ivector_t), "vector size is wrong"); static_assert(size * sizeof(real_t) == sizeof(uvector_t), "vector size is wrong"); typedef boolbuiltinvec boolvec_t; typedef intbuiltinvec intvec_t; typedef realbuiltinvec realvec_t; // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef floatprops FP; typedef mathfuncs MF; static intvec_t wrap(ivector_t x) { intvec_t res; res.v = x; return res; } ivector_t v; intbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers // intbuiltinvec(intbuiltinvec const& x): v(x.v) {} // intbuiltinvec& operator=(intbuiltinvec const& x) { return v=x.v, *this; } intbuiltinvec(int_t a): v(ivector_t {a}) {} intbuiltinvec(int_t const* as) { std::memcpy(&v, as, sizeof v); } static intvec_t iota() { intvec_t res; for (int d=0; d>U(n))); } intvec_t operator>>(int_t n) const { return wrap(v>>n); } intvec_t operator<<(int_t n) const { return wrap(v<>=(int_t n) { return *this=*this>>n; } intvec_t& operator<<=(int_t n) { return *this=*this<>uvector_t(n.v))); } intvec_t operator>>(intvec_t n) const { return wrap(v>>n.v); } intvec_t operator<<(intvec_t n) const { return wrap(v<>=(intvec_t n) { return *this=*this>>n; } intvec_t& operator<<=(intvec_t n) { return *this=*this<(intvec_t const& x) const { return boolvec_t::wrap((ivector_t)(v>x.v)); } boolvec_t operator>=(intvec_t const& x) const { return boolvec_t::wrap((ivector_t)(v>=x.v)); } }; template struct realbuiltinvec: floatprops { typedef typename floatprops::int_t int_t; typedef typename floatprops::uint_t uint_t; typedef typename floatprops::real_t real_t; static int const size = N; typedef real_t scalar_t; #ifdef __clang__ typedef real_t vector_t __attribute__((__ext_vector_type__(N))); #else typedef real_t vector_t __attribute__((__vector_size__(N*sizeof(real_t)))); #endif static int const alignment = sizeof(vector_t); static_assert(size * sizeof(real_t) == sizeof(vector_t), "vector size is wrong"); static char const* name() { static std::string name_; if (name_.empty()) { std::stringstream buf; buf << ""; name_ = buf.str(); } return name_.c_str(); } void barrier() { volatile vector_t x __attribute__((__unused__)) = v; } typedef boolbuiltinvec boolvec_t; typedef intbuiltinvec intvec_t; typedef realbuiltinvec realvec_t; // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef floatprops FP; typedef mathfuncs MF; static realvec_t wrap(vector_t x) { realvec_t res; res.v = x; return res; } vector_t v; realbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers realbuiltinvec(realbuiltinvec const& x): v(x.v) {} realbuiltinvec& operator=(realbuiltinvec const& x) { return v=x.v, *this; } realbuiltinvec(real_t a): v(vector_t {a}) {} realbuiltinvec(real_t const* as) { std::memcpy(&v, as, sizeof v); } #ifdef __clang__ real_t operator[](int n) const { return v[n]; } realvec_t& set_elt(int n, real_t a) { return v[n]=a, *this; } #else real_t operator[](int n) const { return ((real_t const*)&v)[n]; } realvec_t& set_elt(int n, real_t a) { return ((real_t*)&v)[n]=a, *this; } #endif typedef vecmathlib::mask_t mask_t; static realvec_t loada(real_t const* p) { VML_ASSERT(intptr_t(p) % alignment == 0); #ifdef __clang__ #else p = (real_t const*)__builtin_assume_aligned(p, sizeof(realvec_t)); #endif return wrap(*(vector_t const*)p); } static realvec_t loadu(real_t const* p) { // realvec_t res; // for (int d=0; d(realvec_t const& x) const { return boolvec_t::wrap((typename intvec_t::ivector_t)(v>x.v)); } boolvec_t operator>=(realvec_t const& x) const { return boolvec_t::wrap((typename intvec_t::ivector_t)(v>=x.v)); } realvec_t acos() const { return MF::vml_acos(*this); } realvec_t acosh() const { return MF::vml_acosh(*this); } realvec_t asin() const { return MF::vml_asin(*this); } realvec_t asinh() const { return MF::vml_asinh(*this); } realvec_t atan() const { return MF::vml_atan(*this); } realvec_t atan2(realvec_t y) const { return MF::vml_atan(*this, y); } realvec_t atanh() const { return MF::vml_atanh(*this); } realvec_t cbrt() const { return MF::vml_cbrt(*this); } realvec_t ceil() const { return MF::vml_ceil(*this); } realvec_t copysign(realvec_t y) const { return MF::vml_copysign(*this, y); } realvec_t cos() const { return MF::vml_cos(*this); } realvec_t cosh() const { return MF::vml_cosh(*this); } realvec_t exp() const { return MF::vml_exp(*this); } realvec_t exp10() const { return MF::vml_exp10(*this); } realvec_t exp2() const { return MF::vml_exp2(*this); } realvec_t expm1() const { return MF::vml_expm1(*this); } realvec_t fabs() const { return MF::vml_fabs(*this); } realvec_t fdim(realvec_t y) const { return MF::vml_fdim(*this, y); } realvec_t floor() const { return MF::vml_floor(*this); } realvec_t fma(realvec_t y, realvec_t z) const { return MF::vml_fma(*this, y, z); } realvec_t fmax(realvec_t y) const { return MF::vml_fmax(*this, y); } realvec_t fmin(realvec_t y) const { return MF::vml_fmin(*this, y); } realvec_t fmod(realvec_t y) const { return MF::vml_fmod(*this, y); } realvec frexp(intvec_t* r) const { return MF::vml_frexp(*this, r); } realvec_t hypot(realvec_t y) const { return MF::vml_hypot(*this, y); } intvec_t ilogb() const { return MF::vml_ilogb(*this); } boolvec_t isfinite() const { return MF::vml_isfinite(*this); } boolvec_t isinf() const { return MF::vml_isinf(*this); } boolvec_t isnan() const { return MF::vml_isnan(*this); } boolvec_t isnormal() const { return MF::vml_isnormal(*this); } realvec_t ldexp(int_t n) const { return MF::vml_ldexp(*this, n); } realvec_t ldexp(intvec_t n) const { return MF::vml_ldexp(*this, n); } realvec_t log() const { return MF::vml_log(*this); } realvec_t log10() const { return MF::vml_log10(*this); } realvec_t log1p() const { return MF::vml_log1p(*this); } realvec_t log2() const { return MF::vml_log2(*this); } realvec_t pow(realvec_t y) const { return MF::vml_pow(*this, y); } realvec_t rcp() const { return MF::vml_rcp(*this); } realvec_t remainder(realvec_t y) const { return MF::vml_remainder(*this, y); } realvec_t rint() const { return MF::vml_rint(*this); } realvec_t round() const { return MF::vml_round(*this); } realvec_t rsqrt() const { return MF::vml_rsqrt(*this); } boolvec_t signbit() const { return MF::vml_signbit(*this); } realvec_t sin() const { return MF::vml_sin(*this); } realvec_t sinh() const { return MF::vml_sinh(*this); } realvec_t sqrt() const { return MF::vml_sqrt(*this); } realvec_t tan() const { return MF::vml_tan(*this); } realvec_t tanh() const { return MF::vml_tanh(*this); } realvec_t trunc() const { return MF::vml_trunc(*this); } }; // boolbuiltinvec definitions template inline auto boolbuiltinvec::as_int() const -> intvec_t { intvec_t res; std::memcpy(&res.v, &v, sizeof v); return res; } template inline auto boolbuiltinvec::convert_int() const -> intvec_t { return intvec_t::wrap(-v); } template inline auto boolbuiltinvec::ifthen(intvec_t x, intvec_t y) const -> intvec_t { #ifdef __clang__ intvec_t mask = as_int(); return (mask & x) | (~mask & y); #else return intvec_t::wrap(v ? x.v : y.v); #endif } template inline auto boolbuiltinvec::ifthen(realvec_t x, realvec_t y) const -> realvec_t { #ifdef __clang__ intvec_t mask = as_int(); return as_float((mask & x.as_int()) | (~mask & y.as_int())); #else return realvec_t::wrap(v ? x.v : y.v); #endif } // intbuiltinvec definitions template inline auto intbuiltinvec::as_float() const -> realvec_t { realvec_t res; std::memcpy(&res.v, &v, sizeof v); return res; } template inline auto intbuiltinvec::convert_float() const -> realvec_t { return realvec_t::wrap((typename realvec_t::vector_t)v); } // Wrappers // boolbuiltinvec wrappers template inline intbuiltinvec as_int(boolbuiltinvec x) { return x.as_int(); } template inline intbuiltinvec convert_int(boolbuiltinvec x) { return x.convert_int(); } template inline bool all(boolbuiltinvec x) { return x.all(); } template inline bool any(boolbuiltinvec x) { return x.any(); } template inline intbuiltinvec ifthen(boolbuiltinvec c, intbuiltinvec x, intbuiltinvec y) { return c.ifthen(x, y); } template inline realbuiltinvec ifthen(boolbuiltinvec c, realbuiltinvec x, realbuiltinvec y) { return c.ifthen(x, y); } // intbuiltinvec wrappers template inline boolbuiltinvec as_bool(intbuiltinvec x) { return x.as_bool(); } template inline boolbuiltinvec convert_bool(intbuiltinvec x) { return x.convert_bool(); } template inline realbuiltinvec as_float(intbuiltinvec x) { return x.as_float(); } template inline realbuiltinvec convert_float(intbuiltinvec x) { return x.convert_float(); } template inline intbuiltinvec lsr(intbuiltinvec x, typename intbuiltinvec::int_t n) { return x.lsr(n); } template inline intbuiltinvec lsr(intbuiltinvec x, intbuiltinvec n) { return x.lsr(n); } // realbuiltinvec wrappers template inline realbuiltinvec loada(real_t const* p, realbuiltinvec x, typename realbuiltinvec::mask_t const& m) { return x.loada(p, m); } template inline realbuiltinvec loadu(real_t const* p, realbuiltinvec x, typename realbuiltinvec::mask_t const& m) { return x.loadu(p, m); } template inline realbuiltinvec loadu(real_t const* p, size_t ioff, realbuiltinvec x, typename realbuiltinvec::mask_t const& m) { return x.loadu(p, ioff, m); } template inline void storea(realbuiltinvec x, real_t* p) { x.storea(p); } template inline void storeu(realbuiltinvec x, real_t* p) { x.storeu(p); } template inline void storeu(realbuiltinvec x, real_t* p, size_t ioff) { x.storeu(p, ioff); } template inline void storea(realbuiltinvec x, real_t* p, typename realbuiltinvec::mask_t const& m) { x.storea(p, m); } template inline void storeu(realbuiltinvec x, real_t* p, typename realbuiltinvec::mask_t const& m) { x.storeu(p, m); } template inline void storeu(realbuiltinvec x, real_t* p, size_t ioff, typename realbuiltinvec::mask_t const& m) { x.storeu(p, ioff, m); } template inline intbuiltinvec as_int(realbuiltinvec x) { return x.as_int(); } template inline intbuiltinvec convert_int(realbuiltinvec x) { return x.convert_int(); } template inline auto prod(realbuiltinvec x) -> real_t { return x.prod(); } template inline auto sum(realbuiltinvec x) -> real_t { return x.sum(); } template inline realbuiltinvec acos(realbuiltinvec x) { return x.acos(); } template inline realbuiltinvec acosh(realbuiltinvec x) { return x.acosh(); } template inline realbuiltinvec asin(realbuiltinvec x) { return x.asin(); } template inline realbuiltinvec asinh(realbuiltinvec x) { return x.asinh(); } template inline realbuiltinvec atan(realbuiltinvec x) { return x.atan(); } template inline realbuiltinvec atan2(realbuiltinvec x, realbuiltinvec y) { return x.atan2(y); } template inline realbuiltinvec atanh(realbuiltinvec x) { return x.atanh(); } template inline realbuiltinvec cbrt(realbuiltinvec x) { return x.cbrt(); } template inline realbuiltinvec ceil(realbuiltinvec x) { return x.ceil(); } template inline realbuiltinvec copysign(realbuiltinvec x, realbuiltinvec y) { return x.copysign(y); } template inline realbuiltinvec cos(realbuiltinvec x) { return x.cos(); } template inline realbuiltinvec cosh(realbuiltinvec x) { return x.cosh(); } template inline realbuiltinvec exp(realbuiltinvec x) { return x.exp(); } template inline realbuiltinvec exp10(realbuiltinvec x) { return x.exp10(); } template inline realbuiltinvec exp2(realbuiltinvec x) { return x.exp2(); } template inline realbuiltinvec expm1(realbuiltinvec x) { return x.expm1(); } template inline realbuiltinvec fabs(realbuiltinvec x) { return x.fabs(); } template inline realbuiltinvec floor(realbuiltinvec x) { return x.floor(); } template inline realbuiltinvec fdim(realbuiltinvec x, realbuiltinvec y) { return x.fdim(y); } template inline realbuiltinvec fma(realbuiltinvec x, realbuiltinvec y, realbuiltinvec z) { return x.fma(y, z); } template inline realbuiltinvec fmax(realbuiltinvec x, realbuiltinvec y) { return x.fmax(y); } template inline realbuiltinvec fmin(realbuiltinvec x, realbuiltinvec y) { return x.fmin(y); } template inline realbuiltinvec fmod(realbuiltinvec x, realbuiltinvec y) { return x.fmod(y); } template inline realbuiltinvec hypot(realbuiltinvec x, realbuiltinvec y) { return x.hypot(y); } template inline intbuiltinvec ilogb(realbuiltinvec x) { return x.ilogb(); } template inline boolbuiltinvec isfinite(realbuiltinvec x) { return x.isfinite(); } template inline boolbuiltinvec isinf(realbuiltinvec x) { return x.isinf(); } template inline boolbuiltinvec isnan(realbuiltinvec x) { return x.isnan(); } template inline boolbuiltinvec isnormal(realbuiltinvec x) { return x.isnormal(); } template inline realbuiltinvec ldexp(realbuiltinvec x, typename intbuiltinvec::int_t n) { return x.ldexp(n); } template inline realbuiltinvec ldexp(realbuiltinvec x, intbuiltinvec n) { return x.ldexp(n); } template inline realbuiltinvec log(realbuiltinvec x) { return x.log(); } template inline realbuiltinvec log10(realbuiltinvec x) { return x.log10(); } template inline realbuiltinvec log1p(realbuiltinvec x) { return x.log1p(); } template inline realbuiltinvec log2(realbuiltinvec x) { return x.log2(); } template inline realbuiltinvec pow(realbuiltinvec x, realbuiltinvec y) { return x.pow(y); } template inline realbuiltinvec rcp(realbuiltinvec x) { return x.rcp(); } template inline realbuiltinvec remainder(realbuiltinvec x, realbuiltinvec y) { return x.remainder(y); } template inline realbuiltinvec rint(realbuiltinvec x) { return x.rint(); } template inline realbuiltinvec round(realbuiltinvec x) { return x.round(); } template inline realbuiltinvec rsqrt(realbuiltinvec x) { return x.rsqrt(); } template inline boolbuiltinvec signbit(realbuiltinvec x) { return x.signbit(); } template inline realbuiltinvec sin(realbuiltinvec x) { return x.sin(); } template inline realbuiltinvec sinh(realbuiltinvec x) { return x.sinh(); } template inline realbuiltinvec sqrt(realbuiltinvec x) { return x.sqrt(); } template inline realbuiltinvec tan(realbuiltinvec x) { return x.tan(); } template inline realbuiltinvec tanh(realbuiltinvec x) { return x.tanh(); } template inline realbuiltinvec trunc(realbuiltinvec x) { return x.trunc(); } template std::ostream& operator<<(std::ostream& os, boolbuiltinvec const& x) { os << "["; for (int i=0; i std::ostream& operator<<(std::ostream& os, intbuiltinvec const& x) { os << "["; for (int i=0; i std::ostream& operator<<(std::ostream& os, realbuiltinvec const& x) { os << "["; for (int i=0; i