diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-09-09 16:37:33 -0400 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-09-09 16:37:33 -0400 |
commit | 01d740e8bba1385cc995c7f96ff1e4668adc08df (patch) | |
tree | c6d2797d8a8be65cc6e1a39bd36f0c3242dbc282 | |
parent | cd23f9f22ce463e78d2a52b37798fbfd75ac9631 (diff) | |
download | vecmathlib-01d740e8bba1385cc995c7f96ff1e4668adc08df.zip vecmathlib-01d740e8bba1385cc995c7f96ff1e4668adc08df.tar.gz |
Provide vector types implemented via (scalar) compiler builtins
-rw-r--r-- | floatbuiltins.h | 316 | ||||
-rw-r--r-- | test.cc | 18 | ||||
-rw-r--r-- | vec_builtin.h | 811 | ||||
-rw-r--r-- | vecmathlib.h | 3 |
4 files changed, 866 insertions, 282 deletions
diff --git a/floatbuiltins.h b/floatbuiltins.h new file mode 100644 index 0000000..68d391a --- /dev/null +++ b/floatbuiltins.h @@ -0,0 +1,316 @@ +// -*-C++-*- + +#ifndef FLOATBUILTINS_H +#define FLOATBUILTINS_H + +#if defined __clang__ + +namespace vecmathlib { + + inline char builtin_abs(char x) { return __builtin_abs(x); } + inline short builtin_abs(short x) { return __builtin_abs(x); } + inline int builtin_abs(int x) { return __builtin_abs(x); } + inline long builtin_abs(long x) { return __builtin_labs(x); } +#if __SIZEOF_LONG_LONG__ + inline long long builtin_abs(long long x) { return __builtin_llabs(x); } +#endif + + inline unsigned char builtin_clz(unsigned char x) { return __builtin_clzs(x) - CHAR_BIT * (sizeof(unsigned short) - sizeof(unsigned char)); } + inline unsigned short builtin_clz(unsigned short x) { return __builtin_clzs(x); } + inline unsigned int builtin_clz(unsigned int x) { return __builtin_clz(x); } + inline unsigned long builtin_clz(unsigned long x) { return __builtin_clzl(x); } +#if __SIZEOF_LONG_LONG__ + inline unsigned long long builtin_clz(unsigned long long x) { return __builtin_clzll(x); } +#endif + + inline unsigned char builtin_popcount(unsigned char x) { return __builtin_popcount(x); } + inline unsigned short builtin_popcount(unsigned short x) { return __builtin_popcount(x); } + inline unsigned int builtin_popcount(unsigned int x) { return __builtin_popcount(x); } + inline unsigned long builtin_popcount(unsigned long x) { return __builtin_popcountl(x); } +#if __SIZEOF_LONG_LONG__ + inline unsigned long long builtin_popcount(unsigned long long x) { return __builtin_popcountll(x); } +#endif + + + + inline float builtin_acos(float x) { return __builtin_acosf(x); } + inline double builtin_acos(double x) { return __builtin_acos(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_acos(long double x) { return __builtin_acosl(x); } +#endif + + inline float builtin_acosh(float x) { return __builtin_acoshf(x); } + inline double builtin_acosh(double x) { return __builtin_acosh(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_acosh(long double x) { return __builtin_acoshl(x); } +#endif + + inline float builtin_asin(float x) { return __builtin_asinf(x); } + inline double builtin_asin(double x) { return __builtin_asin(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_asin(long double x) { return __builtin_asinl(x); } +#endif + + inline float builtin_asinh(float x) { return __builtin_asinhf(x); } + inline double builtin_asinh(double x) { return __builtin_asinh(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_asinh(long double x) { return __builtin_asinhl(x); } +#endif + + inline float builtin_atan(float x) { return __builtin_atanf(x); } + inline double builtin_atan(double x) { return __builtin_atan(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_atan(long double x) { return __builtin_atanl(x); } +#endif + + inline float builtin_atan2(float x, float y) { return __builtin_atan2f(x, y); } + inline double builtin_atan2(double x, double y) { return __builtin_atan2(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_atan2(long double x, long double y) { return __builtin_atan2l(x, y); } +#endif + + inline float builtin_atanh(float x) { return __builtin_atanhf(x); } + inline double builtin_atanh(double x) { return __builtin_atanh(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_atanh(long double x) { return __builtin_atanhl(x); } +#endif + + inline float builtin_cbrt(float x) { return __builtin_cbrtf(x); } + inline double builtin_cbrt(double x) { return __builtin_cbrt(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_cbrt(long double x) { return __builtin_cbrtl(x); } +#endif + + inline float builtin_ceil(float x) { return __builtin_ceilf(x); } + inline double builtin_ceil(double x) { return __builtin_ceil(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_ceil(long double x) { return __builtin_ceill(x); } +#endif + + inline float builtin_copysign(float x, float y) { return __builtin_copysignf(x, y); } + inline double builtin_copysign(double x, double y) { return __builtin_copysign(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_copysign(long double x, long double y) { return __builtin_copysignl(x, y); } +#endif + + inline float builtin_cos(float x) { return __builtin_cosf(x); } + inline double builtin_cos(double x) { return __builtin_cos(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_cos(long double x) { return __builtin_cosl(x); } +#endif + + inline float builtin_cosh(float x) { return __builtin_coshf(x); } + inline double builtin_cosh(double x) { return __builtin_cosh(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_cosh(long double x) { return __builtin_coshl(x); } +#endif + + inline float builtin_exp(float x) { return __builtin_expf(x); } + inline double builtin_exp(double x) { return __builtin_exp(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_exp(long double x) { return __builtin_expl(x); } +#endif + + inline float builtin_exp2(float x) { return __builtin_exp2f(x); } + inline double builtin_exp2(double x) { return __builtin_exp2(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_exp2(long double x) { return __builtin_exp2l(x); } +#endif + + inline float builtin_expm1(float x) { return __builtin_expm1f(x); } + inline double builtin_expm1(double x) { return __builtin_expm1(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_expm1(long double x) { return __builtin_expm1l(x); } +#endif + + inline float builtin_fabs(float x) { return __builtin_fabsf(x); } + inline double builtin_fabs(double x) { return __builtin_fabs(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_fabs(long double x) { return __builtin_fabsl(x); } +#endif + + inline float builtin_fdim(float x, float y) { return __builtin_fdimf(x, y); } + inline double builtin_fdim(double x, double y) { return __builtin_fdim(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_fdim(long double x, long double y) { return __builtin_fdiml(x, y); } +#endif + + inline float builtin_floor(float x) { return __builtin_floorf(x); } + inline double builtin_floor(double x) { return __builtin_floor(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_floor(long double x) { return __builtin_floorl(x); } +#endif + + inline float builtin_fma(float x, float y, float z) { return __builtin_fmaf(x, y, z); } + inline double builtin_fma(double x, double y, double z) { return __builtin_fma(x, y, z); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_fma(long double x, long double y, long double z) { return __builtin_fmal(x, y, z); } +#endif + + inline float builtin_fmax(float x, float y) { return __builtin_fmaxf(x, y); } + inline double builtin_fmax(double x, double y) { return __builtin_fmax(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_fmax(long double x, long double y) { return __builtin_fmaxl(x, y); } +#endif + + inline float builtin_fmin(float x, float y) { return __builtin_fminf(x, y); } + inline double builtin_fmin(double x, double y) { return __builtin_fmin(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_fmin(long double x, long double y) { return __builtin_fminl(x, y); } +#endif + + inline float builtin_fmod(float x, float y) { return __builtin_fmodf(x, y); } + inline double builtin_fmod(double x, double y) { return __builtin_fmod(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_fmod(long double x, long double y) { return __builtin_fmodl(x, y); } +#endif + + inline float builtin_frexp(float x, int* r) { return __builtin_frexpf(x, r); } + inline double builtin_frexp(double x, int* r) { return __builtin_frexp(x, r); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_frexp(long double x, int* r) { return __builtin_frexpl(x, r); } +#endif + + inline float builtin_hypot(float x, float y) { return __builtin_hypotf(x, y); } + inline double builtin_hypot(double x, double y) { return __builtin_hypot(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_hypot(long double x, long double y) { return __builtin_hypotl(x, y); } +#endif + + inline int builtin_ilogb(float x) { return __builtin_ilogbf(x); } + inline int builtin_ilogb(double x) { return __builtin_ilogb(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline int builtin_ilogb(long double x) { return __builtin_ilogbl(x); } +#endif + + inline int builtin_isfinite(float x) { return __builtin_isfinite(x); } + inline int builtin_isfinite(double x) { return __builtin_isfinite(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline int builtin_isfinite(long double x) { return __builtin_isfinite(x); } +#endif + + inline int builtin_isinf(float x) { return __builtin_isinf(x); } + inline int builtin_isinf(double x) { return __builtin_isinf(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline int builtin_isinf(long double x) { return __builtin_isinf(x); } +#endif + + inline int builtin_isnan(float x) { return __builtin_isnan(x); } + inline int builtin_isnan(double x) { return __builtin_isnan(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline int builtin_isnan(long double x) { return __builtin_isnan(x); } +#endif + + inline int builtin_isnormal(float x) { return __builtin_isnormal(x); } + inline int builtin_isnormal(double x) { return __builtin_isnormal(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline int builtin_isnormal(long double x) { return __builtin_isnormal(x); } +#endif + + inline float builtin_ldexp(float x, int y) { return __builtin_ldexpf(x, y); } + inline double builtin_ldexp(double x, int y) { return __builtin_ldexp(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_ldexp(long double x, int y) { return __builtin_ldexpl(x, y); } +#endif + + inline float builtin_log(float x) { return __builtin_logf(x); } + inline double builtin_log(double x) { return __builtin_log(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_log(long double x) { return __builtin_logl(x); } +#endif + + inline float builtin_log10(float x) { return __builtin_log10f(x); } + inline double builtin_log10(double x) { return __builtin_log10(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_log10(long double x) { return __builtin_log10l(x); } +#endif + + inline float builtin_log1p(float x) { return __builtin_log1pf(x); } + inline double builtin_log1p(double x) { return __builtin_log1p(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_log1p(long double x) { return __builtin_log1pl(x); } +#endif + + inline float builtin_log2(float x) { return __builtin_log2f(x); } + inline double builtin_log2(double x) { return __builtin_log2(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_log2(long double x) { return __builtin_log2l(x); } +#endif + + inline float builtin_nextafter(float x, float y) { return __builtin_nextafterf(x, y); } + inline double builtin_nextafter(double x, double y) { return __builtin_nextafter(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_nextafter(long double x, long double y) { return __builtin_nextafterl(x, y); } +#endif + + inline float builtin_pow(float x, float y) { return __builtin_powf(x, y); } + inline double builtin_pow(double x, double y) { return __builtin_pow(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_pow(long double x, long double y) { return __builtin_powl(x, y); } +#endif + + inline float builtin_remainder(float x, float y) { return __builtin_remainderf(x, y); } + inline double builtin_remainder(double x, double y) { return __builtin_remainder(x, y); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_remainder(long double x, long double y) { return __builtin_remainderl(x, y); } +#endif + + inline float builtin_rint(float x) { return __builtin_rintf(x); } + inline double builtin_rint(double x) { return __builtin_rint(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_rint(long double x) { return __builtin_rintl(x); } +#endif + + inline float builtin_round(float x) { return __builtin_roundf(x); } + inline double builtin_round(double x) { return __builtin_round(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_round(long double x) { return __builtin_roundl(x); } +#endif + + inline int builtin_signbit(float x) { return __builtin_signbitf(x); } + inline int builtin_signbit(double x) { return __builtin_signbit(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline int builtin_signbit(long double x) { return __builtin_signbitl(x); } +#endif + + inline float builtin_sin(float x) { return __builtin_sinf(x); } + inline double builtin_sin(double x) { return __builtin_sin(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_sin(long double x) { return __builtin_sinl(x); } +#endif + + inline float builtin_sinh(float x) { return __builtin_sinhf(x); } + inline double builtin_sinh(double x) { return __builtin_sinh(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_sinh(long double x) { return __builtin_sinhl(x); } +#endif + + inline float builtin_sqrt(float x) { return __builtin_sqrtf(x); } + inline double builtin_sqrt(double x) { return __builtin_sqrt(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_sqrt(long double x) { return __builtin_sqrtl(x); } +#endif + + inline float builtin_tan(float x) { return __builtin_tanf(x); } + inline double builtin_tan(double x) { return __builtin_tan(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_tan(long double x) { return __builtin_tanl(x); } +#endif + + inline float builtin_tanh(float x) { return __builtin_tanhf(x); } + inline double builtin_tanh(double x) { return __builtin_tanh(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_tanh(long double x) { return __builtin_tanhl(x); } +#endif + + inline float builtin_trunc(float x) { return __builtin_truncf(x); } + inline double builtin_trunc(double x) { return __builtin_trunc(x); } +#if __SIZEOF_LONG_DOUBLE__ + inline long double builtin_trunc(long double x) { return __builtin_truncl(x); } +#endif + +} + +#endif + +#endif // #ifndef FLOATBUILTINS_H @@ -1617,57 +1617,57 @@ int main(int argc, char** argv) << flush; vecmathlib_test<realpseudovec<float,1> >::test(); - // vecmathlib_test<realbuiltinvec<float,1>>::test(); + vecmathlib_test<realbuiltinvec<float,1> >::test(); vecmathlib_test<realtestvec<float,1> >::test(); #ifdef VECMATHLIB_HAVE_VEC_FLOAT_1 vecmathlib_test<realvec<float,1> >::test(); #endif vecmathlib_test<realpseudovec<float,2> >::test(); - // vecmathlib_test<realbuiltinvec<float,2> >::test(); + vecmathlib_test<realbuiltinvec<float,2> >::test(); vecmathlib_test<realtestvec<float,2> >::test(); #ifdef VECMATHLIB_HAVE_VEC_FLOAT_2 vecmathlib_test<realvec<float,2> >::test(); #endif vecmathlib_test<realpseudovec<float,4> >::test(); - // vecmathlib_test<realbuiltinvec<float,4> >::test(); + vecmathlib_test<realbuiltinvec<float,4> >::test(); vecmathlib_test<realtestvec<float,4> >::test(); #ifdef VECMATHLIB_HAVE_VEC_FLOAT_4 vecmathlib_test<realvec<float,4> >::test(); #endif #ifdef VECMATHLIB_HAVE_VEC_FLOAT_8 vecmathlib_test<realpseudovec<float,8> >::test(); - // vecmathlib_test<realbuiltinvec<float,8> >::test(); + vecmathlib_test<realbuiltinvec<float,8> >::test(); vecmathlib_test<realtestvec<float,8> >::test(); vecmathlib_test<realvec<float,8> >::test(); #endif #ifdef VECMATHLIB_HAVE_VEC_FLOAT_16 vecmathlib_test<realpseudovec<float,16> >::test(); - // vecmathlib_test<realbuiltinvec<float,16> >::test(); + vecmathlib_test<realbuiltinvec<float,16> >::test(); vecmathlib_test<realtestvec<float,16> >::test(); vecmathlib_test<realvec<float,16> >::test(); #endif vecmathlib_test<realpseudovec<double,1> >::test(); - // vecmathlib_test<realbuiltinvec<double,1> >::test(); + vecmathlib_test<realbuiltinvec<double,1> >::test(); vecmathlib_test<realtestvec<double,1> >::test(); #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1 vecmathlib_test<realvec<double,1> >::test(); #endif vecmathlib_test<realpseudovec<double,2> >::test(); - // vecmathlib_test<realbuiltinvec<double,2> >::test(); + vecmathlib_test<realbuiltinvec<double,2> >::test(); vecmathlib_test<realtestvec<double,2> >::test(); #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_2 vecmathlib_test<realvec<double,2> >::test(); #endif #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_4 vecmathlib_test<realpseudovec<double,4> >::test(); - // vecmathlib_test<realbuiltinvec<double,4> >::test(); + vecmathlib_test<realbuiltinvec<double,4> >::test(); vecmathlib_test<realtestvec<double,4> >::test(); vecmathlib_test<realvec<double,4> >::test(); #endif #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_8 vecmathlib_test<realpseudovec<double,8> >::test(); - // vecmathlib_test<realbuiltinvec<double,8> >::test(); + vecmathlib_test<realbuiltinvec<double,8> >::test(); vecmathlib_test<realtestvec<double,8> >::test(); vecmathlib_test<realvec<double,8> >::test(); #endif diff --git a/vec_builtin.h b/vec_builtin.h index c09a290..decab46 100644 --- a/vec_builtin.h +++ b/vec_builtin.h @@ -4,6 +4,7 @@ #define VEC_BUILTIN_H #include "floatprops.h" +#include "floatbuiltins.h" #include "mathfuncs.h" #include "vec_base.h" @@ -30,19 +31,20 @@ namespace vecmathlib { typedef typename floatprops<T>::uint_t uint_t; typedef typename floatprops<T>::real_t real_t; - static int const size = N; + static const int size = N; typedef bool scalar_t; - // true values are -1, false values are 0 -#ifdef __clang__ typedef int_t bvector_t __attribute__((__ext_vector_type__(N))); -#else - typedef int_t bvector_t __attribute__((__vector_size__(N*sizeof(int_t)))); -#endif - static int const alignment = sizeof(bvector_t); + static const int alignment = sizeof(bvector_t); static_assert(size * sizeof(real_t) == sizeof(bvector_t), "vector size is wrong"); + private: + // true is -1, false is 0 + static int_t from_bool(bool a) { return -uint_t(a); } + static bool to_bool(int_t a) { return a; } + public: + typedef boolbuiltinvec boolvec_t; typedef intbuiltinvec<real_t, size> intvec_t; typedef realbuiltinvec<real_t, size> realvec_t; @@ -57,13 +59,6 @@ namespace vecmathlib { typedef floatprops<real_t> FP; typedef mathfuncs<realvec_t> MF; - static boolvec_t wrap(bvector_t x) - { - boolvec_t res; - res.v = x; - return res; - } - bvector_t v; @@ -71,21 +66,21 @@ namespace vecmathlib { boolbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers - // boolbuiltinvec(boolbuiltinvec const& x): v(x.v) {} - // boolbuiltinvec& operator=(boolbuiltinvec const& x) { return v=x.v, *this; } - boolbuiltinvec(bool a): v(-(int_t)a) {} - boolbuiltinvec(bool const* as) + // boolbuiltinvec(const boolbuiltinvec& x): v(x.v) {} + // boolbuiltinvec& operator=(const boolbuiltinvec& x) { return v=x.v, *this; } + // Can't have a constructor from bvector_t, since this would + // conflict with the constructor from bool + // boolbuiltinvec(bvector_t x): v(x) {} + static boolvec_t mkvec(bvector_t x) { boolvec_t res; res.v=x; return res; } + boolbuiltinvec(bool a): v(from_bool(a)) {} + boolbuiltinvec(const bool* as) { for (int d=0; d<size; ++d) set_elt(d, as[d]); } -#ifdef __clang__ - bool operator[](int n) const { return v[n]; } - boolvec_t& set_elt(int n, bool a) { return v[n]=-(int_t)a, *this; } -#else - bool operator[](int n) const { return ((int_t const*)&v)[n]; } - boolvec_t& set_elt(int n, bool a) { return ((int_t*)&v)[n]=a, *this; } -#endif + operator bvector_t() const { return v; } + bool operator[](int n) const { return to_bool(v[n]); } + boolvec_t& set_elt(int n, bool a) { return v[n]=from_bool(a), *this; } @@ -94,41 +89,30 @@ namespace vecmathlib { - boolvec_t operator!() const { return wrap(!v); } + boolvec_t operator!() const { return mkvec(!v); } - boolvec_t operator&&(boolvec_t x) const - { - return wrap((typename intvec_t::ivector_t)(v && x.v)); - } - boolvec_t operator||(boolvec_t x) const - { - return wrap((typename intvec_t::ivector_t)(v || x.v)); - } - boolvec_t operator==(boolvec_t x) const - { - return wrap((typename intvec_t::ivector_t)(v == x.v)); - } - boolvec_t operator!=(boolvec_t x) const - { - return wrap((typename intvec_t::ivector_t)(v != x.v)); - } + boolvec_t operator&&(boolvec_t x) const { return mkvec(v && x.v); } + boolvec_t operator||(boolvec_t x) const { return mkvec(v || x.v); } + boolvec_t operator==(boolvec_t x) const { return mkvec(v == x.v); } + boolvec_t operator!=(boolvec_t x) const { return mkvec(v != x.v); } bool all() const { - bool res = true; - for (int d=0; d<size; ++d) res = res && (*this)[d]; + bool res = (*this)[0]; + for (int d=1; d<size; ++d) res = res && (*this)[d]; return res; } bool any() const { - bool res = false; - for (int d=0; d<size; ++d) res = res || (*this)[d]; + bool res = (*this)[0]; + for (int d=1; d<size; ++d) res = res || (*this)[d]; return res; } // ifthen(condition, then-value, else-value) + boolvec_t ifthen(boolvec_t x, boolvec_t y) const; intvec_t ifthen(intvec_t x, intvec_t y) const; // defined after intbuiltinvec realvec_t ifthen(realvec_t x, realvec_t y) const; // defined after realbuiltinvec }; @@ -142,16 +126,11 @@ namespace vecmathlib { typedef typename floatprops<T>::uint_t uint_t; typedef typename floatprops<T>::real_t real_t; - static int const size = N; + static const int size = N; typedef int_t scalar_t; -#ifdef __clang__ typedef int_t ivector_t __attribute__((__ext_vector_type__(N))); typedef uint_t uvector_t __attribute__((__ext_vector_type__(N))); -#else - typedef int_t ivector_t __attribute__((__vector_size__(N*sizeof(int_t)))); - typedef uint_t uvector_t __attribute__((__vector_size__(N*sizeof(uint_t)))); -#endif - static int const alignment = sizeof(ivector_t); + static const int alignment = sizeof(ivector_t); static_assert(size * sizeof(real_t) == sizeof(ivector_t), "vector size is wrong"); @@ -172,13 +151,6 @@ namespace vecmathlib { typedef floatprops<real_t> FP; typedef mathfuncs<realvec_t> MF; - static intvec_t wrap(ivector_t x) - { - intvec_t res; - res.v = x; - return res; - } - ivector_t v; @@ -186,10 +158,14 @@ namespace vecmathlib { intbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers - // intbuiltinvec(intbuiltinvec const& x): v(x.v) {} - // intbuiltinvec& operator=(intbuiltinvec const& x) { return v=x.v, *this; } - intbuiltinvec(int_t a): v(ivector_t {a}) {} - intbuiltinvec(int_t const* as) { std::memcpy(&v, as, sizeof v); } + // intbuiltinvec(const intbuiltinvec& x): v(x.v) {} + // intbuiltinvec& operator=(const intbuiltinvec& x) { return v=x.v, *this; } + // Can't have a constructor from ivector_t, since this would + // conflict with the constructor from int_t + // intbuiltinvec(ivector_t x): v(x) {} + static intvec_t mkvec(ivector_t x) { intvec_t res; res.v=x; return res; } + intbuiltinvec(int_t a): v(a) {} + intbuiltinvec(const int_t* as) { std::memcpy(&v, as, sizeof v); } static intvec_t iota() { intvec_t res; @@ -197,8 +173,6 @@ namespace vecmathlib { return res; } - // int_t operator[](int n) const { return ((int_t const*)&v)[n]; } - // intvec_t& set_elt(int n, int_t a) { return ((int_t*)&v)[n]=a, *this; } int_t operator[](int n) const { return v[n]; } intvec_t& set_elt(int n, int_t a) { return v[n]=a, *this; } @@ -207,87 +181,126 @@ namespace vecmathlib { boolvec_t as_bool() const { boolvec_t res; - std::memcpy(&res.v, &v, sizeof v); + std::memcpy(&res.v, &v, sizeof res.v); return res; } - boolvec_t convert_bool() const { return boolvec_t::wrap(ivector_t(!!v)); } + boolvec_t convert_bool() const { return *this != IV(I(0)); } realvec_t as_float() const; // defined after realbuiltinvec realvec_t convert_float() const; // defined after realbuiltinvec - intvec_t operator+() const { return wrap(+v); } - intvec_t operator-() const { return wrap(-v); } + intvec_t operator+() const { return mkvec(+v); } + intvec_t operator-() const { return mkvec(-v); } - intvec_t operator+(intvec_t x) const { return wrap(v+x.v); } - intvec_t operator-(intvec_t x) const { return wrap(v-x.v); } - intvec_t operator*(intvec_t x) const { return wrap(v*x.v); } - intvec_t operator/(intvec_t x) const { return wrap(v/x.v); } - intvec_t operator%(intvec_t x) const { return wrap(v%x.v); } + intvec_t operator+(intvec_t x) const { return mkvec(v + x.v); } + intvec_t operator-(intvec_t x) const { return mkvec(v - x.v); } + intvec_t operator*(intvec_t x) const { return mkvec(v * x.v); } + intvec_t operator/(intvec_t x) const { return mkvec(v / x.v); } + intvec_t operator%(intvec_t x) const { return mkvec(v % x.v); } - intvec_t& operator+=(intvec_t const& x) { return *this=*this+x; } - intvec_t& operator-=(intvec_t const& x) { return *this=*this-x; } - intvec_t& operator*=(intvec_t const& x) { return *this=*this*x; } - intvec_t& operator/=(intvec_t const& x) { return *this=*this/x; } - intvec_t& operator%=(intvec_t const& x) { return *this=*this%x; } + intvec_t& operator+=(const intvec_t& x) { return *this=*this+x; } + intvec_t& operator-=(const intvec_t& x) { return *this=*this-x; } + intvec_t& operator*=(const intvec_t& x) { return *this=*this*x; } + intvec_t& operator/=(const intvec_t& x) { return *this=*this/x; } + intvec_t& operator%=(const intvec_t& x) { return *this=*this%x; } - intvec_t operator~() const { return wrap(~v); } + intvec_t operator~() const { return mkvec(~v); } - intvec_t operator&(intvec_t x) const { return wrap(v&x.v); } - intvec_t operator|(intvec_t x) const { return wrap(v|x.v); } - intvec_t operator^(intvec_t x) const { return wrap(v^x.v); } + intvec_t operator&(intvec_t x) const { return mkvec(v & x.v); } + intvec_t operator|(intvec_t x) const { return mkvec(v | x.v); } + intvec_t operator^(intvec_t x) const { return mkvec(v ^ x.v); } - intvec_t& operator&=(intvec_t const& x) { return *this=*this&x; } - intvec_t& operator|=(intvec_t const& x) { return *this=*this|x; } - intvec_t& operator^=(intvec_t const& x) { return *this=*this^x; } + intvec_t& operator&=(const intvec_t& x) { return *this=*this&x; } + intvec_t& operator|=(const intvec_t& x) { return *this=*this|x; } + intvec_t& operator^=(const intvec_t& x) { return *this=*this^x; } + intvec_t bitifthen(intvec_t x, intvec_t y) const + { + return MF::vml_bitifthen(*this, x, y); + } - intvec_t lsr(int_t n) const { return wrap(ivector_t(uvector_t(v)>>U(n))); } - intvec_t operator>>(int_t n) const { return wrap(v>>n); } - intvec_t operator<<(int_t n) const { return wrap(v<<n); } + + intvec_t lsr(int_t n) const + { + return mkvec(ivector_t(uvector_t(v) >> U(n))); + } + intvec_t rotate(int_t n) const { return MF::vml_rotate(*this, n); } + intvec_t operator>>(int_t n) const { return mkvec(v >> n); } + intvec_t operator<<(int_t n) const { return mkvec(v << n); } intvec_t& operator>>=(int_t n) { return *this=*this>>n; } intvec_t& operator<<=(int_t n) { return *this=*this<<n; } intvec_t lsr(intvec_t n) const { - return wrap(ivector_t(uvector_t(v)>>uvector_t(n.v))); + return mkvec(ivector_t(uvector_t(v)>>uvector_t(n.v))); } - intvec_t operator>>(intvec_t n) const { return wrap(v>>n.v); } - intvec_t operator<<(intvec_t n) const { return wrap(v<<n.v); } + intvec_t rotate(intvec_t n) const { return MF::vml_rotate(*this, n); } + intvec_t operator>>(intvec_t n) const { return mkvec(v >> n.v); } + intvec_t operator<<(intvec_t n) const { return mkvec(v << n.v); } intvec_t& operator>>=(intvec_t n) { return *this=*this>>n; } intvec_t& operator<<=(intvec_t n) { return *this=*this<<n; } + intvec_t clz() const + { + intvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_clz(U((*this)[d]))); + } + return res; + } + intvec_t popcount() const + { + intvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_popcount(U((*this)[d]))); + } + return res; + } - boolvec_t operator==(intvec_t const& x) const + + boolvec_t operator==(const intvec_t& x) const { - return boolvec_t::wrap((ivector_t)(v==x.v)); + return boolvec_t::mkvec(v == x.v); } - boolvec_t operator!=(intvec_t const& x) const + boolvec_t operator!=(const intvec_t& x) const { - return boolvec_t::wrap((ivector_t)(v!=x.v)); + return boolvec_t::mkvec(v != x.v); } - boolvec_t operator<(intvec_t const& x) const + boolvec_t operator<(const intvec_t& x) const { - return boolvec_t::wrap((ivector_t)(v<x.v)); + return boolvec_t::mkvec(v < x.v); } - boolvec_t operator<=(intvec_t const& x) const + boolvec_t operator<=(const intvec_t& x) const { - return boolvec_t::wrap((ivector_t)(v<=x.v)); + return boolvec_t::mkvec(v <= x.v); } - boolvec_t operator>(intvec_t const& x) const + boolvec_t operator>(const intvec_t& x) const { - return boolvec_t::wrap((ivector_t)(v>x.v)); + return boolvec_t::mkvec(v > x.v); } - boolvec_t operator>=(intvec_t const& x) const + boolvec_t operator>=(const intvec_t& x) const { - return boolvec_t::wrap((ivector_t)(v>=x.v)); + return boolvec_t::mkvec(v >= x.v); } + + intvec_t abs() const + { + intvec_t res; + for (int d=0; d<size; ++d) res.set_elt(d, builtin_abs((*this)[d])); + return res; + } + + boolvec_t isignbit() const { return MF::vml_isignbit(*this); } + + intvec_t max(intvec_t x) const { return MF::vml_max(*this, x); } + intvec_t min(intvec_t x) const { return MF::vml_min(*this, x); } }; @@ -299,19 +312,15 @@ namespace vecmathlib { typedef typename floatprops<T>::uint_t uint_t; typedef typename floatprops<T>::real_t real_t; - static int const size = N; + static const int size = N; typedef real_t scalar_t; -#ifdef __clang__ typedef real_t vector_t __attribute__((__ext_vector_type__(N))); -#else - typedef real_t vector_t __attribute__((__vector_size__(N*sizeof(real_t)))); -#endif - static int const alignment = sizeof(vector_t); + static const int alignment = sizeof(vector_t); static_assert(size * sizeof(real_t) == sizeof(vector_t), "vector size is wrong"); - static char const* name() + static const char* name() { static std::string name_; if (name_.empty()) { @@ -327,6 +336,56 @@ namespace vecmathlib { typedef intbuiltinvec<real_t, size> intvec_t; typedef realbuiltinvec realvec_t; + private: + boolvec_t mapb(bool f(real_t)) const + { + boolvec_t res; + for (int d=0; d<size; ++d) res.v[d] = f(v[d]); + return res; + } + intvec_t map(int_t f(real_t)) const + { + intvec_t res; + for (int d=0; d<size; ++d) res.v[d] = f(v[d]); + return res; + } + realvec_t map(real_t f(real_t)) const + { + realvec_t res; + for (int d=0; d<size; ++d) res.v[d] = f(v[d]); + return res; + } + realvec_t map(real_t f(real_t, int_t), intvec_t x) const + { + realvec_t res; + for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d]); + return res; + } + realvec_t map(real_t f(real_t, int_t*), intvec_t* x) const + { + realvec_t res; + for (int d=0; d<size; ++d) { + int_t ix; + res.v[d] = f(v[d], &ix); + x->set_elt(d, ix); + } + return res; + } + realvec_t map(real_t f(real_t, real_t), realvec_t x) const + { + realvec_t res; + for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d]); + return res; + } + realvec_t map(real_t f(real_t, real_t, real_t), + realvec_t x, realvec_t y) const + { + realvec_t res; + for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d], y.v[d]); + return res; + } + public: + // Short names for type casts typedef real_t R; typedef int_t I; @@ -337,13 +396,6 @@ namespace vecmathlib { typedef floatprops<real_t> FP; typedef mathfuncs<realvec_t> MF; - static realvec_t wrap(vector_t x) - { - realvec_t res; - res.v = x; - return res; - } - vector_t v; @@ -351,53 +403,54 @@ namespace vecmathlib { realbuiltinvec() {} // Can't have a non-trivial copy constructor; if so, objects won't // be passed in registers - realbuiltinvec(realbuiltinvec const& x): v(x.v) {} - realbuiltinvec& operator=(realbuiltinvec const& x) { return v=x.v, *this; } - realbuiltinvec(real_t a): v(vector_t {a}) {} - realbuiltinvec(real_t const* as) { std::memcpy(&v, as, sizeof v); } + // realbuiltinvec(const realbuiltinvec& x): v(x.v) {} + // realbuiltinvec& operator=(const realbuiltinvec& x) { return v=x.v, *this; } + // Can't have a constructor from vector_t, since this would + // conflict with the constructor from real_t + // realbuiltinvec(vector_t x): v(x) {} + static realvec_t mkvec(vector_t x) { realvec_t res; res.v=x; return res; } + realbuiltinvec(real_t a): v(a) {} + realbuiltinvec(const real_t* as) { std::memcpy(&v, as, sizeof v); } -#ifdef __clang__ real_t operator[](int n) const { return v[n]; } realvec_t& set_elt(int n, real_t a) { return v[n]=a, *this; } -#else - real_t operator[](int n) const { return ((real_t const*)&v)[n]; } - realvec_t& set_elt(int n, real_t a) { return ((real_t*)&v)[n]=a, *this; } -#endif typedef vecmathlib::mask_t<realvec_t> mask_t; - static realvec_t loada(real_t const* p) + static realvec_t loada(const real_t* p) { VML_ASSERT(intptr_t(p) % alignment == 0); -#ifdef __clang__ -#else - p = (real_t const*)__builtin_assume_aligned(p, sizeof(realvec_t)); +#if __has_builtin(__builtin_assume_aligned) + p = (const real_t*)__builtin_assume_aligned(p, sizeof(realvec_t)); #endif - return wrap(*(vector_t const*)p); + return mkvec(*(const vector_t*)p); } - static realvec_t loadu(real_t const* p) + static realvec_t loadu(const real_t* p) { + // return mkvec(*(const vector_t*)p); + realvec_t res; + for (int d=0; d<size; ++d) res.set_elt(d, p[d]); + return res; // realvec_t res; - // for (int d=0; d<size; ++d) res.set_elt(d, p[d]); + // memcpy(&res.v, p, sizeof res.v); // return res; - return wrap(*(vector_t const*)p); } - static realvec_t loadu(real_t const* p, size_t ioff) + static realvec_t loadu(const real_t* p, size_t ioff) { VML_ASSERT(intptr_t(p) % alignment == 0); return loadu(p+ioff); } - realvec_t loada(real_t const* p, mask_t const& m) const + realvec_t loada(const real_t* p, const mask_t& m) const { return m.m.ifthen(loada(p), *this); } - realvec_t loadu(real_t const* p, mask_t const& m) const + realvec_t loadu(const real_t* p, const mask_t& m) const { return m.m.ifthen(loadu(p), *this); } - realvec_t loadu(real_t const* p, size_t ioff, mask_t const& m) const + realvec_t loadu(const real_t* p, size_t ioff, const mask_t& m) const { return m.m.ifthen(loadu(p, ioff), *this); } @@ -405,32 +458,32 @@ namespace vecmathlib { void storea(real_t* p) const { VML_ASSERT(intptr_t(p) % alignment == 0); -#ifdef __clang__ -#else +#if __has_builtin(__builtin_assume_aligned) p = __builtin_assume_aligned(p, sizeof(realvec_t)); #endif *(vector_t*)p = v; } void storeu(real_t* p) const { - // for (int d=0; d<size; ++d) p[d] = v[d]; - *(vector_t*)p = v; + // *(vector_t*)p = v; + for (int d=0; d<size; ++d) p[d] = (*this)[d]; + // memcpy(p, &v, sizeof res.v); } void storeu(real_t* p, size_t ioff) const { VML_ASSERT(intptr_t(p) % alignment == 0); storeu(p+ioff); } - void storea(real_t* p, mask_t const& m) const + void storea(real_t* p, const mask_t& m) const { VML_ASSERT(intptr_t(p) % alignment == 0); storeu(p, m); } - void storeu(real_t* p, mask_t const& m) const + void storeu(real_t* p, const mask_t& m) const { - for (int d=0; d<size; ++d) if (m.m[d]) p[d] = v[d]; + for (int d=0; d<size; ++d) if (m.m[d]) p[d] = (*this)[d]; } - void storeu(real_t* p, size_t ioff, mask_t const& m) const + void storeu(real_t* p, size_t ioff, const mask_t& m) const { VML_ASSERT(intptr_t(p) % alignment == 0); storeu(p+ioff, m); @@ -441,126 +494,219 @@ namespace vecmathlib { intvec_t as_int() const { intvec_t res; - std::memcpy(&res.v, &v, sizeof v); + std::memcpy(&res.v, &v, sizeof res.v); return res; } intvec_t convert_int() const { - return intvec_t::wrap((typename intvec_t::ivector_t)v); + intvec_t res; + for (int d=0; d<size; ++d) res.set_elt(d, int_t((*this)[d])); + return res; } - realvec_t operator+() const { return wrap(+v); } - realvec_t operator-() const { return wrap(-v); } + realvec_t operator+() const { return mkvec(+v); } + realvec_t operator-() const { return mkvec(-v); } - realvec_t operator+(realvec_t x) const { return wrap(v+x.v); } - realvec_t operator-(realvec_t x) const { return wrap(v-x.v); } - realvec_t operator*(realvec_t x) const { return wrap(v*x.v); } - realvec_t operator/(realvec_t x) const { return wrap(v/x.v); } + realvec_t operator+(realvec_t x) const { return mkvec(v + x.v); } + realvec_t operator-(realvec_t x) const { return mkvec(v - x.v); } + realvec_t operator*(realvec_t x) const { return mkvec(v * x.v); } + realvec_t operator/(realvec_t x) const { return mkvec(v / x.v); } - realvec_t& operator+=(realvec_t const& x) { return *this=*this+x; } - realvec_t& operator-=(realvec_t const& x) { return *this=*this-x; } - realvec_t& operator*=(realvec_t const& x) { return *this=*this*x; } - realvec_t& operator/=(realvec_t const& x) { return *this=*this/x; } + realvec_t& operator+=(const realvec_t& x) { return *this=*this+x; } + realvec_t& operator-=(const realvec_t& x) { return *this=*this-x; } + realvec_t& operator*=(const realvec_t& x) { return *this=*this*x; } + realvec_t& operator/=(const realvec_t& x) { return *this=*this/x; } + real_t maxval() const + { + real_t res = v[0]; + for (int d=1; d<size; ++d) { + res = builtin_fmax(res, (*this)[d]); + } + return res; + } + real_t minval() const + { + real_t res = v[0]; + for (int d=1; d<size; ++d) { + res = builtin_fmin(res, (*this)[d]); + } + return res; + } real_t prod() const { - real_t res = R(1.0); - for (int d=0; d<size; ++d) res *= (*this)[d]; + real_t res = (*this)[0]; + for (int d=1; d<size; ++d) res *= (*this)[d]; return res; } real_t sum() const { - real_t res = R(0.0); - for (int d=0; d<size; ++d) res += (*this)[d]; + real_t res = (*this)[0]; + for (int d=1; d<size; ++d) res += (*this)[d]; return res; } - boolvec_t operator==(realvec_t const& x) const + boolvec_t operator==(const realvec_t& x) const { - return boolvec_t::wrap((typename intvec_t::ivector_t)(v==x.v)); + return boolvec_t::mkvec(v == x.v); } - boolvec_t operator!=(realvec_t const& x) const + boolvec_t operator!=(const realvec_t& x) const { - return boolvec_t::wrap((typename intvec_t::ivector_t)(v!=x.v)); + return boolvec_t::mkvec(v != x.v); } - boolvec_t operator<(realvec_t const& x) const + boolvec_t operator<(const realvec_t& x) const { - return boolvec_t::wrap((typename intvec_t::ivector_t)(v<x.v)); + return boolvec_t::mkvec(v < x.v); } - boolvec_t operator<=(realvec_t const& x) const + boolvec_t operator<=(const realvec_t& x) const { - return boolvec_t::wrap((typename intvec_t::ivector_t)(v<=x.v)); + return boolvec_t::mkvec(v <= x.v); } - boolvec_t operator>(realvec_t const& x) const + boolvec_t operator>(const realvec_t& x) const { - return boolvec_t::wrap((typename intvec_t::ivector_t)(v>x.v)); + return boolvec_t::mkvec(v > x.v); } - boolvec_t operator>=(realvec_t const& x) const + boolvec_t operator>=(const realvec_t& x) const { - return boolvec_t::wrap((typename intvec_t::ivector_t)(v>=x.v)); + return boolvec_t::mkvec(v >= x.v); } - realvec_t acos() const { return MF::vml_acos(*this); } - realvec_t acosh() const { return MF::vml_acosh(*this); } - realvec_t asin() const { return MF::vml_asin(*this); } - realvec_t asinh() const { return MF::vml_asinh(*this); } - realvec_t atan() const { return MF::vml_atan(*this); } - realvec_t atan2(realvec_t y) const { return MF::vml_atan(*this, y); } - realvec_t atanh() const { return MF::vml_atanh(*this); } - realvec_t cbrt() const { return MF::vml_cbrt(*this); } - realvec_t ceil() const { return MF::vml_ceil(*this); } - realvec_t copysign(realvec_t y) const { return MF::vml_copysign(*this, y); } - realvec_t cos() const { return MF::vml_cos(*this); } - realvec_t cosh() const { return MF::vml_cosh(*this); } - realvec_t exp() const { return MF::vml_exp(*this); } + realvec_t acos() const { return map(builtin_acos); } + realvec_t acosh() const { return map(builtin_acosh); } + realvec_t asin() const { return map(builtin_asin); } + realvec_t asinh() const { return map(builtin_asinh); } + realvec_t atan() const { return map(builtin_atan); } + realvec_t atan2(realvec_t y) const { return map(builtin_atan2, y); } + realvec_t atanh() const { return map(builtin_atanh); } + realvec_t cbrt() const { return map(builtin_cbrt); } + realvec_t ceil() const { return map(builtin_ceil); } + realvec_t copysign(realvec_t y) const { return map(builtin_copysign, y); } + realvec_t cos() const { return map(builtin_cos); } + realvec_t cosh() const { return map(builtin_cosh); } + realvec_t exp() const { return map(builtin_exp); } realvec_t exp10() const { return MF::vml_exp10(*this); } - realvec_t exp2() const { return MF::vml_exp2(*this); } - realvec_t expm1() const { return MF::vml_expm1(*this); } - realvec_t fabs() const { return MF::vml_fabs(*this); } - realvec_t fdim(realvec_t y) const { return MF::vml_fdim(*this, y); } - realvec_t floor() const { return MF::vml_floor(*this); } + realvec_t exp2() const { return map(builtin_exp2); } + realvec_t expm1() const { return map(builtin_expm1); } + realvec_t fabs() const { return map(builtin_fabs); } + realvec_t fdim(realvec_t y) const { return map(builtin_fdim, y); } + realvec_t floor() const { return map(builtin_floor); } realvec_t fma(realvec_t y, realvec_t z) const { - return MF::vml_fma(*this, y, z); + return map(builtin_fma, y, z); + } + realvec_t fmax(realvec_t y) const { return map(builtin_fmax, y); } + realvec_t fmin(realvec_t y) const { return map(builtin_fmin, y); } + realvec_t fmod(realvec_t y) const { return map(builtin_fmod, y); } + realvec_t frexp(intvec_t* r) const + { + realvec_t res; + intvec_t exp; + for (int d=0; d<size; ++d) { + real_t val = (*this)[d]; + int iexp; + res.set_elt(d, __builtin_frexp(val, &iexp)); + int_t jexp = int_t(iexp); + if (__builtin_isinf(val)) jexp = std::numeric_limits<int_t>::max(); + if (__builtin_isnan(val)) jexp = std::numeric_limits<int_t>::min(); + exp.set_elt(d, jexp); + } + *r = exp; + return res; + } + realvec_t hypot(realvec_t y) const { return map(builtin_hypot, y); } + intvec_t ilogb() const + { + intvec_t res; + for (int d=0; d<size; ++d) { + real_t val = (*this)[d]; + int iexp = __builtin_ilogb(val); + int_t jexp = int_t(iexp); + if (val == R(0.0)) jexp = std::numeric_limits<int_t>::min(); + if (__builtin_isinf(val)) jexp = std::numeric_limits<int_t>::max(); + if (__builtin_isnan(val)) jexp = std::numeric_limits<int_t>::min(); + res.set_elt(d, jexp); + } + return res; + } + boolvec_t isfinite() const + { + boolvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_isfinite((*this)[d]) != 0); + } + return res; + } + boolvec_t isinf() const + { + boolvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_isinf((*this)[d]) != 0); + } + return res; + } + boolvec_t isnan() const + { + boolvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_isnan((*this)[d]) != 0); + } + return res; + } + boolvec_t isnormal() const + { + boolvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_isnormal((*this)[d]) != 0); + } + return res; + } + realvec_t ldexp(int_t n) const + { + realvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_ldexp((*this)[d], int(n))); + } + return res; } - realvec_t fmax(realvec_t y) const { return MF::vml_fmax(*this, y); } - realvec_t fmin(realvec_t y) const { return MF::vml_fmin(*this, y); } - realvec_t fmod(realvec_t y) const { return MF::vml_fmod(*this, y); } - realvec frexp(intvec_t* r) const { return MF::vml_frexp(*this, r); } - realvec_t hypot(realvec_t y) const { return MF::vml_hypot(*this, y); } - intvec_t ilogb() const { return MF::vml_ilogb(*this); } - boolvec_t isfinite() const { return MF::vml_isfinite(*this); } - boolvec_t isinf() const { return MF::vml_isinf(*this); } - boolvec_t isnan() const { return MF::vml_isnan(*this); } - boolvec_t isnormal() const { return MF::vml_isnormal(*this); } - realvec_t ldexp(int_t n) const { return MF::vml_ldexp(*this, n); } - realvec_t ldexp(intvec_t n) const { return MF::vml_ldexp(*this, n); } - realvec_t log() const { return MF::vml_log(*this); } - realvec_t log10() const { return MF::vml_log10(*this); } - realvec_t log1p() const { return MF::vml_log1p(*this); } - realvec_t log2() const { return MF::vml_log2(*this); } - realvec_t pow(realvec_t y) const { return MF::vml_pow(*this, y); } - realvec_t rcp() const { return MF::vml_rcp(*this); } - realvec_t remainder(realvec_t y) const + realvec_t ldexp(intvec_t n) const { - return MF::vml_remainder(*this, y); + realvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_ldexp((*this)[d], int(n[d]))); + } + return res; } - realvec_t rint() const { return MF::vml_rint(*this); } - realvec_t round() const { return MF::vml_round(*this); } - realvec_t rsqrt() const { return MF::vml_rsqrt(*this); } - boolvec_t signbit() const { return MF::vml_signbit(*this); } - realvec_t sin() const { return MF::vml_sin(*this); } - realvec_t sinh() const { return MF::vml_sinh(*this); } - realvec_t sqrt() const { return MF::vml_sqrt(*this); } - realvec_t tan() const { return MF::vml_tan(*this); } - realvec_t tanh() const { return MF::vml_tanh(*this); } - realvec_t trunc() const { return MF::vml_trunc(*this); } + realvec_t log() const { return map(builtin_log); } + realvec_t log10() const { return map(builtin_log10); } + realvec_t log1p() const { return map(builtin_log1p); } + realvec_t log2() const { return map(builtin_log2); } + realvec_t nextafter(realvec_t y) const { return map(builtin_nextafter, y); } + realvec_t pow(realvec_t y) const { return map(builtin_pow, y); } + realvec_t rcp() const { return RV(1.0) / *this; } + realvec_t remainder(realvec_t y) const { return map(builtin_remainder, y); } + realvec_t rint() const { return map(builtin_rint); } + realvec_t round() const { return map(builtin_round); } + realvec_t rsqrt() const { return RV(1.0) / sqrt(); } + boolvec_t signbit() const + { + boolvec_t res; + for (int d=0; d<size; ++d) { + res.set_elt(d, builtin_signbit((*this)[d]) != 0); + } + return res; + } + realvec_t sin() const { return map(builtin_sin); } + realvec_t sinh() const { return map(builtin_sinh); } + realvec_t sqrt() const { return map(builtin_sqrt); } + realvec_t tan() const { return map(builtin_tan); } + realvec_t tanh() const { return map(builtin_tanh); } + realvec_t trunc() const { return map(builtin_trunc); } }; @@ -569,42 +715,52 @@ namespace vecmathlib { template<typename T, int N> inline - auto boolbuiltinvec<T,N>::as_int() const -> intvec_t + typename boolbuiltinvec<T,N>::intvec_t boolbuiltinvec<T,N>::as_int() const { intvec_t res; - std::memcpy(&res.v, &v, sizeof v); + std::memcpy(&res.v, &v, sizeof res.v); return res; } template<typename T, int N> inline - auto boolbuiltinvec<T,N>::convert_int() const -> intvec_t + typename boolbuiltinvec<T,N>::intvec_t + boolbuiltinvec<T,N>::convert_int() const { - return intvec_t::wrap(-v); + return - as_int(); } template<typename T, int N> inline - auto boolbuiltinvec<T,N>::ifthen(intvec_t x, intvec_t y) const -> intvec_t + typename boolbuiltinvec<T,N>::boolvec_t + boolbuiltinvec<T,N>::ifthen(boolvec_t x, boolvec_t y) const { -#ifdef __clang__ - intvec_t mask = as_int(); - return (mask & x) | (~mask & y); - #else - return intvec_t::wrap(v ? x.v : y.v); -#endif + // return v ? x.v : y.v; + boolvec_t res; + for (int d=0; d<size; ++d) res.set_elt(d, (*this)[d] ? x[d] : y[d]); + return res; } template<typename T, int N> inline - auto boolbuiltinvec<T,N>::ifthen(realvec_t x, realvec_t y) const -> realvec_t + typename boolbuiltinvec<T,N>::intvec_t + boolbuiltinvec<T,N>::ifthen(intvec_t x, intvec_t y) const { -#ifdef __clang__ - intvec_t mask = as_int(); - return as_float((mask & x.as_int()) | (~mask & y.as_int())); - #else - return realvec_t::wrap(v ? x.v : y.v); -#endif + // return v ? x.v : y.v; + intvec_t res; + for (int d=0; d<size; ++d) res.set_elt(d, (*this)[d] ? x[d] : y[d]); + return res; + } + + template<typename T, int N> + inline + typename boolbuiltinvec<T,N>::realvec_t + boolbuiltinvec<T,N>::ifthen(realvec_t x, realvec_t y) const + { + // return v ? x.v : y.v; + realvec_t res; + for (int d=0; d<size; ++d) res.set_elt(d, (*this)[d] ? x[d] : y[d]); + return res; } @@ -612,17 +768,22 @@ namespace vecmathlib { // intbuiltinvec definitions template<typename T, int N> - inline auto intbuiltinvec<T,N>::as_float() const -> realvec_t + inline + typename intbuiltinvec<T,N>::realvec_t intbuiltinvec<T,N>::as_float() const { realvec_t res; - std::memcpy(&res.v, &v, sizeof v); + std::memcpy(&res.v, &v, sizeof res.v); return res; } template<typename T, int N> - inline auto intbuiltinvec<T,N>::convert_float() const -> realvec_t + inline + typename intbuiltinvec<T,N>::realvec_t + intbuiltinvec<T,N>::convert_float() const { - return realvec_t::wrap((typename realvec_t::vector_t)v); + realvec_t res; + for (int d=0; d<size; ++d) res.set_elt(d, real_t((*this)[d])); + return res; } @@ -632,13 +793,15 @@ namespace vecmathlib { // boolbuiltinvec wrappers template<typename real_t, int size> - inline intbuiltinvec<real_t, size> as_int(boolbuiltinvec<real_t, size> x) + inline + intbuiltinvec<real_t, size> as_int(boolbuiltinvec<real_t, size> x) { return x.as_int(); } template<typename real_t, int size> - inline intbuiltinvec<real_t, size> convert_int(boolbuiltinvec<real_t, size> x) + inline + intbuiltinvec<real_t, size> convert_int(boolbuiltinvec<real_t, size> x) { return x.convert_int(); } @@ -651,6 +814,15 @@ namespace vecmathlib { template<typename real_t, int size> inline + boolbuiltinvec<real_t, size> ifthen(boolbuiltinvec<real_t, size> c, + boolbuiltinvec<real_t, size> x, + boolbuiltinvec<real_t, size> y) + { + return c.ifthen(x, y); + } + + template<typename real_t, int size> + inline intbuiltinvec<real_t, size> ifthen(boolbuiltinvec<real_t, size> c, intbuiltinvec<real_t, size> x, intbuiltinvec<real_t, size> y) @@ -672,15 +844,15 @@ namespace vecmathlib { // intbuiltinvec wrappers template<typename real_t, int size> - inline boolbuiltinvec<real_t, size> as_bool(intbuiltinvec<real_t, size> x) + inline intbuiltinvec<real_t, size> abs(intbuiltinvec<real_t, size> x) { - return x.as_bool(); + return x.abs(); } template<typename real_t, int size> - inline boolbuiltinvec<real_t, size> convert_bool(intbuiltinvec<real_t, size> x) + inline boolbuiltinvec<real_t, size> as_bool(intbuiltinvec<real_t, size> x) { - return x.convert_bool(); + return x.as_bool(); } template<typename real_t, int size> @@ -690,32 +862,101 @@ namespace vecmathlib { } template<typename real_t, int size> + inline + intbuiltinvec<real_t, size> bitifthen(intbuiltinvec<real_t, size> x, + intbuiltinvec<real_t, size> y, + intbuiltinvec<real_t, size> z) + { + return x.bitifthen(y, z); + } + + template<typename real_t, int size> + inline intbuiltinvec<real_t, size> clz(intbuiltinvec<real_t, size> x) + { + return x.clz(); + } + + template<typename real_t, int size> + inline boolbuiltinvec<real_t, size> convert_bool(intbuiltinvec<real_t, size> x) + { + return x.convert_bool(); + } + + template<typename real_t, int size> inline realbuiltinvec<real_t, size> convert_float(intbuiltinvec<real_t, size> x) { return x.convert_float(); } template<typename real_t, int size> + inline boolbuiltinvec<real_t, size> isignbit(intbuiltinvec<real_t, size> x) + { + return x.isignbit(); + } + + template<typename real_t, int size> inline intbuiltinvec<real_t, size> lsr(intbuiltinvec<real_t, size> x, - typename intbuiltinvec<real_t, size>::int_t n) + typename intbuiltinvec<real_t, size>::int_t n) { return x.lsr(n); } template<typename real_t, int size> - inline intbuiltinvec<real_t, size> lsr(intbuiltinvec<real_t, size> x, - intbuiltinvec<real_t, size> n) + inline + intbuiltinvec<real_t, size> lsr(intbuiltinvec<real_t, size> x, + intbuiltinvec<real_t, size> n) { return x.lsr(n); } + template<typename real_t, int size> + inline + intbuiltinvec<real_t, size> max(intbuiltinvec<real_t, size> x, + intbuiltinvec<real_t, size> y) + { + return x.max(y); + } + + template<typename real_t, int size> + inline + intbuiltinvec<real_t, size> min(intbuiltinvec<real_t, size> x, + intbuiltinvec<real_t, size> y) + { + return x.min(y); + } + + template<typename real_t, int size> + inline + intbuiltinvec<real_t, size> popcount(intbuiltinvec<real_t, size> x) + { + return x.popcount(); + } + + template<typename real_t, int size> + inline + intbuiltinvec<real_t, size> + rotate(intbuiltinvec<real_t, size> x, + typename intbuiltinvec<real_t, size>::int_t n) + { + return x.rotate(n); + } + + template<typename real_t, int size> + inline + intbuiltinvec<real_t, size> rotate(intbuiltinvec<real_t, size> x, + intbuiltinvec<real_t, size> n) + { + return x.rotate(n); + } + // realbuiltinvec wrappers template<typename real_t, int size> - inline realbuiltinvec<real_t, size> + inline + realbuiltinvec<real_t, size> loada(real_t const* p, realbuiltinvec<real_t, size> x, typename realbuiltinvec<real_t, size>::mask_t const& m) @@ -733,7 +974,8 @@ namespace vecmathlib { } template<typename real_t, int size> - inline realbuiltinvec<real_t, size> + inline + realbuiltinvec<real_t, size> loadu(real_t const* p, size_t ioff, realbuiltinvec<real_t, size> x, typename realbuiltinvec<real_t, size>::mask_t const& m) @@ -744,40 +986,40 @@ namespace vecmathlib { template<typename real_t, int size> inline void storea(realbuiltinvec<real_t, size> x, real_t* p) { - x.storea(p); + return x.storea(p); } template<typename real_t, int size> inline void storeu(realbuiltinvec<real_t, size> x, real_t* p) { - x.storeu(p); + return x.storeu(p); } template<typename real_t, int size> inline void storeu(realbuiltinvec<real_t, size> x, real_t* p, size_t ioff) { - x.storeu(p, ioff); + return x.storeu(p, ioff); } template<typename real_t, int size> inline void storea(realbuiltinvec<real_t, size> x, real_t* p, typename realbuiltinvec<real_t, size>::mask_t const& m) { - x.storea(p, m); + return x.storea(p, m); } template<typename real_t, int size> inline void storeu(realbuiltinvec<real_t, size> x, real_t* p, typename realbuiltinvec<real_t, size>::mask_t const& m) { - x.storeu(p, m); + return x.storeu(p, m); } template<typename real_t, int size> inline void storeu(realbuiltinvec<real_t, size> x, real_t* p, size_t ioff, typename realbuiltinvec<real_t, size>::mask_t const& m) { - x.storeu(p, ioff, m); + return x.storeu(p, ioff, m); } @@ -795,13 +1037,25 @@ namespace vecmathlib { } template<typename real_t, int size> - inline auto prod(realbuiltinvec<real_t, size> x) -> real_t + inline real_t maxval(realbuiltinvec<real_t, size> x) + { + return x.maxval(); + } + + template<typename real_t, int size> + inline real_t minval(realbuiltinvec<real_t, size> x) + { + return x.minval(); + } + + template<typename real_t, int size> + inline real_t prod(realbuiltinvec<real_t, size> x) { return x.prod(); } template<typename real_t, int size> - inline auto sum(realbuiltinvec<real_t, size> x) -> real_t + inline real_t sum(realbuiltinvec<real_t, size> x) { return x.sum(); } @@ -850,13 +1104,13 @@ namespace vecmathlib { { return x.atanh(); } - + template<typename real_t, int size> inline realbuiltinvec<real_t, size> cbrt(realbuiltinvec<real_t, size> x) { return x.cbrt(); } - + template<typename real_t, int size> inline realbuiltinvec<real_t, size> ceil(realbuiltinvec<real_t, size> x) { @@ -920,7 +1174,7 @@ namespace vecmathlib { template<typename real_t, int size> inline realbuiltinvec<real_t, size> fdim(realbuiltinvec<real_t, size> x, - realbuiltinvec<real_t, size> y) + realbuiltinvec<real_t, size> y) { return x.fdim(y); } @@ -955,6 +1209,13 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realbuiltinvec<real_t, size> frexp(realbuiltinvec<real_t, size> x, + intbuiltinvec<real_t, size>* r) + { + return x.frexp(r); + } + + template<typename real_t, int size> inline realbuiltinvec<real_t, size> hypot(realbuiltinvec<real_t, size> x, realbuiltinvec<real_t, size> y) { @@ -993,8 +1254,9 @@ namespace vecmathlib { template<typename real_t, int size> inline - realbuiltinvec<real_t, size> ldexp(realbuiltinvec<real_t, size> x, - typename intbuiltinvec<real_t, size>::int_t n) + realbuiltinvec<real_t, size> + ldexp(realbuiltinvec<real_t, size> x, + typename intbuiltinvec<real_t, size>::int_t n) { return x.ldexp(n); } @@ -1032,6 +1294,13 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realbuiltinvec<real_t, size> nextafter(realbuiltinvec<real_t, size> x, + realbuiltinvec<real_t, size> y) + { + return x.nextafter(y); + } + + template<typename real_t, int size> inline realbuiltinvec<real_t, size> pow(realbuiltinvec<real_t, size> x, realbuiltinvec<real_t, size> y) { diff --git a/vecmathlib.h b/vecmathlib.h index eab0f83..f5dfcf7 100644 --- a/vecmathlib.h +++ b/vecmathlib.h @@ -95,8 +95,7 @@ namespace std { class type_info; } #include "vec_pseudo.h" // Use compiler-provided vector types -// Problem: How to determine which operations are available in hardware? -// #include "vec_builtin.h" +#include "vec_builtin.h" // Scalarise all vector operations; don't use libm, use only // Vecmathlib's functions (mostly useful for testing Vecmathlib) |