diff options
-rw-r--r-- | mathfuncs_base.h | 5 | ||||
-rw-r--r-- | mathfuncs_convert.h | 12 | ||||
-rw-r--r-- | mathfuncs_exp.h | 2 | ||||
-rw-r--r-- | mathfuncs_fabs.h | 2 | ||||
-rw-r--r-- | mathfuncs_log.h | 2 | ||||
-rw-r--r-- | mathfuncs_rcp.h | 2 | ||||
-rw-r--r-- | mathfuncs_sqrt.h | 25 | ||||
-rw-r--r-- | test.cc | 13 | ||||
-rw-r--r-- | vec_base.h | 55 | ||||
-rw-r--r-- | vec_double_avx.h | 7 | ||||
-rw-r--r-- | vec_double_sse2.h | 14 | ||||
-rw-r--r-- | vec_double_sse2_scalar.h | 15 | ||||
-rw-r--r-- | vec_float_avx.h | 7 | ||||
-rw-r--r-- | vec_float_sse2.h | 14 | ||||
-rw-r--r-- | vec_float_sse2_scalar.h | 15 | ||||
-rw-r--r-- | vec_pseudo.h | 82 | ||||
-rw-r--r-- | vec_test.h | 60 |
17 files changed, 237 insertions, 95 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h index 30f8321..c46824e 100644 --- a/mathfuncs_base.h +++ b/mathfuncs_base.h @@ -54,6 +54,7 @@ namespace vecmathlib { static intvec_t vml_convert_int(realvec_t x); static realvec_t vml_floor(realvec_t x); static realvec_t vml_round(realvec_t x); + static realvec_t vml_trunc(realvec_t x); // fabs static realvec_t vml_copysign(realvec_t x, realvec_t y); @@ -67,7 +68,7 @@ namespace vecmathlib { static boolvec_t vml_isinf(realvec_t x); static boolvec_t vml_isnan(realvec_t x); static boolvec_t vml_isnormal(realvec_t x); - static realvec_t vml_scalbn(realvec_t x, intvec_t n); + static realvec_t vml_ldexp(realvec_t x, intvec_t n); static boolvec_t vml_signbit(realvec_t x); // exp @@ -101,6 +102,8 @@ namespace vecmathlib { static realvec_t vml_tanh(realvec_t x); // sqrt + static realvec_t vml_cbrt(realvec_t x); + static realvec_t vml_hypot(realvec_t x, realvec_t y); static realvec_t vml_rsqrt(realvec_t x); static realvec_t vml_sqrt(realvec_t x); }; diff --git a/mathfuncs_convert.h b/mathfuncs_convert.h index 44a3aee..190388f 100644 --- a/mathfuncs_convert.h +++ b/mathfuncs_convert.h @@ -100,7 +100,7 @@ namespace vecmathlib { { realvec_t r = x; // Round by adding a large number, destroying all excess precision - realvec_t offset = copysign(RV(std::scalbn(R(1.0), FP::mantissa_bits)), x); + realvec_t offset = copysign(RV(std::ldexp(R(1.0), FP::mantissa_bits)), x); r += offset; // Ensure the rounding is not optimised away r.barrier(); @@ -112,7 +112,7 @@ namespace vecmathlib { realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x) { boolvec_t iszero = x == RV(0.0); - realvec_t offset = RV(0.5) - scalbn(fabs(x), I(-FP::mantissa_bits)); + realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits)); return ifthen(iszero, x, round(x + offset)); } @@ -120,10 +120,16 @@ namespace vecmathlib { realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x) { boolvec_t iszero = x == RV(0.0); - realvec_t offset = RV(0.5) - scalbn(fabs(x), I(-FP::mantissa_bits)); + realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits)); return ifthen(iszero, x, round(x - offset)); } + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_trunc(realvec_t x) + { + return copysign(floor(fabs(x)), x); + } + }; // namespace vecmathlib #endif // #ifndef MATHFUNCS_CONVERT_H diff --git a/mathfuncs_exp.h b/mathfuncs_exp.h index f7e917d..ceb89c0 100644 --- a/mathfuncs_exp.h +++ b/mathfuncs_exp.h @@ -19,7 +19,7 @@ namespace vecmathlib { // realvec_t round_x = round(x); // intvec_t iround_x = convert_int(round_x); - // r = scalbn(r, iround_x); + // r = ldexp(r, iround_x); // Round by adding, then subtracting again a large number // Add a large number to move the mantissa bits to the right diff --git a/mathfuncs_fabs.h b/mathfuncs_fabs.h index aee8a18..1050147 100644 --- a/mathfuncs_fabs.h +++ b/mathfuncs_fabs.h @@ -103,7 +103,7 @@ namespace vecmathlib { } template<typename realvec_t> - realvec_t mathfuncs<realvec_t>::vml_scalbn(realvec_t x, intvec_t n) + realvec_t mathfuncs<realvec_t>::vml_ldexp(realvec_t x, intvec_t n) { return as_float(as_int(x) + (n << I(FP::mantissa_bits))); // return x * as_float((n + exponent_offset) << mantissa_bits); diff --git a/mathfuncs_log.h b/mathfuncs_log.h index de44de5..8d99a62 100644 --- a/mathfuncs_log.h +++ b/mathfuncs_log.h @@ -17,7 +17,7 @@ namespace vecmathlib { // Rescale VML_ASSERT(all(x > RV(0.0))); // intvec_t ilogb_x = ilogb(x); - // x = scalbn(x, -ilogb_x); + // x = ldexp(x, -ilogb_x); // sign bit is known to be zero intvec_t ilogb_x = (lsr(as_int(x), I(FP::mantissa_bits)) - IV(FP::exponent_offset)); diff --git a/mathfuncs_rcp.h b/mathfuncs_rcp.h index 66196e3..99c516c 100644 --- a/mathfuncs_rcp.h +++ b/mathfuncs_rcp.h @@ -22,7 +22,7 @@ namespace vecmathlib { VML_ASSERT(all(x > RV(0.0))); intvec_t ilogb_x = ilogb(x); // For stability, choose a starting value that is below the result - realvec_t r = scalbn(RV(0.5), -ilogb_x); + realvec_t r = ldexp(RV(0.5), -ilogb_x); // Iterate int const nmax = 7; diff --git a/mathfuncs_sqrt.h b/mathfuncs_sqrt.h index 303398d..917f7ac 100644 --- a/mathfuncs_sqrt.h +++ b/mathfuncs_sqrt.h @@ -9,9 +9,6 @@ -// For cbrt: Use "Halley's method with cubic convergence": -// <http://press.mcs.anl.gov/gswjanuary12/files/2012/01/Optimizing-Single-Node-Performance-on-BlueGene.pdf> - namespace vecmathlib { template<typename realvec_t> @@ -25,11 +22,11 @@ namespace vecmathlib { VML_ASSERT(all(x > RV(0.0))); #if 0 intvec_t ilogb_x = ilogb(x); - realvec_t r = scalbn(RV(M_SQRT2), ilogb_x >> 1); + realvec_t r = ldexp(RV(M_SQRT2), ilogb_x >> 1); // TODO: divide by M_SQRT2 if ilogb_x % 2 == 1 ? #else real_t correction = - std::scalbn(R(FP::exponent_offset & 1 ? M_SQRT2 : 1.0), + std::ldexp(R(FP::exponent_offset & 1 ? M_SQRT2 : 1.0), FP::exponent_offset >> 1); realvec_t r = lsr(x.as_int(), 1).as_float() * RV(correction); #endif @@ -56,6 +53,16 @@ namespace vecmathlib { + // TODO: Use "Halley's method with cubic convergence": + // <http://press.mcs.anl.gov/gswjanuary12/files/2012/01/Optimizing-Single-Node-Performance-on-BlueGene.pdf> + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_cbrt(realvec_t x) + { + return pow(x, RV(1.0/3.0)); + } + + + template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_rsqrt(realvec_t x) { @@ -70,6 +77,14 @@ namespace vecmathlib { return rcp(sqrt(x)); } + + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_hypot(realvec_t x, realvec_t y) + { + return sqrt(x*x + y*y); + } + }; // namespace vecmathlib #endif // #ifndef MATHFUNCS_SQRT_H @@ -369,10 +369,10 @@ struct vecmathlib_test { // Change signature: "int" -> "int_t" static int_t ilogb(real_t x) { return std::ilogb(x); } - static real_t scalbn(real_t x, int_t n) { return std::scalbn(x, n); } + static real_t ldexp(real_t x, int_t n) { return std::ldexp(x, n); } static void test_fabs() { - cout << " testing copysign fabs fdim fma fmax fmin ilogb isfinite isinf isnan isnormal scalbn signbit...\n" << flush; + cout << " testing copysign fabs fdim fma fmax fmin ilogb isfinite isinf isnan isnormal ldexp signbit...\n" << flush; for (int i=0; i<imax; ++i) { realvec_t const x = random(R(-10.0), R(+10.0)); realvec_t const y = random(R(-10.0), R(+10.0)); @@ -389,7 +389,7 @@ struct vecmathlib_test { check("isinf", isinf, vecmathlib::isinf, x); check("isnan", isnan, vecmathlib::isnan, x); check("isnormal", isnormal, vecmathlib::isnormal, x); - check("scalbn", scalbn, vecmathlib::scalbn, x, n, 0.0); + check("ldexp", ldexp, vecmathlib::ldexp, x, n, 0.0); check("signbit", signbit, vecmathlib::signbit, x); } } @@ -419,6 +419,9 @@ struct vecmathlib_test { check("round", round, vecmathlib::round, x, accuracy()); check("round", round, vecmathlib::round, fn1, accuracy()); check("round", round, vecmathlib::round, fn2, accuracy()); + check("trunc", trunc, vecmathlib::trunc, x, accuracy()); + check("trunc", trunc, vecmathlib::trunc, fn1, accuracy()); + check("trunc", trunc, vecmathlib::trunc, fn2, accuracy()); } } @@ -558,6 +561,10 @@ struct vecmathlib_test { cout << " testing rsqrt sqrt...\n" << flush; for (int i=0; i<imax; ++i) { realvec_t const x = random(R(0.0), R(1.0e+3)); + realvec_t const y = random(-R(1.0e+3), R(1.0e+3)); + realvec_t const z = random(-R(1.0e+3), R(1.0e+3)); + check("cbrt", cbrt, vecmathlib::cbrt, x, accuracy()); + check("hypot", hypot, vecmathlib::hypot, y, z, accuracy()); check("rsqrt", rsqrt, vecmathlib::rsqrt, x, accuracy()); check("sqrt", sqrt, vecmathlib::sqrt, x, accuracy()); } @@ -240,13 +240,19 @@ namespace vecmathlib { { return x.atanh(); } - + + template<typename real_t, int size> + inline realvec<real_t, size> cbrt(realvec<real_t, size> x) + { + return x.cbrt(); + } + template<typename real_t, int size> inline realvec<real_t, size> ceil(realvec<real_t, size> x) { return x.ceil(); } - + template<typename real_t, int size> inline realvec<real_t, size> copysign(realvec<real_t, size> x, realvec<real_t, size> y) @@ -339,6 +345,13 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realvec<real_t, size> hypot(realvec<real_t, size> x, + realvec<real_t, size> y) + { + return x.hypot(y); + } + + template<typename real_t, int size> inline intvec<real_t, size> ilogb(realvec<real_t, size> x) { return x.ilogb(); @@ -369,6 +382,22 @@ namespace vecmathlib { } template<typename real_t, int size> + inline + realvec<real_t, size> ldexp(realvec<real_t, size> x, + typename intvec<real_t, size>::int_t n) + { + return x.ldexp(n); + } + + template<typename real_t, int size> + inline + realvec<real_t, size> ldexp(realvec<real_t, size> x, + intvec<real_t, size> n) + { + return x.ldexp(n); + } + + template<typename real_t, int size> inline realvec<real_t, size> log(realvec<real_t, size> x) { return x.log(); @@ -425,22 +454,6 @@ namespace vecmathlib { } template<typename real_t, int size> - inline - realvec<real_t, size> scalbn(realvec<real_t, size> x, - typename intvec<real_t, size>::int_t n) - { - return x.scalbn(n); - } - - template<typename real_t, int size> - inline - realvec<real_t, size> scalbn(realvec<real_t, size> x, - intvec<real_t, size> n) - { - return x.scalbn(n); - } - - template<typename real_t, int size> inline boolvec<real_t, size> signbit(realvec<real_t, size> x) { return x.signbit(); @@ -476,6 +489,12 @@ namespace vecmathlib { return x.tanh(); } + template<typename real_t, int size> + inline realvec<real_t, size> trunc(realvec<real_t, size> x) + { + return x.trunc(); + } + template<typename real_t, int size> diff --git a/vec_double_avx.h b/vec_double_avx.h index 571f5a5..b2ec43d 100644 --- a/vec_double_avx.h +++ b/vec_double_avx.h @@ -596,6 +596,7 @@ namespace vecmathlib { realvec asinh() const { return MF::vml_asinh(*this); } realvec atan() const { return MF::vml_atan(*this); } realvec atanh() const { return MF::vml_atanh(*this); } + realvec cbrt() const { return MF::vml_cbrt(*this); } realvec ceil() const { return _mm256_ceil_pd(v); } realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } realvec cos() const { return MF::vml_cos(*this); } @@ -611,11 +612,14 @@ namespace vecmathlib { realvec fmax(realvec y) const { return _mm256_max_pd(v, y.v); } realvec fmin(realvec y) const { return _mm256_min_pd(v, y.v); } realvec fmod(realvec y) const { return MF::vml_fmod(*this, y); } + realvec hypot(realvec y) const { return MF::vml_hypot(*this, y); } intvec_t ilogb() const { return MF::vml_ilogb(*this); } boolvec_t isfinite() const { return MF::vml_isfinite(*this); } boolvec_t isinf() const { return MF::vml_isinf(*this); } boolvec_t isnan() const { return MF::vml_isnan(*this); } boolvec_t isnormal() const { return MF::vml_isnormal(*this); } + realvec ldexp(int_t n) const { return MF::vml_ldexp(*this, n); } + realvec ldexp(intvec_t n) const { return MF::vml_ldexp(*this, n); } realvec log() const { return MF::vml_log(*this); } realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } @@ -625,14 +629,13 @@ namespace vecmathlib { realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); } realvec round() const { return _mm256_round_pd(v, _MM_FROUND_NINT); } realvec rsqrt() const { return MF::vml_rsqrt(*this); } - realvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); } - realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return v; } realvec sin() const { return MF::vml_sin(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm256_sqrt_pd(v); } realvec tan() const { return MF::vml_tan(*this); } realvec tanh() const { return MF::vml_tanh(*this); } + realvec trunc() const { return _mm256_round_pd(v, _MM_FROUND_TO_ZERO); } }; diff --git a/vec_double_sse2.h b/vec_double_sse2.h index e9be2d7..40410ca 100644 --- a/vec_double_sse2.h +++ b/vec_double_sse2.h @@ -500,6 +500,7 @@ namespace vecmathlib { realvec asinh() const { return MF::vml_asinh(*this); } realvec atan() const { return MF::vml_atan(*this); } realvec atanh() const { return MF::vml_atanh(*this); } + realvec cbrt() const { return MF::vml_cbrt(*this); } realvec ceil() const { #ifdef __SSE4_1__ @@ -529,11 +530,14 @@ namespace vecmathlib { realvec fmax(realvec y) const { return _mm_max_pd(v, y.v); } realvec fmin(realvec y) const { return _mm_min_pd(v, y.v); } realvec fmod(realvec y) const { return MF::vml_fmod(*this, y); } + realvec hypot(realvec y) const { return MF::vml_hypot(*this, y); } intvec_t ilogb() const { return MF::vml_ilogb(*this); } boolvec_t isfinite() const { return MF::vml_isfinite(*this); } boolvec_t isinf() const { return MF::vml_isinf(*this); } boolvec_t isnan() const { return MF::vml_isnan(*this); } boolvec_t isnormal() const { return MF::vml_isnormal(*this); } + realvec ldexp(int_t n) const { return MF::vml_ldexp(*this, n); } + realvec ldexp(intvec_t n) const { return MF::vml_ldexp(*this, n); } realvec log() const { return MF::vml_log(*this); } realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } @@ -550,14 +554,20 @@ namespace vecmathlib { #endif } realvec rsqrt() const { return MF::vml_rsqrt(*this); } - realvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); } - realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return v; } realvec sin() const { return MF::vml_sin(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm_sqrt_pd(v); } realvec tan() const { return MF::vml_tan(*this); } realvec tanh() const { return MF::vml_tanh(*this); } + realvec trunc() const + { +#ifdef __SSE4_1__ + return _mm_round_pd(v, _MM_FROUND_TO_ZERO); +#else + return MF::vml_trunc(*this); +#endif + } }; diff --git a/vec_double_sse2_scalar.h b/vec_double_sse2_scalar.h index 8696d3d..ff3afd4 100644 --- a/vec_double_sse2_scalar.h +++ b/vec_double_sse2_scalar.h @@ -371,6 +371,7 @@ namespace vecmathlib { realvec asinh() const { return MF::vml_asinh(*this); } realvec atan() const { return MF::vml_atan(*this); } realvec atanh() const { return MF::vml_atanh(*this); } + realvec cbrt() const { return MF::vml_cbrt(*this); } realvec ceil() const { return std::ceil(v); } realvec copysign(realvec y) const { return std::copysign(v, y.v); } realvec cos() const { return MF::vml_cos(*this); } @@ -392,11 +393,14 @@ namespace vecmathlib { return to_double(_mm_min_sd(from_double(v), from_double(y.v))); } realvec fmod(realvec y) const { return std::fmod(v, y.v); } + realvec hypot(realvec y) const { return MF::vml_hypot(*this, y); } intvec_t ilogb() const { return std::ilogb(v); } boolvec_t isfinite() const { return std::isfinite(v); } boolvec_t isinf() const { return std::isinf(v); } boolvec_t isnan() const { return std::isnan(v); } boolvec_t isnormal() const { return std::isnormal(v); } + realvec ldexp(int_t n) const { return std::ldexp(v, n); } + realvec ldexp(intvec_t n) const { return std::ldexp(v, n); } realvec log() const { return MF::vml_log(*this); } realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } @@ -415,8 +419,6 @@ namespace vecmathlib { #endif } realvec rsqrt() const { return MF::vml_rsqrt(*this); } - realvec scalbn(int_t n) const { return std::scalbn(v, n); } - realvec scalbn(intvec_t n) const { return std::scalbn(v, n); } boolvec_t signbit() const { return std::signbit(v); } realvec sin() const { return MF::vml_sin(*this); } realvec sinh() const { return MF::vml_sinh(*this); } @@ -427,6 +429,15 @@ namespace vecmathlib { } realvec tan() const { return MF::vml_tan(*this); } realvec tanh() const { return MF::vml_tanh(*this); } + realvec trunc() const + { +#ifdef __SSE4_1__ + return to_double(_mm_round_sd(from_double(v), from_double(v), + _MM_FROUND_TO_ZERO)); +#else + return MF::vml_trunc(*this); +#endif + } }; diff --git a/vec_float_avx.h b/vec_float_avx.h index d6e0e24..a6dd04f 100644 --- a/vec_float_avx.h +++ b/vec_float_avx.h @@ -567,6 +567,7 @@ namespace vecmathlib { realvec asinh() const { return MF::vml_asinh(*this); } realvec atan() const { return MF::vml_atan(*this); } realvec atanh() const { return MF::vml_atanh(*this); } + realvec cbrt() const { return MF::vml_cbrt(*this); } realvec ceil() const { return _mm256_ceil_ps(v); } realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } realvec cos() const { return MF::vml_cos(*this); } @@ -582,11 +583,14 @@ namespace vecmathlib { realvec fmax(realvec y) const { return _mm256_max_ps(v, y.v); } realvec fmin(realvec y) const { return _mm256_min_ps(v, y.v); } realvec fmod(realvec y) const { return MF::vml_fmod(*this, y); } + realvec hypot(realvec y) const { return MF::vml_hypot(*this, y); } intvec_t ilogb() const { return MF::vml_ilogb(*this); } boolvec_t isfinite() const { return MF::vml_isfinite(*this); } boolvec_t isinf() const { return MF::vml_isinf(*this); } boolvec_t isnan() const { return MF::vml_isnan(*this); } boolvec_t isnormal() const { return MF::vml_isnormal(*this); } + realvec ldexp(int_t n) const { return MF::vml_ldexp(*this, n); } + realvec ldexp(intvec_t n) const { return MF::vml_ldexp(*this, n); } realvec log() const { return MF::vml_log(*this); } realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } @@ -608,14 +612,13 @@ namespace vecmathlib { r *= RV(1.5) - RV(0.5)*x * r*r; // one Newton iteration (see vml_rsqrt) return r; } - realvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); } - realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return v; } realvec sin() const { return MF::vml_sin(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm256_sqrt_ps(v); } realvec tan() const { return MF::vml_tan(*this); } realvec tanh() const { return MF::vml_tanh(*this); } + realvec trunc() const { return _mm256_round_ps(v, _MM_FROUND_TO_ZERO); } }; diff --git a/vec_float_sse2.h b/vec_float_sse2.h index 9319bfc..d459d3d 100644 --- a/vec_float_sse2.h +++ b/vec_float_sse2.h @@ -476,6 +476,7 @@ namespace vecmathlib { realvec asinh() const { return MF::vml_asinh(*this); } realvec atan() const { return MF::vml_atan(*this); } realvec atanh() const { return MF::vml_atanh(*this); } + realvec cbrt() const { return MF::vml_cbrt(*this); } realvec ceil() const { #ifdef __SSE4_1__ @@ -505,11 +506,14 @@ namespace vecmathlib { realvec fmax(realvec y) const { return _mm_max_ps(v, y.v); } realvec fmin(realvec y) const { return _mm_min_ps(v, y.v); } realvec fmod(realvec y) const { return MF::vml_fmod(*this, y); } + realvec hypot(realvec y) const { return MF::vml_hypot(*this, y); } intvec_t ilogb() const { return MF::vml_ilogb(*this); } boolvec_t isfinite() const { return MF::vml_isfinite(*this); } boolvec_t isinf() const { return MF::vml_isinf(*this); } boolvec_t isnan() const { return MF::vml_isnan(*this); } boolvec_t isnormal() const { return MF::vml_isnormal(*this); } + realvec ldexp(int_t n) const { return MF::vml_ldexp(*this, n); } + realvec ldexp(intvec_t n) const { return MF::vml_ldexp(*this, n); } realvec log() const { return MF::vml_log(*this); } realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } @@ -538,14 +542,20 @@ namespace vecmathlib { r *= RV(1.5) - RV(0.5)*x * r*r; // one Newton iteration (see vml_rsqrt) return r; } - realvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); } - realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return v; } realvec sin() const { return MF::vml_sin(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm_sqrt_ps(v); } realvec tan() const { return MF::vml_tan(*this); } realvec tanh() const { return MF::vml_tanh(*this); } + realvec trunc() const + { +#ifdef __SSE4_1__ + return _mm_round_ps(v, _MM_FROUND_TO_ZERO); +#else + return MF::vml_trunc(*this); +#endif + } }; diff --git a/vec_float_sse2_scalar.h b/vec_float_sse2_scalar.h index 405b6d7..b72ecf7 100644 --- a/vec_float_sse2_scalar.h +++ b/vec_float_sse2_scalar.h @@ -371,6 +371,7 @@ namespace vecmathlib { realvec asinh() const { return MF::vml_asinh(*this); } realvec atan() const { return MF::vml_atan(*this); } realvec atanh() const { return MF::vml_atanh(*this); } + realvec cbrt() const { return MF::vml_cbrt(*this); } realvec ceil() const { return std::ceil(v); } realvec copysign(realvec y) const { return std::copysign(v, y.v); } realvec cos() const { return MF::vml_cos(*this); } @@ -392,11 +393,14 @@ namespace vecmathlib { return to_float(_mm_min_ss(from_float(v), from_float(y.v))); } realvec fmod(realvec y) const { return std::fmod(v, y.v); } + realvec hypot(realvec y) const { return MF::vml_hypot(*this, y); } intvec_t ilogb() const { return std::ilogb(v); } boolvec_t isfinite() const { return std::isfinite(v); } boolvec_t isinf() const { return std::isinf(v); } boolvec_t isnan() const { return std::isnan(v); } boolvec_t isnormal() const { return std::isnormal(v); } + realvec ldexp(int_t n) const { return std::ldexp(v, n); } + realvec ldexp(intvec_t n) const { return std::ldexp(v, n); } realvec log() const { return MF::vml_log(*this); } realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } @@ -415,8 +419,6 @@ namespace vecmathlib { #endif } realvec rsqrt() const { return MF::vml_rsqrt(*this); } - realvec scalbn(int_t n) const { return std::scalbn(v, n); } - realvec scalbn(intvec_t n) const { return std::scalbn(v, n); } boolvec_t signbit() const { return std::signbit(v); } realvec sin() const { return MF::vml_sin(*this); } realvec sinh() const { return MF::vml_sinh(*this); } @@ -424,6 +426,15 @@ namespace vecmathlib { realvec sqrt() const { return to_float(_mm_sqrt_ss(from_float(v))); } realvec tan() const { return MF::vml_tan(*this); } realvec tanh() const { return MF::vml_tanh(*this); } + realvec trunc() const + { +#ifdef __SSE4_1__ + return to_float(_mm_round_ss(from_float(v), from_float(v), + _MM_FROUND_TO_ZERO)); +#else + return MF::vml_trunc(*this); +#endif + } }; diff --git a/vec_pseudo.h b/vec_pseudo.h index e16c668..ac66701 100644 --- a/vec_pseudo.h +++ b/vec_pseudo.h @@ -651,6 +651,7 @@ namespace vecmathlib { realpseudovec asinh() const { return map(std::asinh); } realpseudovec atan() const { return map(std::atan); } realpseudovec atanh() const { return map(std::atanh); } + realpseudovec cbrt() const { return map(std::cbrt); } realpseudovec ceil() const { return map(std::ceil); } realpseudovec copysign(realpseudovec y) const { @@ -677,6 +678,7 @@ namespace vecmathlib { realpseudovec fmax(realpseudovec y) const { return map(std::fmax, y); } realpseudovec fmin(realpseudovec y) const { return map(std::fmin, y); } realpseudovec fmod(realpseudovec y) const { return map(std::fmod, y); } + realpseudovec hypot(realpseudovec y) const { return map(std::hypot, y); } intvec_t ilogb() const { intvec_t res; @@ -687,6 +689,18 @@ namespace vecmathlib { boolvec_t isinf() const { return mapb(std::isinf); } boolvec_t isnan() const { return mapb(std::isnan); } boolvec_t isnormal() const { return mapb(std::isnormal); } + realpseudovec ldexp(int_t n) const + { + realvec_t res; + for (int d=0; d<size; ++d) res.v[d] = std::ldexp(v[d], n); + return res; + } + realpseudovec ldexp(intvec_t n) const + { + realvec_t res; + for (int d=0; d<size; ++d) res.v[d] = std::ldexp(v[d], n.v[d]); + return res; + } realpseudovec log() const { return map(std::log); } realpseudovec log10() const { return map(std::log10); } realpseudovec log1p() const { return map(std::log1p); } @@ -704,24 +718,13 @@ namespace vecmathlib { } realpseudovec round() const { return map(std::round); } realpseudovec rsqrt() const { return sqrt().rcp(); } - realpseudovec scalbn(int_t n) const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = std::scalbn(v[d], n); - return res; - } - realpseudovec scalbn(intvec_t n) const - { - realvec_t res; - for (int d=0; d<size; ++d) res.v[d] = std::scalbn(v[d], n.v[d]); - return res; - } boolvec_t signbit() const { return mapb(std::signbit); } realpseudovec sin() const { return map(std::sin); } realpseudovec sinh() const { return map(std::sinh); } realpseudovec sqrt() const { return map(std::sqrt); } realpseudovec tan() const { return map(std::tan); } realpseudovec tanh() const { return map(std::tanh); } + realpseudovec trunc() const { return map(std::trunc); } }; @@ -1000,13 +1003,19 @@ namespace vecmathlib { { return x.atanh(); } - + + template<typename real_t, int size> + inline realpseudovec<real_t, size> cbrt(realpseudovec<real_t, size> x) + { + return x.cbrt(); + } + template<typename real_t, int size> inline realpseudovec<real_t, size> ceil(realpseudovec<real_t, size> x) { return x.ceil(); } - + template<typename real_t, int size> inline realpseudovec<real_t, size> copysign(realpseudovec<real_t, size> x, realpseudovec<real_t, size> y) @@ -1099,6 +1108,13 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realpseudovec<real_t, size> hypot(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y) + { + return x.hypot(y); + } + + template<typename real_t, int size> inline intpseudovec<real_t, size> ilogb(realpseudovec<real_t, size> x) { return x.ilogb(); @@ -1129,6 +1145,22 @@ namespace vecmathlib { } template<typename real_t, int size> + inline + realpseudovec<real_t, size> ldexp(realpseudovec<real_t, size> x, + typename intpseudovec<real_t, size>::int_t n) + { + return x.ldexp(n); + } + + template<typename real_t, int size> + inline + realpseudovec<real_t, size> ldexp(realpseudovec<real_t, size> x, + intpseudovec<real_t, size> n) + { + return x.ldexp(n); + } + + template<typename real_t, int size> inline realpseudovec<real_t, size> log(realpseudovec<real_t, size> x) { return x.log(); @@ -1185,22 +1217,6 @@ namespace vecmathlib { } template<typename real_t, int size> - inline - realpseudovec<real_t, size> scalbn(realpseudovec<real_t, size> x, - typename intpseudovec<real_t, size>::int_t n) - { - return x.scalbn(n); - } - - template<typename real_t, int size> - inline - realpseudovec<real_t, size> scalbn(realpseudovec<real_t, size> x, - intpseudovec<real_t, size> n) - { - return x.scalbn(n); - } - - template<typename real_t, int size> inline boolpseudovec<real_t, size> signbit(realpseudovec<real_t, size> x) { return x.signbit(); @@ -1236,6 +1252,12 @@ namespace vecmathlib { return x.tanh(); } + template<typename real_t, int size> + inline realpseudovec<real_t, size> trunc(realpseudovec<real_t, size> x) + { + return x.trunc(); + } + template<typename real_t, int size> @@ -610,6 +610,7 @@ namespace vecmathlib { realtestvec asinh() const { return MF::vml_asinh(*this); } realtestvec atan() const { return MF::vml_atan(*this); } realtestvec atanh() const { return MF::vml_atanh(*this); } + realtestvec cbrt() const { return MF::vml_cbrt(*this); } realtestvec ceil() const { return MF::vml_ceil(*this); } realtestvec copysign(realtestvec y) const { return MF::vml_copysign(*this, y); } realtestvec cos() const { return MF::vml_cos(*this); } @@ -625,11 +626,14 @@ namespace vecmathlib { realtestvec fmax(realtestvec y) const { return MF::vml_fmax(*this, y); } realtestvec fmin(realtestvec y) const { return MF::vml_fmin(*this, y); } realtestvec fmod(realtestvec y) const { return MF::vml_fmod(*this, y); } + realtestvec hypot(realtestvec y) const { return MF::vml_hypot(*this, y); } intvec_t ilogb() const { return MF::vml_ilogb(*this); } boolvec_t isfinite() const { return MF::vml_isfinite(*this); } boolvec_t isinf() const { return MF::vml_isinf(*this); } boolvec_t isnan() const { return MF::vml_isnan(*this); } boolvec_t isnormal() const { return MF::vml_isnormal(*this); } + realtestvec ldexp(int_t n) const { return MF::vml_ldexp(*this, n); } + realtestvec ldexp(intvec_t n) const { return MF::vml_ldexp(*this, n); } realtestvec log() const { return MF::vml_log(*this); } realtestvec log10() const { return MF::vml_log10(*this); } realtestvec log1p() const { return MF::vml_log1p(*this); } @@ -639,14 +643,13 @@ namespace vecmathlib { realtestvec remainder(realtestvec y) const { return MF::vml_remainder(*this, y); } realtestvec round() const { return MF::vml_round(*this); } realtestvec rsqrt() const { return MF::vml_rsqrt(*this); } - realtestvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); } - realtestvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return MF::vml_signbit(*this); } realtestvec sin() const { return MF::vml_sin(*this); } realtestvec sinh() const { return MF::vml_sinh(*this); } realtestvec sqrt() const { return MF::vml_sqrt(*this); } realtestvec tan() const { return MF::vml_tan(*this); } realtestvec tanh() const { return MF::vml_tanh(*this); } + realtestvec trunc() const { return MF::vml_trunc(*this); } }; @@ -925,11 +928,17 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realtestvec<real_t, size> cbrt(realtestvec<real_t, size> x) + { + return x.cbrt(); + } + + template<typename real_t, int size> inline realtestvec<real_t, size> ceil(realtestvec<real_t, size> x) { return x.ceil(); } - + template<typename real_t, int size> inline realtestvec<real_t, size> copysign(realtestvec<real_t, size> x, realtestvec<real_t, size> y) @@ -1022,6 +1031,13 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realtestvec<real_t, size> hypot(realtestvec<real_t, size> x, + realtestvec<real_t, size> y) + { + return x.hypot(y); + } + + template<typename real_t, int size> inline inttestvec<real_t, size> ilogb(realtestvec<real_t, size> x) { return x.ilogb(); @@ -1052,6 +1068,22 @@ namespace vecmathlib { } template<typename real_t, int size> + inline + realtestvec<real_t, size> ldexp(realtestvec<real_t, size> x, + typename inttestvec<real_t, size>::int_t n) + { + return x.ldexp(n); + } + + template<typename real_t, int size> + inline + realtestvec<real_t, size> ldexp(realtestvec<real_t, size> x, + inttestvec<real_t, size> n) + { + return x.ldexp(n); + } + + template<typename real_t, int size> inline realtestvec<real_t, size> log(realtestvec<real_t, size> x) { return x.log(); @@ -1108,22 +1140,6 @@ namespace vecmathlib { } template<typename real_t, int size> - inline - realtestvec<real_t, size> scalbn(realtestvec<real_t, size> x, - typename inttestvec<real_t, size>::int_t n) - { - return x.scalbn(n); - } - - template<typename real_t, int size> - inline - realtestvec<real_t, size> scalbn(realtestvec<real_t, size> x, - inttestvec<real_t, size> n) - { - return x.scalbn(n); - } - - template<typename real_t, int size> inline booltestvec<real_t, size> signbit(realtestvec<real_t, size> x) { return x.signbit(); @@ -1159,6 +1175,12 @@ namespace vecmathlib { return x.tanh(); } + template<typename real_t, int size> + inline realtestvec<real_t, size> trunc(realtestvec<real_t, size> x) + { + return x.trunc(); + } + template<typename real_t, int size> |