diff options
-rw-r--r-- | mathfuncs_base.h | 1 | ||||
-rw-r--r-- | mathfuncs_convert.h | 18 | ||||
-rw-r--r-- | mathfuncs_rcp.h | 2 | ||||
-rw-r--r-- | mathfuncs_sin.h | 2 | ||||
-rw-r--r-- | test.cc | 19 | ||||
-rw-r--r-- | vec_base.h | 6 | ||||
-rw-r--r-- | vec_double_avx.h | 6 | ||||
-rw-r--r-- | vec_double_sse2.h | 7 | ||||
-rw-r--r-- | vec_double_sse2_scalar.h | 9 | ||||
-rw-r--r-- | vec_float_avx.h | 6 | ||||
-rw-r--r-- | vec_float_sse2.h | 7 | ||||
-rw-r--r-- | vec_float_sse2_scalar.h | 8 | ||||
-rw-r--r-- | vec_pseudo.h | 7 | ||||
-rw-r--r-- | vec_test.h | 22 |
14 files changed, 93 insertions, 27 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h index c46824e..8edec82 100644 --- a/mathfuncs_base.h +++ b/mathfuncs_base.h @@ -53,6 +53,7 @@ namespace vecmathlib { static realvec_t vml_convert_float(intvec_t x); static intvec_t vml_convert_int(realvec_t x); static realvec_t vml_floor(realvec_t x); + static realvec_t vml_rint(realvec_t x); static realvec_t vml_round(realvec_t x); static realvec_t vml_trunc(realvec_t x); diff --git a/mathfuncs_convert.h b/mathfuncs_convert.h index 190388f..b81e4e1 100644 --- a/mathfuncs_convert.h +++ b/mathfuncs_convert.h @@ -95,8 +95,10 @@ namespace vecmathlib { + // Round to nearest integer, breaking ties using prevailing rounding + // mode (default: round to even) template<typename realvec_t> - realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x) + realvec_t mathfuncs<realvec_t>::vml_rint(realvec_t x) { realvec_t r = x; // Round by adding a large number, destroying all excess precision @@ -108,22 +110,32 @@ namespace vecmathlib { return r; } + // Round to next integer above template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x) { boolvec_t iszero = x == RV(0.0); realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits)); - return ifthen(iszero, x, round(x + offset)); + return ifthen(iszero, x, rint(x + offset)); } + // Round to next integer below template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x) { boolvec_t iszero = x == RV(0.0); realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits)); - return ifthen(iszero, x, round(x - offset)); + return ifthen(iszero, x, rint(x - offset)); + } + + // Round to nearest integer, breaking ties away from zero + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x) + { + return copysign(floor(fabs(x)+RV(0.5)), x); } + // Round towards zero template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_trunc(realvec_t x) { diff --git a/mathfuncs_rcp.h b/mathfuncs_rcp.h index 99c516c..a7d7ba3 100644 --- a/mathfuncs_rcp.h +++ b/mathfuncs_rcp.h @@ -47,7 +47,7 @@ namespace vecmathlib { template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_remainder(realvec_t x, realvec_t y) { - return x - round(x / y) * y; + return x - rint(x / y) * y; } template<typename realvec_t> diff --git a/mathfuncs_sin.h b/mathfuncs_sin.h index 035513f..2c94e1e 100644 --- a/mathfuncs_sin.h +++ b/mathfuncs_sin.h @@ -18,7 +18,7 @@ namespace vecmathlib { x *= RV(1.0/(2.0*M_PI)); // Reduce range: sin(x) = sin(x + 2pi) - x -= round(x); + x -= rint(x); VML_ASSERT(all(x >= RV(-0.5) && x <= RV(+0.5))); // Reduce range: sin(x) = -sin(-x) @@ -396,7 +396,7 @@ struct vecmathlib_test { static void test_convert() { - cout << " testing ceil convert_float convert_int floor round...\n" + cout << " testing ceil convert_float convert_int floor rint round trunc...\n" << flush; for (int i=0; i<imax; ++i) { @@ -405,6 +405,8 @@ struct vecmathlib_test { intvec_t const n2 = random(int_t(-1000000000), int_t(+1000000000)); realvec_t const fn1 = vecmathlib::convert_float(n1); realvec_t const fn2 = vecmathlib::convert_float(n2); + realvec_t const fn1h = vecmathlib::convert_float(n1) * RV(0.25); + realvec_t const fn2h = vecmathlib::convert_float(n2) * RV(0.25); check("convert_float", FP::convert_float, vecmathlib::convert_float, n1, accuracy()); check("convert_float", @@ -413,15 +415,28 @@ struct vecmathlib_test { check("ceil", ceil, vecmathlib::ceil, x, accuracy()); check("ceil", ceil, vecmathlib::ceil, fn1, accuracy()); check("ceil", ceil, vecmathlib::ceil, fn2, accuracy()); + check("ceil", ceil, vecmathlib::ceil, fn1h, accuracy()); + check("ceil", ceil, vecmathlib::ceil, fn2h, accuracy()); check("floor", floor, vecmathlib::floor, x, accuracy()); check("floor", floor, vecmathlib::floor, fn1, accuracy()); check("floor", floor, vecmathlib::floor, fn2, accuracy()); + check("floor", floor, vecmathlib::floor, fn1h, accuracy()); + check("floor", floor, vecmathlib::floor, fn2h, accuracy()); + check("rint", rint, vecmathlib::rint, x, accuracy()); + check("rint", rint, vecmathlib::rint, fn1, accuracy()); + check("rint", rint, vecmathlib::rint, fn2, accuracy()); + check("rint", rint, vecmathlib::rint, fn1h, accuracy()); + check("rint", rint, vecmathlib::rint, fn2h, accuracy()); check("round", round, vecmathlib::round, x, accuracy()); check("round", round, vecmathlib::round, fn1, accuracy()); check("round", round, vecmathlib::round, fn2, accuracy()); + check("round", round, vecmathlib::round, fn1h, accuracy()); + check("round", round, vecmathlib::round, fn2h, accuracy()); check("trunc", trunc, vecmathlib::trunc, x, accuracy()); check("trunc", trunc, vecmathlib::trunc, fn1, accuracy()); check("trunc", trunc, vecmathlib::trunc, fn2, accuracy()); + check("trunc", trunc, vecmathlib::trunc, fn1h, accuracy()); + check("trunc", trunc, vecmathlib::trunc, fn2h, accuracy()); } } @@ -558,7 +573,7 @@ struct vecmathlib_test { static real_t rsqrt(real_t x) { return R(1.0)/sqrt(x); } static void test_sqrt() { - cout << " testing rsqrt sqrt...\n" << flush; + cout << " testing cbrt hypot rsqrt sqrt...\n" << flush; for (int i=0; i<imax; ++i) { realvec_t const x = random(R(0.0), R(1.0e+3)); realvec_t const y = random(-R(1.0e+3), R(1.0e+3)); @@ -442,6 +442,12 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realvec<real_t, size> rint(realvec<real_t, size> x) + { + return x.rint(); + } + + template<typename real_t, int size> inline realvec<real_t, size> round(realvec<real_t, size> x) { return x.round(); diff --git a/vec_double_avx.h b/vec_double_avx.h index b2ec43d..91b006e 100644 --- a/vec_double_avx.h +++ b/vec_double_avx.h @@ -627,7 +627,11 @@ namespace vecmathlib { realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const { return _mm256_div_pd(_mm256_set1_pd(1.0), v); } realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); } - realvec round() const { return _mm256_round_pd(v, _MM_FROUND_NINT); } + realvec rint() const + { + return _mm256_round_pd(v, _MM_FROUND_TO_NEAREST_INT); + } + realvec round() const { return MF::vml_round(*this); } realvec rsqrt() const { return MF::vml_rsqrt(*this); } boolvec_t signbit() const { return v; } realvec sin() const { return MF::vml_sin(*this); } diff --git a/vec_double_sse2.h b/vec_double_sse2.h index 40410ca..ce2739d 100644 --- a/vec_double_sse2.h +++ b/vec_double_sse2.h @@ -545,14 +545,15 @@ namespace vecmathlib { realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const { return _mm_div_pd(_mm_set1_pd(1.0), v); } realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); } - realvec round() const + realvec rint() const { #ifdef __SSE4_1__ - return _mm_round_pd(v, _MM_FROUND_NINT); + return _mm_round_pd(v, _MM_FROUND_TO_NEAREST_INT); #else - return MF::vml_round(*this); + return MF::vml_rint(*this); #endif } + realvec round() const { return MF::vml_round(*this); } realvec rsqrt() const { return MF::vml_rsqrt(*this); } boolvec_t signbit() const { return v; } realvec sin() const { return MF::vml_sin(*this); } diff --git a/vec_double_sse2_scalar.h b/vec_double_sse2_scalar.h index ff3afd4..48e540b 100644 --- a/vec_double_sse2_scalar.h +++ b/vec_double_sse2_scalar.h @@ -408,21 +408,20 @@ namespace vecmathlib { realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const { return R(1.0)/v; } realvec remainder(realvec y) const { return std::remainder(v, y.v); } - realvec round1() const { return std::round(v); } - realvec round() const + realvec rint() const { #ifdef __SSE4_1__ return to_double(_mm_round_sd(from_double(v), from_double(v), - _MM_FROUND_NINT)); + _MM_FROUND_TO_NEAREST_INT)); #else - return MF::vml_round(*this); + return MF::vml_rint(*this); #endif } + realvec round() const { return MF::vml_round(*this); } realvec rsqrt() const { return MF::vml_rsqrt(*this); } boolvec_t signbit() const { return std::signbit(v); } realvec sin() const { return MF::vml_sin(*this); } realvec sinh() const { return MF::vml_sinh(*this); } - realvec sqrt1() const { return std::sqrt(v); } realvec sqrt() const { return to_double(_mm_sqrt_sd(from_double(v), from_double(v))); diff --git a/vec_float_avx.h b/vec_float_avx.h index a6dd04f..6cce781 100644 --- a/vec_float_avx.h +++ b/vec_float_avx.h @@ -604,7 +604,11 @@ namespace vecmathlib { return r; } realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); } - realvec round() const { return _mm256_round_ps(v, _MM_FROUND_NINT); } + realvec rint() const + { + return _mm256_round_ps(v, _MM_FROUND_TO_NEAREST_INT); + } + realvec round() const { return MF::vml_round(*this); } realvec rsqrt() const { realvec x = *this; diff --git a/vec_float_sse2.h b/vec_float_sse2.h index d459d3d..d8963f6 100644 --- a/vec_float_sse2.h +++ b/vec_float_sse2.h @@ -527,14 +527,15 @@ namespace vecmathlib { return r; } realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); } - realvec round() const + realvec rint() const { #ifdef __SSE4_1__ - return _mm_round_ps(v, _MM_FROUND_NINT); + return _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT); #else - return MF::vml_round(*this); + return MF::vml_rint(*this); #endif } + realvec round() const { return MF::vml_round(*this); } realvec rsqrt() const { realvec x = *this; diff --git a/vec_float_sse2_scalar.h b/vec_float_sse2_scalar.h index b72ecf7..0b31b7c 100644 --- a/vec_float_sse2_scalar.h +++ b/vec_float_sse2_scalar.h @@ -408,16 +408,16 @@ namespace vecmathlib { realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const { return R(1.0)/v; } realvec remainder(realvec y) const { return std::remainder(v, y.v); } - realvec round1() const { return std::round(v); } - realvec round() const + realvec rint() const { #ifdef __SSE4_1__ return to_float(_mm_round_ss(from_float(v), from_float(v), - _MM_FROUND_NINT)); + _MM_FROUND_TO_NEAREST_INT)); #else - return MF::vml_round(*this); + return MF::vml_rint(*this); #endif } + realvec round() const { return MF::vml_round(*this); } realvec rsqrt() const { return MF::vml_rsqrt(*this); } boolvec_t signbit() const { return std::signbit(v); } realvec sin() const { return MF::vml_sin(*this); } diff --git a/vec_pseudo.h b/vec_pseudo.h index ac66701..31c0e61 100644 --- a/vec_pseudo.h +++ b/vec_pseudo.h @@ -716,6 +716,7 @@ namespace vecmathlib { { return map(std::remainder, y); } + realpseudovec rint() const { return map(std::rint); } realpseudovec round() const { return map(std::round); } realpseudovec rsqrt() const { return sqrt().rcp(); } boolvec_t signbit() const { return mapb(std::signbit); } @@ -1205,6 +1206,12 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realpseudovec<real_t, size> rint(realpseudovec<real_t, size> x) + { + return x.rint(); + } + + template<typename real_t, int size> inline realpseudovec<real_t, size> round(realpseudovec<real_t, size> x) { return x.round(); @@ -612,7 +612,10 @@ namespace vecmathlib { realtestvec atanh() const { return MF::vml_atanh(*this); } realtestvec cbrt() const { return MF::vml_cbrt(*this); } realtestvec ceil() const { return MF::vml_ceil(*this); } - realtestvec copysign(realtestvec y) const { return MF::vml_copysign(*this, y); } + realtestvec copysign(realtestvec y) const + { + return MF::vml_copysign(*this, y); + } realtestvec cos() const { return MF::vml_cos(*this); } realtestvec cosh() const { return MF::vml_cosh(*this); } realtestvec exp() const { return MF::vml_exp(*this); } @@ -622,7 +625,10 @@ namespace vecmathlib { realtestvec fabs() const { return MF::vml_fabs(*this); } realtestvec fdim(realtestvec y) const { return MF::vml_fdim(*this, y); } realtestvec floor() const { return MF::vml_floor(*this); } - realtestvec fma(realtestvec y, realtestvec z) const { return MF::vml_fma(*this, y, z); } + realtestvec fma(realtestvec y, realtestvec z) const + { + return MF::vml_fma(*this, y, z); + } realtestvec fmax(realtestvec y) const { return MF::vml_fmax(*this, y); } realtestvec fmin(realtestvec y) const { return MF::vml_fmin(*this, y); } realtestvec fmod(realtestvec y) const { return MF::vml_fmod(*this, y); } @@ -640,7 +646,11 @@ namespace vecmathlib { realtestvec log2() const { return MF::vml_log2(*this); } realtestvec pow(realtestvec y) const { return MF::vml_pow(*this, y); } realtestvec rcp() const { return MF::vml_rcp(*this); } - realtestvec remainder(realtestvec y) const { return MF::vml_remainder(*this, y); } + realtestvec remainder(realtestvec y) const + { + return MF::vml_remainder(*this, y); + } + realtestvec rint() const { return MF::vml_rint(*this); } realtestvec round() const { return MF::vml_round(*this); } realtestvec rsqrt() const { return MF::vml_rsqrt(*this); } boolvec_t signbit() const { return MF::vml_signbit(*this); } @@ -1128,6 +1138,12 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realtestvec<real_t, size> rint(realtestvec<real_t, size> x) + { + return x.rint(); + } + + template<typename real_t, int size> inline realtestvec<real_t, size> round(realtestvec<real_t, size> x) { return x.round(); |