14 files changed, 93 insertions, 27 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h
index c46824e..8edec82 100644
--- a/mathfuncs_base.h
+++ b/mathfuncs_base.h
@@ -53,6 +53,7 @@ namespace vecmathlib {
     static realvec_t vml_convert_float(intvec_t x);
     static intvec_t vml_convert_int(realvec_t x);
     static realvec_t vml_floor(realvec_t x);
+    static realvec_t vml_rint(realvec_t x);
     static realvec_t vml_round(realvec_t x);
     static realvec_t vml_trunc(realvec_t x);
     
diff --git a/mathfuncs_convert.h b/mathfuncs_convert.h
index 190388f..b81e4e1 100644
--- a/mathfuncs_convert.h
+++ b/mathfuncs_convert.h
@@ -95,8 +95,10 @@ namespace vecmathlib {
   
   
   
+  // Round to nearest integer, breaking ties using prevailing rounding
+  // mode (default: round to even)
   template<typename realvec_t>
-  realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x)
+  realvec_t mathfuncs<realvec_t>::vml_rint(realvec_t x)
   {
     realvec_t r = x;
     // Round by adding a large number, destroying all excess precision
@@ -108,22 +110,32 @@ namespace vecmathlib {
     return r;
   }
   
+  // Round up: smallest integer not less than x (same contract as ceil)
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x)
   {
     boolvec_t iszero = x == RV(0.0);
     realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits));
-    return ifthen(iszero, x, round(x + offset));
+    return ifthen(iszero, x, rint(x + offset)); // x == 0 selects x itself
   }
   
+  // Round down: largest integer not greater than x (same contract as floor)
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x)
   {
     boolvec_t iszero = x == RV(0.0);
     realvec_t offset = RV(0.5) - ldexp(fabs(x), I(-FP::mantissa_bits));
-    return ifthen(iszero, x, round(x - offset));
+    return ifthen(iszero, x, rint(x - offset)); // x == 0 selects x itself
+  }
+  
+  // Round to nearest integer, breaking ties away from zero (sign of x is kept, so -0.3 -> -0.0)
+  template<typename realvec_t>
+  realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x)
+  { // Compare the exact fraction |x|-floor(|x|) with 0.5; floor(|x|+0.5) misrounds when that sum is inexact
+    return copysign(floor(fabs(x)) + ifthen(fabs(x) - floor(fabs(x)) >= RV(0.5), RV(1.0), RV(0.0)), x);
   }
   
+  // Round towards zero
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_trunc(realvec_t x)
   {
diff --git a/mathfuncs_rcp.h b/mathfuncs_rcp.h
index 99c516c..a7d7ba3 100644
--- a/mathfuncs_rcp.h
+++ b/mathfuncs_rcp.h
@@ -47,7 +47,7 @@ namespace vecmathlib {
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_remainder(realvec_t x, realvec_t y)
   {
-    return x - round(x / y) * y;
+    return x - rint(x / y) * y; // quotient rounded with rint, so ties are not forced away from zero
   }
   
   template<typename realvec_t>
diff --git a/mathfuncs_sin.h b/mathfuncs_sin.h
index 035513f..2c94e1e 100644
--- a/mathfuncs_sin.h
+++ b/mathfuncs_sin.h
@@ -18,7 +18,7 @@ namespace vecmathlib {
     x *= RV(1.0/(2.0*M_PI));
     
     // Reduce range: sin(x) = sin(x + 2pi)
-    x -= round(x);
+    x -= rint(x);
     VML_ASSERT(all(x >= RV(-0.5) && x <= RV(+0.5)));
     
     // Reduce range: sin(x) = -sin(-x)
diff --git a/test.cc b/test.cc
index aef634f..203dd8d 100644
--- a/test.cc
+++ b/test.cc
@@ -396,7 +396,7 @@ struct vecmathlib_test {
   
   static void test_convert()
   {
-    cout << "   testing ceil convert_float convert_int floor round...\n"
+    cout << "   testing ceil convert_float convert_int floor rint round trunc...\n"
          << flush;
     
     for (int i=0; i<imax; ++i) {
@@ -405,6 +405,8 @@ struct vecmathlib_test {
       intvec_t const n2 = random(int_t(-1000000000), int_t(+1000000000));
       realvec_t const fn1 = vecmathlib::convert_float(n1);
       realvec_t const fn2 = vecmathlib::convert_float(n2);
+      realvec_t const fn1h = vecmathlib::convert_float(n1) * RV(0.25);
+      realvec_t const fn2h = vecmathlib::convert_float(n2) * RV(0.25);
       check("convert_float",
             FP::convert_float, vecmathlib::convert_float, n1, accuracy());
       check("convert_float",
@@ -413,15 +415,28 @@ struct vecmathlib_test {
       check("ceil", ceil, vecmathlib::ceil, x, accuracy());
       check("ceil", ceil, vecmathlib::ceil, fn1, accuracy());
       check("ceil", ceil, vecmathlib::ceil, fn2, accuracy());
+      check("ceil", ceil, vecmathlib::ceil, fn1h, accuracy());
+      check("ceil", ceil, vecmathlib::ceil, fn2h, accuracy());
       check("floor", floor, vecmathlib::floor, x, accuracy());
       check("floor", floor, vecmathlib::floor, fn1, accuracy());
       check("floor", floor, vecmathlib::floor, fn2, accuracy());
+      check("floor", floor, vecmathlib::floor, fn1h, accuracy());
+      check("floor", floor, vecmathlib::floor, fn2h, accuracy());
+      check("rint", rint, vecmathlib::rint, x, accuracy());
+      check("rint", rint, vecmathlib::rint, fn1, accuracy());
+      check("rint", rint, vecmathlib::rint, fn2, accuracy());
+      check("rint", rint, vecmathlib::rint, fn1h, accuracy());
+      check("rint", rint, vecmathlib::rint, fn2h, accuracy());
       check("round", round, vecmathlib::round, x, accuracy());
       check("round", round, vecmathlib::round, fn1, accuracy());
       check("round", round, vecmathlib::round, fn2, accuracy());
+      check("round", round, vecmathlib::round, fn1h, accuracy());
+      check("round", round, vecmathlib::round, fn2h, accuracy());
       check("trunc", trunc, vecmathlib::trunc, x, accuracy());
       check("trunc", trunc, vecmathlib::trunc, fn1, accuracy());
       check("trunc", trunc, vecmathlib::trunc, fn2, accuracy());
+      check("trunc", trunc, vecmathlib::trunc, fn1h, accuracy());
+      check("trunc", trunc, vecmathlib::trunc, fn2h, accuracy());
     }
   }
   
@@ -558,7 +573,7 @@ struct vecmathlib_test {
   static real_t rsqrt(real_t x) { return R(1.0)/sqrt(x); }
   static void test_sqrt()
   {
-    cout << "   testing rsqrt sqrt...\n" << flush;
+    cout << "   testing cbrt hypot rsqrt sqrt...\n" << flush;
     for (int i=0; i<imax; ++i) {
       realvec_t const x = random(R(0.0), R(1.0e+3));
       realvec_t const y = random(-R(1.0e+3), R(1.0e+3));
diff --git a/vec_base.h b/vec_base.h
index bd49d0d..28fa400 100644
--- a/vec_base.h
+++ b/vec_base.h
@@ -442,6 +442,12 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline realvec<real_t, size> rint(realvec<real_t, size> x)
+  {
+    return x.rint(); // free-function spelling; forwards to the member rint()
+  }
+  
+  template<typename real_t, int size>
   inline realvec<real_t, size> round(realvec<real_t, size> x)
   {
     return x.round();
diff --git a/vec_double_avx.h b/vec_double_avx.h
index b2ec43d..91b006e 100644
--- a/vec_double_avx.h
+++ b/vec_double_avx.h
@@ -627,7 +627,11 @@ namespace vecmathlib {
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const { return _mm256_div_pd(_mm256_set1_pd(1.0), v); }
     realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); }
-    realvec round() const { return _mm256_round_pd(v, _MM_FROUND_NINT); }
+    realvec rint() const // round to integer with the AVX packed-double rounding intrinsic
+    { // NOTE(review): the immediate requests nearest-rounding explicitly, not the current MXCSR mode - confirm intended
+      return _mm256_round_pd(v, _MM_FROUND_TO_NEAREST_INT);
+    }
+    realvec round() const { return MF::vml_round(*this); } // ties away from zero; no intrinsic used here
     realvec rsqrt() const { return MF::vml_rsqrt(*this); }
     boolvec_t signbit() const { return v; }
     realvec sin() const { return MF::vml_sin(*this); }
diff --git a/vec_double_sse2.h b/vec_double_sse2.h
index 40410ca..ce2739d 100644
--- a/vec_double_sse2.h
+++ b/vec_double_sse2.h
@@ -545,14 +545,15 @@ namespace vecmathlib {
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const { return _mm_div_pd(_mm_set1_pd(1.0), v); }
     realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); }
-    realvec round() const
+    realvec rint() const // round to integer; intrinsic path is compiled only when __SSE4_1__ is defined
     {
 #ifdef __SSE4_1__
-      return _mm_round_pd(v, _MM_FROUND_NINT);
+      return _mm_round_pd(v, _MM_FROUND_TO_NEAREST_INT); // packed-double rounding intrinsic
 #else
-      return MF::vml_round(*this);
+      return MF::vml_rint(*this); // generic fallback when __SSE4_1__ is not defined
 #endif
     }
+    realvec round() const { return MF::vml_round(*this); } // ties away from zero; generic implementation
     realvec rsqrt() const { return MF::vml_rsqrt(*this); }
     boolvec_t signbit() const { return v; }
     realvec sin() const { return MF::vml_sin(*this); }
diff --git a/vec_double_sse2_scalar.h b/vec_double_sse2_scalar.h
index ff3afd4..48e540b 100644
--- a/vec_double_sse2_scalar.h
+++ b/vec_double_sse2_scalar.h
@@ -408,21 +408,20 @@ namespace vecmathlib {
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const { return R(1.0)/v; }
     realvec remainder(realvec y) const { return std::remainder(v, y.v); }
-    realvec round1() const { return std::round(v); }
-    realvec round() const
+    realvec rint() const // round to integer; intrinsic path is compiled only when __SSE4_1__ is defined
     {
 #ifdef __SSE4_1__
       return to_double(_mm_round_sd(from_double(v), from_double(v),
-                                    _MM_FROUND_NINT));
+                                    _MM_FROUND_TO_NEAREST_INT)); // scalar-double rounding intrinsic
 #else
-      return MF::vml_round(*this);
+      return MF::vml_rint(*this); // generic fallback when __SSE4_1__ is not defined
 #endif
     }
+    realvec round() const { return MF::vml_round(*this); } // ties away from zero; generic implementation
     realvec rsqrt() const { return MF::vml_rsqrt(*this); }
     boolvec_t signbit() const { return std::signbit(v); }
     realvec sin() const { return MF::vml_sin(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
-    realvec sqrt1() const { return std::sqrt(v); }
     realvec sqrt() const
     {
       return to_double(_mm_sqrt_sd(from_double(v), from_double(v)));
diff --git a/vec_float_avx.h b/vec_float_avx.h
index a6dd04f..6cce781 100644
--- a/vec_float_avx.h
+++ b/vec_float_avx.h
@@ -604,7 +604,11 @@ namespace vecmathlib {
       return r;
     }
     realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); }
-    realvec round() const { return _mm256_round_ps(v, _MM_FROUND_NINT); }
+    realvec rint() const // round to integer with the AVX packed-float rounding intrinsic
+    { // NOTE(review): the immediate requests nearest-rounding explicitly, not the current MXCSR mode - confirm intended
+      return _mm256_round_ps(v, _MM_FROUND_TO_NEAREST_INT);
+    }
+    realvec round() const { return MF::vml_round(*this); } // ties away from zero; no intrinsic used here
     realvec rsqrt() const
     {
       realvec x = *this;
diff --git a/vec_float_sse2.h b/vec_float_sse2.h
index d459d3d..d8963f6 100644
--- a/vec_float_sse2.h
+++ b/vec_float_sse2.h
@@ -527,14 +527,15 @@ namespace vecmathlib {
       return r;
     }
     realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); }
-    realvec round() const
+    realvec rint() const // round to integer; intrinsic path is compiled only when __SSE4_1__ is defined
     {
 #ifdef __SSE4_1__
-      return _mm_round_ps(v, _MM_FROUND_NINT);
+      return _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT); // packed-float rounding intrinsic
 #else
-      return MF::vml_round(*this);
+      return MF::vml_rint(*this); // generic fallback when __SSE4_1__ is not defined
 #endif
     }
+    realvec round() const { return MF::vml_round(*this); } // ties away from zero; generic implementation
     realvec rsqrt() const
     {
       realvec x = *this;
diff --git a/vec_float_sse2_scalar.h b/vec_float_sse2_scalar.h
index b72ecf7..0b31b7c 100644
--- a/vec_float_sse2_scalar.h
+++ b/vec_float_sse2_scalar.h
@@ -408,16 +408,16 @@ namespace vecmathlib {
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const { return R(1.0)/v; }
     realvec remainder(realvec y) const { return std::remainder(v, y.v); }
-    realvec round1() const { return std::round(v); }
-    realvec round() const
+    realvec rint() const // round to integer; intrinsic path is compiled only when __SSE4_1__ is defined
     {
 #ifdef __SSE4_1__
       return to_float(_mm_round_ss(from_float(v), from_float(v),
-                                   _MM_FROUND_NINT));
+                                   _MM_FROUND_TO_NEAREST_INT)); // scalar-float rounding intrinsic
 #else
-      return MF::vml_round(*this);
+      return MF::vml_rint(*this); // generic fallback when __SSE4_1__ is not defined
 #endif
     }
+    realvec round() const { return MF::vml_round(*this); } // ties away from zero; generic implementation
     realvec rsqrt() const { return MF::vml_rsqrt(*this); }
     boolvec_t signbit() const { return std::signbit(v); }
     realvec sin() const { return MF::vml_sin(*this); }
diff --git a/vec_pseudo.h b/vec_pseudo.h
index ac66701..31c0e61 100644
--- a/vec_pseudo.h
+++ b/vec_pseudo.h
@@ -716,6 +716,7 @@ namespace vecmathlib {
     {
       return map(std::remainder, y);
     }
+    realpseudovec rint() const { return map(std::rint); } // passes std::rint to map(), like round() does with std::round
     realpseudovec round() const { return map(std::round); }
     realpseudovec rsqrt() const { return sqrt().rcp(); }
     boolvec_t signbit() const { return mapb(std::signbit); }
@@ -1205,6 +1206,12 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline realpseudovec<real_t, size> rint(realpseudovec<real_t, size> x)
+  {
+    return x.rint(); // free-function spelling; forwards to the member rint()
+  }
+  
+  template<typename real_t, int size>
   inline realpseudovec<real_t, size> round(realpseudovec<real_t, size> x)
   {
     return x.round();
diff --git a/vec_test.h b/vec_test.h
index 15f90bf..6813f66 100644
--- a/vec_test.h
+++ b/vec_test.h
@@ -612,7 +612,10 @@ namespace vecmathlib {
     realtestvec atanh() const { return MF::vml_atanh(*this); }
     realtestvec cbrt() const { return MF::vml_cbrt(*this); }
     realtestvec ceil() const { return MF::vml_ceil(*this); }
-    realtestvec copysign(realtestvec y) const { return MF::vml_copysign(*this, y); }
+    realtestvec copysign(realtestvec y) const
+    {
+      return MF::vml_copysign(*this, y); // forwards to the generic MF implementation
+    }
     realtestvec cos() const { return MF::vml_cos(*this); }
     realtestvec cosh() const { return MF::vml_cosh(*this); }
     realtestvec exp() const { return MF::vml_exp(*this); }
@@ -622,7 +625,10 @@ namespace vecmathlib {
     realtestvec fabs() const { return MF::vml_fabs(*this); }
     realtestvec fdim(realtestvec y) const { return MF::vml_fdim(*this, y); }
     realtestvec floor() const { return MF::vml_floor(*this); }
-    realtestvec fma(realtestvec y, realtestvec z) const { return MF::vml_fma(*this, y, z); }
+    realtestvec fma(realtestvec y, realtestvec z) const // forwards (*this, y, z) to MF::vml_fma
+    {
+      return MF::vml_fma(*this, y, z);
+    }
     realtestvec fmax(realtestvec y) const { return MF::vml_fmax(*this, y); }
     realtestvec fmin(realtestvec y) const { return MF::vml_fmin(*this, y); }
     realtestvec fmod(realtestvec y) const { return MF::vml_fmod(*this, y); }
@@ -640,7 +646,11 @@ namespace vecmathlib {
     realtestvec log2() const { return MF::vml_log2(*this); }
     realtestvec pow(realtestvec y) const { return MF::vml_pow(*this, y); }
     realtestvec rcp() const { return MF::vml_rcp(*this); }
-    realtestvec remainder(realtestvec y) const { return MF::vml_remainder(*this, y); }
+    realtestvec remainder(realtestvec y) const
+    {
+      return MF::vml_remainder(*this, y); // forwards to the generic MF implementation
+    }
+    realtestvec rint() const { return MF::vml_rint(*this); } // forwards to the generic MF implementation
     realtestvec round() const { return MF::vml_round(*this); }
     realtestvec rsqrt() const { return MF::vml_rsqrt(*this); }
     boolvec_t signbit() const { return MF::vml_signbit(*this); }
@@ -1128,6 +1138,12 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline realtestvec<real_t, size> rint(realtestvec<real_t, size> x)
+  {
+    return x.rint(); // free-function spelling; forwards to the member rint()
+  }
+  
+  template<typename real_t, int size>
   inline realtestvec<real_t, size> round(realtestvec<real_t, size> x)
   {
     return x.round();