22 files changed, 896 insertions, 110 deletions
diff --git a/floatprops.h b/floatprops.h
index 45d4fed..e81d8d0 100644
--- a/floatprops.h
+++ b/floatprops.h
@@ -84,6 +84,12 @@ namespace vecmathlib {
       std::memcpy(&res, &x, sizeof res);
       return res;
     }
+    static int_t replicate_byte(unsigned char byte)
+    {
+      int_t filled;  // every byte of the result is set to `byte`
+      std::memset(&filled, byte, sizeof(int_t));
+      return filled;
+    }
     
     // Convert values (truncate)
     static real_t convert_float(int_t x) { __builtin_unreachable(); }
@@ -144,6 +150,12 @@ namespace vecmathlib {
       std::memcpy(&res, &x, sizeof res);
       return res;
     }
+    static int_t replicate_byte(unsigned char byte)
+    {
+      int_t filled;  // every byte of the result is set to `byte`
+      std::memset(&filled, byte, sizeof(int_t));
+      return filled;
+    }
     
     // Convert values (truncate)
     static real_t convert_float(int_t x) { __builtin_unreachable(); }
@@ -199,6 +211,12 @@ namespace vecmathlib {
       std::memcpy(&res, &x, sizeof res);
       return res;
     }
+    static int_t replicate_byte(unsigned char byte)
+    {
+      int_t filled;  // every byte of the result is set to `byte`
+      std::memset(&filled, byte, sizeof(int_t));
+      return filled;
+    }
     
     // Convert values (truncate)
     static real_t convert_float(int_t x) { return real_t(x); }
@@ -254,6 +272,12 @@ namespace vecmathlib {
       std::memcpy(&res, &x, sizeof res);
       return res;
     }
+    static int_t replicate_byte(unsigned char byte)
+    {
+      int_t filled;  // every byte of the result is set to `byte`
+      std::memset(&filled, byte, sizeof(int_t));
+      return filled;
+    }
     
     // Convert values (truncate)
     static real_t convert_float(int_t x) { return real_t(x); }
diff --git a/instantiations.cc b/instantiations.cc
index aca2447..5e64ef2 100644
--- a/instantiations.cc
+++ b/instantiations.cc
@@ -35,6 +35,10 @@ namespace vecmathlib {
   template realvec<float,1> round(realvec<float,1> x);
 #endif
   
+#ifdef VECMATHLIB_HAVE_VEC_FLOAT_8
+  template intvec<float,8> popcount(intvec<float,8>);
+#endif  // the 8-wide float vector only exists under VECMATHLIB_HAVE_VEC_FLOAT_8
+  
 #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1
   template realvec<double,1> exp(realvec<double,1> x);
   template realvec<double,1> log(realvec<double,1> x);
@@ -68,6 +72,7 @@ namespace vecmathlib {
   template realvec<double,4> set_elt<realvec<double,4>,1>(realvec<double,4> x, realvec<double,4>::real_t a);
   template realvec<double,4> set_elt<realvec<double,4>,2>(realvec<double,4> x, realvec<double,4>::real_t a);
   template realvec<double,4> set_elt<realvec<double,4>,3>(realvec<double,4> x, realvec<double,4>::real_t a);
+  template intvec<double,4> popcount(intvec<double,4>);
 #endif
   
 }
diff --git a/mathfuncs.h b/mathfuncs.h
index af494c8..8d90f9a 100644
--- a/mathfuncs.h
+++ b/mathfuncs.h
@@ -5,6 +5,8 @@
 
 #include "mathfuncs_base.h"
 
+#include "mathfuncs_int.h"
+
 #include "mathfuncs_asin.h"
 #include "mathfuncs_asinh.h"
 #include "mathfuncs_convert.h"
diff --git a/mathfuncs_base.h b/mathfuncs_base.h
index f413d2d..14f1051 100644
--- a/mathfuncs_base.h
+++ b/mathfuncs_base.h
@@ -4,7 +4,6 @@
 #define MATHFUNCS_BASE_H
 
 #include "floatprops.h"
-#include "vec_base.h"
 
 
 
@@ -38,6 +37,17 @@ namespace vecmathlib {
     // static intvec_t IV(int_t a) { return intvec_t(a); }
     // static boolvec_t BV(bool a) { return boolvec_t(a); }
     
+    // int
+    static intvec_t vml_abs(intvec_t x);
+    static intvec_t vml_bitifthen(intvec_t x, intvec_t y, intvec_t z);
+    static intvec_t vml_clz(intvec_t x);
+    static boolvec_t vml_isignbit(intvec_t x);
+    static intvec_t vml_max(intvec_t x, intvec_t y);
+    static intvec_t vml_min(intvec_t x, intvec_t y);
+    static intvec_t vml_popcount(intvec_t x);
+    static intvec_t vml_rotate(intvec_t x, int_t n);
+    static intvec_t vml_rotate(intvec_t x, intvec_t n);
+    
     // asin
     static realvec_t vml_acos(realvec_t x);
     static realvec_t vml_asin(realvec_t x);
diff --git a/mathfuncs_int.h b/mathfuncs_int.h
new file mode 100644
index 0000000..862189d
--- /dev/null
+++ b/mathfuncs_int.h
@@ -0,0 +1,135 @@
+// -*-C++-*-
+
+#ifndef MATHFUNCS_INT_H
+#define MATHFUNCS_INT_H
+
+#include "mathfuncs_base.h"
+
+#include <climits>
+
+
+
+namespace vecmathlib {
+  
+  template<typename realvec_t>
+  typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_abs(intvec_t x)
+  {
+    return ifthen(isignbit(x), -x, x); // negate the elements isignbit() flags
+  }
+  
+  template<typename realvec_t>
+  typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_bitifthen(intvec_t x,
+                                                                   intvec_t y,
+                                                                   intvec_t z)
+  {
+    return (x & y) | (~x & z); // per bit: take y where x is 1, z where x is 0
+  }
+  
+  template<typename realvec_t>
+  typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_clz(intvec_t x)
+  {
+    // These implementations return 8*sizeof(TYPE) when the input is 0
+    // (nothing is set after the ORs below, so "bits" minus zero is returned)
+    // These explicit implementations are taken from
+    // <http://aggregate.org/MAGIC/>:
+    // 
+    // @techreport{magicalgorithms,
+    //   author={Henry Gordon Dietz},
+    //   title={{The Aggregate Magic Algorithms}},
+    //   institution={University of Kentucky},
+    //   howpublished={Aggregate.Org online technical report},
+    //   date={2013-03-25},
+    //   URL={http://aggregate.org/MAGIC/}
+    // }
+    // OR the highest set bit into every lower position, then count the 1 bits
+    int_t bits = CHAR_BIT * sizeof(int_t);
+    if (bits >  1) x |= lsr(x,  1);
+    if (bits >  2) x |= lsr(x,  2);
+    if (bits >  4) x |= lsr(x,  4);
+    if (bits >  8) x |= lsr(x,  8);
+    if (bits > 16) x |= lsr(x, 16);
+    if (bits > 32) x |= lsr(x, 32);
+    if (bits > 64) x |= lsr(x, 64);
+    assert(bits<=128); // the shift cascade above stops at 64
+    return IV(I(bits)) - popcount(x); // leading zeros = width - number of 1s
+  }
+  
+  template<typename realvec_t>
+  typename realvec_t::boolvec_t mathfuncs<realvec_t>::vml_isignbit(intvec_t x)
+  {
+    return x < IV(I(0)); // true for each element that compares below zero
+  }
+  
+  template<typename realvec_t>
+  typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_max(intvec_t x,
+                                                             intvec_t y)
+  {
+    return ifthen(x>=y, x, y); // element-wise: x where x>=y, otherwise y
+  }
+  
+  template<typename realvec_t>
+  typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_min(intvec_t x,
+                                                             intvec_t y)
+  {
+    return ifthen(x<y, x, y); // element-wise: x where x<y, otherwise y
+  }
+  
+  template<typename realvec_t>
+  typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_popcount(intvec_t x)
+  {
+    // These explicit implementations are taken from
+    // <http://aggregate.org/MAGIC/>:
+    // 
+    // @techreport{magicalgorithms,
+    //   author={Henry Gordon Dietz},
+    //   title={{The Aggregate Magic Algorithms}},
+    //   institution={University of Kentucky},
+    //   howpublished={Aggregate.Org online technical report},
+    //   date={2013-03-25},
+    //   URL={http://aggregate.org/MAGIC/}
+    // }
+    
+    int_t bits = CHAR_BIT * sizeof(int_t);
+    // Masks with a repeating bit pattern, obtained by dividing the all-ones value
+    // intvec_t x55 = IV(FP::replicate_byte(0x55));
+    // intvec_t x33 = IV(FP::replicate_byte(0x33));
+    // intvec_t x0f = IV(FP::replicate_byte(0x0f));
+    intvec_t x55 = I(~U(0) /  U(3)); // 0b0101... (0x55 in every byte)
+    intvec_t x33 = I(~U(0) /  U(5)); // 0b00110011... (0x33 in every byte)
+    intvec_t x0f = I(~U(0) / U(17)); // 0b0000111100001111... (0x0f in every byte)
+    // Sum neighbouring bits into 2-bit, 4-bit and 8-bit fields, then add the bytes
+    x -= lsr(x, I(1)) & x55;               // each 2-bit field: count of its bits
+    x = (x & x33) + (lsr(x, I(2)) & x33);  // each 4-bit field: count of its bits
+    x += lsr(x, I(4));
+    x &= x0f;                              // each byte: count of its bits
+    if (bits >  8) x += lsr(x,  I(8));     // fold the byte counts into the low byte
+    if (bits > 16) x += lsr(x, I(16));
+    if (bits > 32) x += lsr(x, I(32));
+    if (bits > 64) x += lsr(x, I(64));
+    assert(bits<=128); // the folding cascade above stops at 64
+    return x & IV(I(0xff)); // only the low byte holds the total
+  }
+  
+  template<typename realvec_t>
+  typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_rotate(intvec_t x,
+                                                                int_t n)
+  {
+    int_t mask = CHAR_BIT * sizeof(int_t) - 1; // keeps both shift counts below the width
+    intvec_t left = x << (n & mask);           // bits moved towards the top
+    intvec_t right = lsr(x, -n & mask);        // bits that wrap around to the bottom
+    return left | right;
+  }
+  
+  template<typename realvec_t>
+  typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_rotate(intvec_t x,
+                                                                intvec_t n)
+  {
+    intvec_t mask = IV(I(CHAR_BIT * sizeof(int_t) - 1)); // keeps counts below the width
+    intvec_t left = x << (n & mask);      // bits moved towards the top
+    intvec_t right = lsr(x, -n & mask);   // bits that wrap around to the bottom
+    return left | right;
+  }
+  
+}; // namespace vecmathlib
+
+#endif  // #ifndef MATHFUNCS_INT_H
diff --git a/test.cc b/test.cc
index 256c0e3..3f88492 100644
--- a/test.cc
+++ b/test.cc
@@ -2,6 +2,7 @@
 
 #include "vecmathlib.h"
 
+#include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <cstdio>
@@ -944,7 +945,7 @@ struct vecmathlib_test {
   template<typename T> static T local_sr(T x, T y) { return x>>y; }
   template<typename T> static T local_sl(T x, T y) { return x<<y; }
 
-  template<typename T> static bool local_signbit(T x) { return x<0; }
+  template<typename T> static bool local_isignbit(T x) { return x<0; }
   template<typename T> static bool local_eq(T x, T y) { return x==y; }
   template<typename T> static bool local_ne(T x, T y) { return x!=y; }
   template<typename T> static bool local_lt(T x, T y) { return x<y; }
@@ -1027,7 +1028,7 @@ struct vecmathlib_test {
       check_int<IV,IV>(">>", local_sr, local_sr, x, y & IV(bits-1));
       check_int<IV,IV>("<<", local_sl, local_sl, x, y & IV(bits-1));
       
-      check_bool<IV>("signbit", local_signbit, vecmathlib::signbit, x);
+      check_bool<IV>("isignbit", local_isignbit, vecmathlib::isignbit, x);
       check_bool<IV,IV>("==", local_eq, local_veq, x, y);
       check_bool<IV,IV>("!=", local_ne, local_vne, x, y);
       check_bool<IV,IV>("<", local_lt, local_vlt, x, y);
@@ -1075,6 +1076,64 @@ struct vecmathlib_test {
     check_real("flt_rounds", R(1.0) + 2*FP::epsilon(), rbase[0]);
   }
   
+  static int_t local_bitifthen(int_t x, int_t y, int_t z)
+  {
+    return (x & y) | (~x & z);
+  }
+  static int_t local_clz(int_t x)
+  {
+    const int nbits = CHAR_BIT * sizeof(x);
+    int lead = 0;  // zero bits seen so far, scanning down from the top bit
+    while (lead<nbits && !(x & (I(1) << (nbits-lead-1)))) {
+      ++lead;
+    }
+    return lead;   // equals nbits when no bit is set
+  }
+  static int_t local_max(int_t x, int_t y)
+  {
+    return std::max(x, y);
+  }
+  static int_t local_min(int_t x, int_t y)
+  {
+    return std::min(x, y);
+  }
+  static int_t local_popcount(int_t x)
+  {
+    const int nbits = CHAR_BIT * sizeof(x);
+    int ones = 0;  // number of set bits found so far
+    for (int pos=0; pos<nbits; ++pos) {
+      ones += (x & (I(1) << pos)) != 0;
+    }
+    return ones;
+  }
+  static int_t local_rotate(int_t x, int_t n)
+  {
+    int_t mask = CHAR_BIT * sizeof(int_t) - 1;  // keeps shift counts below the width
+    int_t left = I(U(x) << U(n & mask));    // shift as unsigned: x may be negative
+    int_t right = I(U(x) >> U(-n & mask));  // bits that wrap around to the bottom
+    return left | right;
+  }
+  static void test_abs()
+  {
+    cout << "   testing abs bitifthen clz isignbit max min popcount rotate...\n" << flush;
+        
+    for (int i=0; i<imax; ++i) {
+      const intvec_t x = random(I(-1000000), I(+1000000));
+      const intvec_t y = random(I(-1000000), I(+1000000));
+      const intvec_t z = random(I(-1000000), I(+1000000));
+      
+      check_int<IV>("abs", std::abs, vecmathlib::abs, x);
+      check_int<IV,IV,IV>("bitifthen",
+                          local_bitifthen, vecmathlib::bitifthen, x, y, z);
+      check_int<IV>("clz", local_clz, vecmathlib::clz, x);
+      check_int<IV,IV>("max", local_max, vecmathlib::max, x, y);
+      check_int<IV,IV>("min", local_min, vecmathlib::min, x, y);
+      check_int<IV>("popcount", local_popcount, vecmathlib::popcount, x);
+      check_int<IV,IV>("rotate", local_rotate, vecmathlib::rotate, x, y[0]);
+      check_int<IV,IV>("rotate", local_rotate, vecmathlib::rotate, x, y);
+    }
+  }
+  
   // Change signature: "int" -> "int_t"
   static real_t local_frexp0(real_t x)
   {
@@ -1521,6 +1580,7 @@ struct vecmathlib_test {
     test_mem();
     
     // Test "basic" functions first
+    test_abs();
     test_fabs();
     test_convert();
     test_rcp();
diff --git a/vec_altivec_float4.h b/vec_altivec_float4.h
index 024f15c..26f798e 100644
--- a/vec_altivec_float4.h
+++ b/vec_altivec_float4.h
@@ -201,18 +201,25 @@ namespace vecmathlib {
     intvec& operator|=(intvec const& x) { return *this=*this|x; }
     intvec& operator^=(intvec const& x) { return *this=*this^x; }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const
+    {
+      return MF::vml_bitifthen(*this, x, y);
+    }
+    
     
     
-    intvec lsr(int_t n) const { return lsr(IV(n)); }
+    intvec_t lsr(int_t n) const { return lsr(IV(n)); }
+    intvec_t rotate(int_t n) const { return MF::vml_rotate(*this, n); } // forwards to the generic rotate
     intvec operator>>(int_t n) const { return *this >> IV(n); }
     intvec operator<<(int_t n) const { return *this << IV(n); }
     intvec& operator>>=(int_t n) { return *this=*this>>n; }
     intvec& operator<<=(int_t n) { return *this=*this<<n; }
     
-    intvec lsr(intvec n) const
+    intvec_t lsr(intvec_t n) const
     {
       return vec_sr(v, (__vector unsigned int)n.v);
     }
+    intvec_t rotate(intvec_t n) const { return MF::vml_rotate(*this, n); } // forwards to the generic rotate
     intvec operator>>(intvec n) const
     {
       return vec_sra(v, (__vector unsigned int)n.v);
@@ -224,12 +231,10 @@ namespace vecmathlib {
     intvec& operator>>=(intvec n) { return *this=*this>>n; }
     intvec& operator<<=(intvec n) { return *this=*this<<n; }
     
+    intvec_t clz() const { return MF::vml_clz(*this); }
+    intvec_t popcount() const { return MF::vml_popcount(*this); }
     
     
-    boolvec_t signbit() const
-    {
-      return *this < IV(I(0));
-    }
     
     boolvec_t operator==(intvec const& x) const { return vec_cmpeq(v, x.v); }
     boolvec_t operator!=(intvec const& x) const { return !(*this == x); }
@@ -237,6 +242,11 @@ namespace vecmathlib {
     boolvec_t operator<=(intvec const& x) const { return !(*this > x); }
     boolvec_t operator>(intvec const& x) const { return vec_cmpgt(v, x.v); }
     boolvec_t operator>=(intvec const& x) const { return !(*this < x); }
+    
+    intvec_t abs() const { return vec_abs(v); }
+    intvec_t max(intvec_t x) const { return vec_max(v, x.v); }
+    intvec_t min(intvec_t x) const { return vec_min(v, x.v); }
+    boolvec_t isignbit() const { return MF::vml_isignbit(*this); }
   };
   
   
diff --git a/vec_avx_double4.h b/vec_avx_double4.h
index 4117225..28dce07 100644
--- a/vec_avx_double4.h
+++ b/vec_avx_double4.h
@@ -239,9 +239,11 @@ namespace vecmathlib {
     intvec& operator|=(intvec const& x) { return *this=*this|x; }
     intvec& operator^=(intvec const& x) { return *this=*this^x; }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const;
     
     
-    intvec lsr(int_t n) const
+    
+    intvec_t lsr(int_t n) const
     {
       __m128i vlo = _mm256_castsi256_si128(v);
       __m128i vhi = _mm256_extractf128_si256(v, 1);
@@ -249,6 +251,7 @@ namespace vecmathlib {
       vhi = _mm_srli_epi64(vhi, n);
       return _mm256_insertf128_si256(_mm256_castsi128_si256(vlo), vhi, 1);
     }
+    intvec_t rotate(int_t n) const;
     intvec operator>>(int_t n) const
     {
       __m128i vlo = _mm256_castsi256_si128(v);
@@ -291,14 +294,15 @@ namespace vecmathlib {
     intvec& operator>>=(int_t n) { return *this=*this>>n; }
     intvec& operator<<=(int_t n) { return *this=*this<<n; }
     
-    intvec lsr(intvec n) const
+    intvec_t lsr(intvec_t n) const
     {
-      intvec r;
+      intvec_t r;
       for (int i=0; i<size; ++i) {
         r.set_elt(i, U((*this)[i]) >> U(n[i]));
       }
       return r;
     }
+    intvec_t rotate(intvec_t n) const;
     intvec operator>>(intvec n) const
     {
       intvec r;
@@ -318,12 +322,10 @@ namespace vecmathlib {
     intvec& operator>>=(intvec n) { return *this=*this>>n; }
     intvec& operator<<=(intvec n) { return *this=*this<<n; }
     
+    intvec_t clz() const;
+    intvec_t popcount() const;
     
     
-    boolvec_t signbit() const
-    {
-      return as_bool();
-    }
     
     boolvec_t operator==(intvec const& x) const
     {
@@ -354,6 +356,11 @@ namespace vecmathlib {
     {
       return ! (*this < x);
     }
+    
+    intvec_t abs() const;
+    boolvec_t isignbit() const;
+    intvec_t max(intvec_t x) const;
+    intvec_t min(intvec_t x) const;
   };
   
   
@@ -687,6 +694,22 @@ namespace vecmathlib {
   
   // intvec definitions
   
+  inline intvec<double,4> intvec<double,4>::abs() const
+  {
+    return MF::vml_abs(*this);
+  }
+  
+  inline intvec<double,4> intvec<double,4>::bitifthen(intvec_t x,
+                                                      intvec_t y) const
+  {
+    return MF::vml_bitifthen(*this, x, y);
+  }
+  
+  inline intvec<double,4> intvec<double,4>::clz() const
+  {
+    return MF::vml_clz(*this);
+  }
+  
   inline realvec<double,4> intvec<double,4>::as_float() const
   {
     return _mm256_castsi256_pd(v);
@@ -702,6 +725,36 @@ namespace vecmathlib {
     return r;
   }
   
+  inline boolvec<double,4> intvec<double,4>::isignbit() const
+  {
+    return MF::vml_isignbit(*this);
+  }
+  
+  inline intvec<double,4> intvec<double,4>::max(intvec_t x) const
+  {
+    return MF::vml_max(*this, x);
+  }
+  
+  inline intvec<double,4> intvec<double,4>::min(intvec_t x) const
+  {
+    return MF::vml_min(*this, x);
+  }
+  
+  inline intvec<double,4> intvec<double,4>::popcount() const
+  {
+    return MF::vml_popcount(*this);
+  }
+  
+  inline intvec<double,4> intvec<double,4>::rotate(int_t n) const
+  {
+    return MF::vml_rotate(*this, n);
+  }
+  
+  inline intvec<double,4> intvec<double,4>::rotate(intvec_t n) const
+  {
+    return MF::vml_rotate(*this, n);
+  }
+  
 } // namespace vecmathlib
 
 #endif  // #ifndef VEC_AVX_DOUBLE4_H
diff --git a/vec_avx_float8.h b/vec_avx_float8.h
index aeea50b..e27c53e 100644
--- a/vec_avx_float8.h
+++ b/vec_avx_float8.h
@@ -248,9 +248,11 @@ namespace vecmathlib {
     intvec& operator|=(intvec const& x) { return *this=*this|x; }
     intvec& operator^=(intvec const& x) { return *this=*this^x; }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const;
     
     
-    intvec lsr(int_t n) const
+    
+    intvec_t lsr(int_t n) const
     {
       __m128i vlo = _mm256_castsi256_si128(v);
       __m128i vhi = _mm256_extractf128_si256(v, 1);
@@ -258,6 +260,7 @@ namespace vecmathlib {
       vhi = _mm_srli_epi32(vhi, n);
       return _mm256_insertf128_si256(_mm256_castsi128_si256(vlo), vhi, 1);
     }
+    intvec_t rotate(int_t n) const;
     intvec operator>>(int_t n) const
     {
       __m128i vlo = _mm256_castsi256_si128(v);
@@ -277,14 +280,15 @@ namespace vecmathlib {
     intvec& operator>>=(int_t n) { return *this=*this>>n; }
     intvec& operator<<=(int_t n) { return *this=*this<<n; }
     
-    intvec lsr(intvec n) const
+    intvec_t lsr(intvec_t n) const
     {
-      intvec r;
+      intvec_t r;
       for (int i=0; i<size; ++i) {
         r.set_elt(i, U((*this)[i]) >> U(n[i]));
       }
       return r;
     }
+    intvec_t rotate(intvec_t n) const;
     intvec operator>>(intvec n) const
     {
       intvec r;
@@ -304,12 +308,10 @@ namespace vecmathlib {
     intvec& operator>>=(intvec n) { return *this=*this>>n; }
     intvec& operator<<=(intvec n) { return *this=*this<<n; }
     
+    intvec_t clz() const;
+    intvec_t popcount() const;
     
     
-    boolvec_t signbit() const
-    {
-      return as_bool();
-    }
     
     boolvec_t operator==(intvec const& x) const
     {
@@ -340,6 +342,11 @@ namespace vecmathlib {
     {
       return ! (*this < x);
     }
+    
+    intvec_t abs() const;
+    boolvec_t isignbit() const { return as_bool(); }
+    intvec_t max(intvec_t x) const;
+    intvec_t min(intvec_t x) const;
   };
   
   
@@ -691,16 +698,57 @@ namespace vecmathlib {
   
   // intvec definitions
   
+  inline intvec<float,8> intvec<float,8>::abs() const
+  {
+    return MF::vml_abs(*this);
+  }
+  
   inline realvec<float,8> intvec<float,8>::as_float() const
   {
     return _mm256_castsi256_ps(v);
   }
   
+  inline intvec<float,8> intvec<float,8>::bitifthen(intvec_t x,
+                                                    intvec_t y) const
+  {
+    return MF::vml_bitifthen(*this, x, y);
+  }
+  
+  inline intvec<float,8> intvec<float,8>::clz() const
+  {
+    return MF::vml_clz(*this);
+  }
+  
   inline realvec<float,8> intvec<float,8>::convert_float() const
   {
     return _mm256_cvtepi32_ps(v);
   }
   
+  inline intvec<float,8> intvec<float,8>::max(intvec_t x) const
+  {
+    return MF::vml_max(*this, x);
+  }
+  
+  inline intvec<float,8> intvec<float,8>::min(intvec_t x) const
+  {
+    return MF::vml_min(*this, x);
+  }
+  
+  inline intvec<float,8> intvec<float,8>::popcount() const
+  {
+    return MF::vml_popcount(*this);
+  }
+  
+  inline intvec<float,8> intvec<float,8>::rotate(int_t n) const
+  {
+    return MF::vml_rotate(*this, n);
+  }
+  
+  inline intvec<float,8> intvec<float,8>::rotate(intvec_t n) const
+  {
+    return MF::vml_rotate(*this, n);
+  }
+  
 } // namespace vecmathlib
 
 #endif  // #ifndef VEC_AVX_FLOAT8_H
diff --git a/vec_base.h b/vec_base.h
index c2dc3b4..3db2e24 100644
--- a/vec_base.h
+++ b/vec_base.h
@@ -101,6 +101,32 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline intvec<real_t, size> abs(intvec<real_t, size> x)
+  {
+    return x.abs();
+  }
+  
+  template<typename real_t, int size>
+  inline intvec<real_t, size> bitifthen(intvec<real_t, size> x,
+                                        intvec<real_t, size> y,
+                                        intvec<real_t, size> z)
+  {
+    return x.bitifthen(y, z);
+  }
+  
+  template<typename real_t, int size>
+  inline intvec<real_t, size> clz(intvec<real_t, size> x)
+  {
+    return x.clz();
+  }
+  
+  template<typename real_t, int size>
+  inline boolvec<real_t, size> isignbit(intvec<real_t, size> x)
+  {
+    return x.isignbit();
+  }
+  
+  template<typename real_t, int size>
   inline intvec<real_t, size> lsr(intvec<real_t, size> x,
                                   typename intvec<real_t, size>::int_t n)
   {
@@ -115,9 +141,37 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
-  inline boolvec<real_t, size> signbit(intvec<real_t, size> x)
+  inline intvec<real_t, size> max(intvec<real_t, size> x,
+                                  intvec<real_t, size> y)
   {
-    return x.signbit();
+    return x.max(y);
+  }
+  
+  template<typename real_t, int size>
+  inline intvec<real_t, size> min(intvec<real_t, size> x,
+                                  intvec<real_t, size> y)
+  {
+    return x.min(y);
+  }
+  
+  template<typename real_t, int size>
+  inline intvec<real_t, size> popcount(intvec<real_t, size> x)
+  {
+    return x.popcount();
+  }
+  
+  template<typename real_t, int size>
+  inline intvec<real_t, size> rotate(intvec<real_t, size> x,
+                                     typename intvec<real_t, size>::int_t n)
+  {
+    return x.rotate(n);
+  }
+  
+  template<typename real_t, int size>
+  inline intvec<real_t, size> rotate(intvec<real_t, size> x,
+                                     intvec<real_t, size> n)
+  {
+    return x.rotate(n);
   }
   
   
diff --git a/vec_mask.h b/vec_mask.h
index 31538b0..6f8c996 100644
--- a/vec_mask.h
+++ b/vec_mask.h
@@ -36,8 +36,8 @@ namespace vecmathlib {
       if (__builtin_expect(all_m, true)) {
         m = true;
       } else {
-        m = (! signbit(intvec_t(i          - imin) + intvec_t::iota()) &&
-               signbit(intvec_t(i + size-1 - imax) + intvec_t::iota()));
+        m = (! isignbit(intvec_t(i          - imin) + intvec_t::iota()) &&
+               isignbit(intvec_t(i + size-1 - imax) + intvec_t::iota()));
       }
     }
     
@@ -49,8 +49,8 @@ namespace vecmathlib {
       if (__builtin_expect(all_m, true)) {
         m = true;
       } else {
-        m = (! signbit(intvec_t(i          - imin) + intvec_t::iota()) &&
-               signbit(intvec_t(i + size-1 - imax) + intvec_t::iota()));
+        m = (! isignbit(intvec_t(i          - imin) + intvec_t::iota()) &&
+               isignbit(intvec_t(i + size-1 - imax) + intvec_t::iota()));
       }
     }
     
@@ -68,7 +68,7 @@ namespace vecmathlib {
       if (__builtin_expect(all_m, true)) {
         m = true;
       } else {
-        m = signbit(intvec_t(i + size-1 - imax) + intvec_t::iota());
+        m = isignbit(intvec_t(i + size-1 - imax) + intvec_t::iota());
       }
     }
   };
diff --git a/vec_mic_double8.h b/vec_mic_double8.h
index b8873c7..b4dad21 100644
--- a/vec_mic_double8.h
+++ b/vec_mic_double8.h
@@ -216,6 +216,8 @@ namespace vecmathlib {
     intvec& operator|=(const intvec& x) { return *this=*this|x; }
     intvec& operator^=(const intvec& x) { return *this=*this^x; }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const;
+    
     
     
     intvec lsr(int_t n) const
@@ -231,6 +233,7 @@ namespace vecmathlib {
         return _mm512_mask_swizzle_epi32(vhi, 0xb0101010101010101, vlo);
       }
     }
+    intvec_t rotate(int_t n) const;
     intvec operator>>(int_t n) const
     {
       if (n < 32) {
@@ -270,6 +273,7 @@ namespace vecmathlib {
       }
       return r;
     }
+    intvec_t rotate(intvec_t n) const;
     intvec operator>>(intvec n) const
     {
       // TODO: improve this
@@ -291,13 +295,28 @@ namespace vecmathlib {
     intvec& operator>>=(intvec n) { return *this=*this>>n; }
     intvec& operator<<=(intvec n) { return *this=*this<<n; }
     
-    
-    
-    boolvec_t signbit() const
+    intvec_t clz() const
     {
-      return *this < IV(I(0));
+      // Return 8*sizeof(TYPE) when the input is 0 (__builtin_clzll(0) is undefined)
+      intvec_t r;
+      for (int i=0; i<size; ++i) {
+        const int_t e = (*this)[i];  // __lzcnt64
+        r.set_elt(i, e ? __builtin_clzll(e) : int(8*sizeof(int_t)));
+      }
+      return r;
+    }
+    intvec_t popcount() const
+    {
+      intvec_t r;
+      for (int i=0; i<size; ++i) {
+        // _mm_popcnt_u64
+        r.set_elt(i, __builtin_popcountll((*this)[i]));
+      }
+      return r;
     }
     
+    
+    
     boolvec_t operator==(const intvec& x) const
     {
       return mask16tomask8(_mm512_cmp_epi32_mask(v, x.v, _MM_CMPINT_EQ));
@@ -322,6 +341,11 @@ namespace vecmathlib {
     {
       return mask16tomask8(_mm512_cmp_epi32_mask(v, x.v, _MM_CMPINT_GE));
     }
+    
+    intvec_t abs() const;
+    boolvec_t isignbit() const;
+    intvec_t max(intvec_t x) const;
+    intvec_t min(intvec_t x) const;
   };
   
   
@@ -639,6 +663,42 @@ namespace vecmathlib {
     return r;
   }
   
+  inline intvec<double,8> intvec<double,8>::abs() const
+  {
+    return MF::vml_abs(*this);
+  }
+  
+  inline intvec<double,8> intvec<double,8>::bitifthen(intvec_t x,
+                                                      intvec_t y) const
+  {
+    return MF::vml_bitifthen(*this, x, y);
+  }
+  
+  inline boolvec<double,8> intvec<double,8>::isignbit() const
+  {
+    return MF::vml_isignbit(*this);
+  }
+  
+  inline intvec<double,8> intvec<double,8>::max(intvec_t x) const
+  {
+    return MF::vml_max(*this, x);
+  }
+  
+  inline intvec<double,8> intvec<double,8>::min(intvec_t x) const
+  {
+    return MF::vml_min(*this, x);
+  }
+  
+  inline intvec<double,8> intvec<double,8>::rotate(int_t n) const
+  {
+    return MF::vml_rotate(*this, n);
+  }
+  
+  inline intvec<double,8> intvec<double,8>::rotate(intvec_t n) const
+  {
+    return MF::vml_rotate(*this, n);
+  }
+  
 } // namespace vecmathlib
 
 #endif  // #ifndef VEC_MIC_DOUBLE8_H
diff --git a/vec_neon_float2.h b/vec_neon_float2.h
index 9b48424..84d9e4e 100644
--- a/vec_neon_float2.h
+++ b/vec_neon_float2.h
@@ -231,7 +231,7 @@ namespace vecmathlib {
     
     
     
-    boolvec_t signbit() const
+    boolvec_t isignbit() const
     {
       //return *this < IV(I(0));
       return intvec(vshr_n_s32(v, FP::bits-1)).as_bool();
diff --git a/vec_neon_float4.h b/vec_neon_float4.h
index 7bba759..122b66a 100644
--- a/vec_neon_float4.h
+++ b/vec_neon_float4.h
@@ -237,7 +237,7 @@ namespace vecmathlib {
     
     
     
-    boolvec_t signbit() const
+    boolvec_t isignbit() const
     {
       //return *this < IV(I(0));
       return intvec(vshrq_n_s32(v, FP::bits-1)).as_bool();
diff --git a/vec_pseudo.h b/vec_pseudo.h
index f079bdb..9e2d1e0 100644
--- a/vec_pseudo.h
+++ b/vec_pseudo.h
@@ -10,6 +10,7 @@
 #include <algorithm>
 #include <cmath>
 #include <climits>
+#include <cstdlib>
 #include <string>
 #include <sstream>
 
@@ -185,7 +186,7 @@ namespace vecmathlib {
     {
       // Result: convert_bool(0)=false, convert_bool(else)=true
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d];
       return res;
     }
     realvec_t as_float() const;      // defined after realpseudovec
@@ -299,14 +300,17 @@ namespace vecmathlib {
       return res ^= x;
     }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const;
     
     
-    intpseudovec lsr(int_t n) const
+    
+    intvec_t lsr(int_t n) const
     {
-      intpseudovec res;
+      intvec_t res;
       for (int d=0; d<size; ++d) res.v[d] = I(U(v[d]) >> U(n));
       return res;
     }
+    intvec_t rotate(int_t n) const;
     intpseudovec& operator>>=(int_t n)
     {
       for (int d=0; d<size; ++d) v[d] >>= n;
@@ -328,12 +332,13 @@ namespace vecmathlib {
       return res <<= n;
     }
     
-    intpseudovec lsr(intpseudovec n) const
+    intvec_t lsr(intvec_t n) const
     {
-      intpseudovec res;
+      intvec_t res;
       for (int d=0; d<size; ++d) res.v[d] = I(U(v[d]) >> U(n.v[d]));
       return res;
     }
+    intvec_t rotate(intvec_t n) const;
     intpseudovec& operator>>=(intpseudovec n)
     {
       for (int d=0; d<size; ++d) v[d] >>= n.v[d];
@@ -355,49 +360,101 @@ namespace vecmathlib {
       return res <<= n;
     }
     
-    
-    
-    boolvec_t signbit() const
+    intvec_t clz() const
     {
-      boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] < 0);
+      // Zero yields the bit width (the builtins are undefined for 0); narrow
+      // values are zero-extended via U() and the int's surplus bits removed.
+      const int nbits = CHAR_BIT * sizeof(int_t);
+      intvec_t res;
+      for (int d=0; d<size; ++d) {
+        if (v[d] == 0) res.v[d] = nbits;
+        else if (sizeof(int_t) == sizeof(long long)) res.v[d] = __builtin_clzll(v[d]);
+        else if (sizeof(int_t) == sizeof(long)) res.v[d] = __builtin_clzl(v[d]);
+        else if (sizeof(int_t) <= sizeof(int))
+          res.v[d] = __builtin_clz(U(v[d])) - (int(CHAR_BIT * sizeof(int)) - nbits);
+        else __builtin_unreachable();
+      }
+      return res;
+    }
+    intvec_t popcount() const
+    {
+      intvec_t res;  // per-element number of 1 bits
+      if (sizeof(int_t) == sizeof(long long)) {
+        for (int d=0; d<size; ++d) res.v[d] = __builtin_popcountll(v[d]);
+      } else if (sizeof(int_t) == sizeof(long)) {
+        for (int d=0; d<size; ++d) res.v[d] = __builtin_popcountl(v[d]);
+      } else if (sizeof(int_t) <= sizeof(int)) {  // U(): no sign extension
+        for (int d=0; d<size; ++d) res.v[d] = __builtin_popcount(U(v[d]));
+      } else {
+        __builtin_unreachable();
+      }
       return res;
     }
     
+    
+    
     boolvec_t operator==(intpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] == x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] == x.v[d];
       return res;
     }
     boolvec_t operator!=(intpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] != x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] != x.v[d];
       return res;
     }
     boolvec_t operator<(intpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] < x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] < x.v[d];
       return res;
     }
     boolvec_t operator<=(intpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] <= x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] <= x.v[d];
       return res;
     }
     boolvec_t operator>(intpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] > x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] > x.v[d];
       return res;
     }
     boolvec_t operator>=(intpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] >= x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] >= x.v[d];
+      return res;
+    }
+    
+    intvec_t abs() const
+    {
+      intvec_t res;
+      for (int d=0; d<size; ++d) res.v[d] = std::abs(v[d]);
+      return res;
+    }
+    
+    boolvec_t isignbit() const
+    {
+      boolvec_t res;
+      for (int d=0; d<size; ++d) res.v[d] = v[d] < 0;
+      return res;
+    }
+    
+    intvec_t max(intvec_t x) const
+    {
+      intvec_t res;
+      for (int d=0; d<size; ++d) res.v[d] = std::max(v[d], x.v[d]);
+      return res;
+    }
+    
+    intvec_t min(intvec_t x) const
+    {
+      intvec_t res;
+      for (int d=0; d<size; ++d) res.v[d] = std::min(v[d], x.v[d]);
       return res;
     }
   };
@@ -462,38 +519,38 @@ namespace vecmathlib {
     boolvec_t mapb(bool f(real_t)) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, f(v[d]));
+      for (int d=0; d<size; ++d) res.v[d] = f(v[d]);  // apply the predicate f to every lane
       return res;
     }
     intvec_t map(int_t f(real_t)) const
     {
       intvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, f(v[d]));
+      for (int d=0; d<size; ++d) res.v[d] = f(v[d]);  // integer-valued f applied to every lane
       return res;
     }
     realvec_t map(real_t f(real_t)) const
     {
       realvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, f(v[d]));
+      for (int d=0; d<size; ++d) res.v[d] = f(v[d]);  // unary f applied to every lane
       return res;
     }
     realvec_t map(real_t f(real_t, int_t), intvec_t x) const
     {
       realvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, f(v[d], x.v[d]));
+      for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d]);  // f(real lane, matching integer lane of x)
       return res;
     }
     realvec_t map(real_t f(real_t, real_t), realvec_t x) const
     {
       realvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, f(v[d], x.v[d]));
+      for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d]);  // binary f applied to matching lanes of *this and x
       return res;
     }
     realvec_t map(real_t f(real_t, real_t, real_t),
                   realvec_t x, realvec_t y) const
     {
       realvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, f(v[d], x.v[d], y.v[d]));
+      for (int d=0; d<size; ++d) res.v[d] = f(v[d], x.v[d], y.v[d]);  // ternary f applied to matching lanes of *this, x and y
       return res;
     }
   public:
@@ -687,37 +744,37 @@ namespace vecmathlib {
     boolvec_t operator==(realpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] == x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] == x.v[d];  // lane d holds the result of v[d] == x.v[d]
       return res;
     }
     boolvec_t operator!=(realpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] != x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] != x.v[d];  // lane d holds the result of v[d] != x.v[d]
       return res;
     }
     boolvec_t operator<(realpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] < x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] < x.v[d];  // lane d holds the result of v[d] < x.v[d]
       return res;
     }
     boolvec_t operator<=(realpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] <= x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] <= x.v[d];  // lane d holds the result of v[d] <= x.v[d]
       return res;
     }
     boolvec_t operator>(realpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] > x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] > x.v[d];  // lane d holds the result of v[d] > x.v[d]
       return res;
     }
     boolvec_t operator>=(realpseudovec const& x) const
     {
       boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] >= x.v[d]);
+      for (int d=0; d<size; ++d) res.v[d] = v[d] >= x.v[d];  // lane d holds the result of v[d] >= x.v[d]
       return res;
     }
     
@@ -912,6 +969,13 @@ namespace vecmathlib {
   
   template<typename T, int N>
   inline
+  intpseudovec<T,N> intpseudovec<T,N>::bitifthen(intvec_t x, intvec_t y) const
+  {
+    return MF::vml_bitifthen(*this, x, y);  // no pseudovec-specific code: uses the generic MF routine
+  }
+  
+  template<typename T, int N>
+  inline
   typename intpseudovec<T,N>::realvec_t intpseudovec<T,N>::convert_float() const
   {
     realvec_t res;
@@ -919,6 +983,18 @@ namespace vecmathlib {
     return res;
   }
   
+  template<typename T, int N>
+  inline intpseudovec<T,N> intpseudovec<T,N>::rotate(int_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // scalar rotate count n; implemented by the generic MF routine
+  }
+  
+  template<typename T, int N>
+  inline intpseudovec<T,N> intpseudovec<T,N>::rotate(intvec_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // vector of rotate counts n; implemented by the generic MF routine
+  }
+  
 
 
   // Wrappers
@@ -975,15 +1051,15 @@ namespace vecmathlib {
   // intpseudovec wrappers
   
   template<typename real_t, int size>
-  inline boolpseudovec<real_t, size> as_bool(intpseudovec<real_t, size> x)
+  inline intpseudovec<real_t, size> abs(intpseudovec<real_t, size> x)  // free-function form of x.abs()
   {
-    return x.as_bool();
+    return x.abs();
   }
   
   template<typename real_t, int size>
-  inline boolpseudovec<real_t, size> convert_bool(intpseudovec<real_t, size> x)
+  inline boolpseudovec<real_t, size> as_bool(intpseudovec<real_t, size> x)  // free-function form of x.as_bool()
   {
-    return x.convert_bool();
+    return x.as_bool();
   }
   
   template<typename real_t, int size>
@@ -993,12 +1069,38 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline intpseudovec<real_t, size> bitifthen(intpseudovec<real_t, size> x,
+                                              intpseudovec<real_t, size> y,
+                                              intpseudovec<real_t, size> z)
+  {
+    return x.bitifthen(y, z);  // the first argument is the object the member is called on
+  }
+  
+  template<typename real_t, int size>
+  inline intpseudovec<real_t, size> clz(intpseudovec<real_t, size> x)  // free-function form of x.clz()
+  {
+    return x.clz();
+  }
+  
+  template<typename real_t, int size>
+  inline boolpseudovec<real_t, size> convert_bool(intpseudovec<real_t, size> x)
+  {
+    return x.convert_bool();  // free-function form of the member convert_bool()
+  }
+  
+  template<typename real_t, int size>
   inline realpseudovec<real_t, size> convert_float(intpseudovec<real_t, size> x)
   {
     return x.convert_float();
   }
   
   template<typename real_t, int size>
+  inline boolpseudovec<real_t, size> isignbit(intpseudovec<real_t, size> x)
+  {
+    return x.isignbit();  // integer sign test; replaces the former signbit() wrapper for intpseudovec
+  }
+  
+  template<typename real_t, int size>
   inline
   intpseudovec<real_t, size> lsr(intpseudovec<real_t, size> x,
                                  typename intpseudovec<real_t, size>::int_t n)
@@ -1014,9 +1116,39 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
-  inline boolpseudovec<real_t, size> signbit(intpseudovec<real_t, size> x)
+  inline intpseudovec<real_t, size> max(intpseudovec<real_t, size> x,
+                                        intpseudovec<real_t, size> y)
   {
-    return x.signbit();
+    return x.max(y);  // free-function form of the member max()
+  }
+  
+  template<typename real_t, int size>
+  inline intpseudovec<real_t, size> min(intpseudovec<real_t, size> x,
+                                        intpseudovec<real_t, size> y)
+  {
+    return x.min(y);  // free-function form of the member min()
+  }
+  
+  template<typename real_t, int size>
+  inline intpseudovec<real_t, size> popcount(intpseudovec<real_t, size> x)
+  {
+    return x.popcount();  // free-function form of the member popcount()
+  }
+  
+  template<typename real_t, int size>
+  inline
+  intpseudovec<real_t, size> rotate(intpseudovec<real_t, size> x,
+                                    typename
+                                    intpseudovec<real_t, size>::int_t n)
+  {
+    return x.rotate(n);  // scalar-count overload: n is a single int_t
+  }
+  
+  template<typename real_t, int size>
+  inline intpseudovec<real_t, size> rotate(intpseudovec<real_t, size> x,
+                                           intpseudovec<real_t, size> n)
+  {
+    return x.rotate(n);  // vector-count overload: n is an integer vector
   }
   
   
diff --git a/vec_qpx_double4.h b/vec_qpx_double4.h
index 3bcab2f..ca1eee8 100644
--- a/vec_qpx_double4.h
+++ b/vec_qpx_double4.h
@@ -298,7 +298,7 @@ namespace vecmathlib {
     
     
     
-    boolvec_t signbit() const
+    boolvec_t isignbit() const  // renamed from signbit(); result of comparing *this < IV(I(0))
     {
       return *this < IV(I(0));
     }
diff --git a/vec_sse_double1.h b/vec_sse_double1.h
index eec59f2..75ee4c6 100644
--- a/vec_sse_double1.h
+++ b/vec_sse_double1.h
@@ -185,25 +185,30 @@ namespace vecmathlib {
     intvec& operator|=(intvec const& x) { return *this=*this|x; }
     intvec& operator^=(intvec const& x) { return *this=*this^x; }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const;  // defined out of line; forwards to MF::vml_bitifthen
     
     
-    intvec lsr(int_t n) const { return U(v) >> U(n); }
+    
+    intvec_t lsr(int_t n) const { return U(v) >> U(n); }  // shift is performed on U(v) with count U(n)
+    intvec_t rotate(int_t n) const;  // defined out of line; forwards to MF::vml_rotate
     intvec operator>>(int_t n) const { return v>>n; }
     intvec operator<<(int_t n) const { return v<<n; }
     
     intvec& operator>>=(int_t n) { return *this=*this>>n; }
     intvec& operator<<=(int_t n) { return *this=*this<<n; }
     
-    intvec lsr(intvec n) const { return U(v) >> U(n); }
+    intvec_t lsr(intvec_t n) const { return U(v) >> U(n); }  // same as the int_t overload, count given as an intvec_t
+    intvec_t rotate(intvec_t n) const;  // defined out of line; forwards to MF::vml_rotate
     intvec operator>>(intvec n) const { return v>>n; }
     intvec operator<<(intvec n) const { return v<<n; }
     
     intvec& operator>>=(intvec n) { return *this=*this>>n; }
     intvec& operator<<=(intvec n) { return *this=*this<<n; }
     
+    intvec_t clz() const { return __builtin_clzll(v); }
+    intvec_t popcount() const { return __builtin_popcountll(v); }  // compiler builtin: number of set bits in v
     
     
-    boolvec_t signbit() const { return *this < IV(I(0)); }
     
     boolvec_t operator==(intvec const& x) const { return v==x.v; }
     boolvec_t operator!=(intvec const& x) const { return v!=x.v; }
@@ -211,6 +216,11 @@ namespace vecmathlib {
     boolvec_t operator<=(intvec const& x) const { return v<=x.v; }
     boolvec_t operator>(intvec const& x) const { return v>x.v; }
     boolvec_t operator>=(intvec const& x) const { return v>=x.v; }
+    
+    intvec_t abs() const { return std::abs(v); }  // single lane: defer to std::abs
+    boolvec_t isignbit() const { return v<0; }  // true iff the lane is negative
+    intvec_t max(intvec_t x) const { return std::max(v, x.v); }  // larger of v and x.v
+    intvec_t min(intvec_t x) const { return std::min(v, x.v); }  // smaller of v and x.v
   };
   
   
@@ -555,6 +565,22 @@ namespace vecmathlib {
 #endif
   }
   
+  inline intvec<double,1> intvec<double,1>::bitifthen(intvec_t x,
+                                                      intvec_t y) const
+  {
+    return MF::vml_bitifthen(*this, x, y);  // generic MF implementation; no SSE-specific code here
+  }
+  
+  inline intvec<double,1> intvec<double,1>::rotate(int_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // scalar count; generic MF implementation
+  }
+  
+  inline intvec<double,1> intvec<double,1>::rotate(intvec_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // vector count; generic MF implementation
+  }
+  
 } // namespace vecmathlib
 
 #endif  // #ifndef VEC_SSE_DOUBLE1_H
diff --git a/vec_sse_double2.h b/vec_sse_double2.h
index a5667a5..b7962e2 100644
--- a/vec_sse_double2.h
+++ b/vec_sse_double2.h
@@ -236,9 +236,12 @@ namespace vecmathlib {
     intvec& operator|=(intvec const& x) { return *this=*this|x; }
     intvec& operator^=(intvec const& x) { return *this=*this^x; }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const;  // defined out of line; forwards to MF::vml_bitifthen
     
     
-    intvec lsr(int_t n) const { return _mm_srli_epi64(v, n); }
+    
+    intvec_t lsr(int_t n) const { return _mm_srli_epi64(v, n); }  // logical right shift of each 64-bit lane by n
+    intvec_t rotate(int_t n) const;  // defined out of line; forwards to MF::vml_rotate
     intvec operator>>(int_t n) const
     {
       // There is no _mm_srai_epi64. To emulate it, add 0x80000000
@@ -257,14 +260,15 @@ namespace vecmathlib {
     intvec& operator>>=(int_t n) { return *this=*this>>n; }
     intvec& operator<<=(int_t n) { return *this=*this<<n; }
     
-    intvec lsr(intvec n) const
+    intvec_t lsr(intvec_t n) const  // per-lane shift counts: handled one element at a time
     {
-      intvec r;
+      intvec_t r;  // same type as before, spelled via the intvec_t alias
       for (int i=0; i<size; ++i) {
         r.set_elt(i, U((*this)[i]) >> U(n[i]));
       }
       return r;
     }
+    intvec_t rotate(intvec_t n) const;  // defined out of line; forwards to MF::vml_rotate
     intvec operator>>(intvec n) const
     {
       intvec r;
@@ -284,12 +288,10 @@ namespace vecmathlib {
     intvec& operator>>=(intvec n) { return *this=*this>>n; }
     intvec& operator<<=(intvec n) { return *this=*this<<n; }
     
+    intvec_t clz() const;  // defined out of line; forwards to MF::vml_clz
+    intvec_t popcount() const;  // defined out of line; forwards to MF::vml_popcount
     
     
-    boolvec_t signbit() const
-    {
-      return as_bool();
-    }
     
     boolvec_t operator==(intvec const& x) const
     {
@@ -320,6 +322,11 @@ namespace vecmathlib {
     {
       return ! (*this < x);
     }
+    
+    intvec_t abs() const;  // defined out of line; forwards to MF::vml_abs
+    boolvec_t isignbit() const { return as_bool(); }  // same body as the removed signbit(): delegates to as_bool()
+    intvec_t max(intvec_t x) const;  // defined out of line; forwards to MF::vml_max
+    intvec_t min(intvec_t x) const;  // defined out of line; forwards to MF::vml_min
   };
   
   
@@ -681,6 +688,47 @@ namespace vecmathlib {
     return r;
   }
   
+  inline intvec<double,2> intvec<double,2>::abs() const
+  {
+    return MF::vml_abs(*this);  // generic MF implementation; no intrinsic is used here
+  }
+  
+  inline intvec<double,2> intvec<double,2>::bitifthen(intvec_t x,
+                                                      intvec_t y) const
+  {
+    return MF::vml_bitifthen(*this, x, y);  // generic MF implementation
+  }
+  
+  inline intvec<double,2> intvec<double,2>::clz() const
+  {
+    return MF::vml_clz(*this);  // generic MF implementation
+  }
+  
+  inline intvec<double,2> intvec<double,2>::max(intvec_t x) const
+  {
+    return MF::vml_max(*this, x);  // generic MF implementation; no intrinsic is used here
+  }
+  
+  inline intvec<double,2> intvec<double,2>::min(intvec_t x) const
+  {
+    return MF::vml_min(*this, x);  // generic MF implementation; no intrinsic is used here
+  }
+  
+  inline intvec<double,2> intvec<double,2>::popcount() const
+  {
+    return MF::vml_popcount(*this);  // generic MF implementation
+  }
+  
+  inline intvec<double,2> intvec<double,2>::rotate(int_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // scalar count; generic MF implementation
+  }
+  
+  inline intvec<double,2> intvec<double,2>::rotate(intvec_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // vector count; generic MF implementation
+  }
+  
 } // namespace vecmathlib
 
 #endif  // #ifndef VEC_SSE_DOUBLE2_H
diff --git a/vec_sse_float1.h b/vec_sse_float1.h
index a339cf5..2976275 100644
--- a/vec_sse_float1.h
+++ b/vec_sse_float1.h
@@ -185,28 +185,30 @@ namespace vecmathlib {
     intvec& operator|=(intvec const& x) { return *this=*this|x; }
     intvec& operator^=(intvec const& x) { return *this=*this^x; }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const;  // defined out of line; forwards to MF::vml_bitifthen
     
     
-    intvec lsr(int_t n) const { return U(v) >> U(n); }
+    
+    intvec_t lsr(int_t n) const { return U(v) >> U(n); }  // shift is performed on U(v) with count U(n)
+    intvec_t rotate(int_t n) const;  // defined out of line; forwards to MF::vml_rotate
     intvec operator>>(int_t n) const { return v>>n; }
     intvec operator<<(int_t n) const { return v<<n; }
     
     intvec& operator>>=(int_t n) { return *this=*this>>n; }
     intvec& operator<<=(int_t n) { return *this=*this<<n; }
     
-    intvec lsr(intvec n) const { return U(v) >> U(n); }
+    intvec_t lsr(intvec_t n) const { return U(v) >> U(n); }  // same as the int_t overload, count given as an intvec_t
+    intvec_t rotate(intvec_t n) const;  // defined out of line; forwards to MF::vml_rotate
     intvec operator>>(intvec n) const { return v>>n; }
     intvec operator<<(intvec n) const { return v<<n; }
     
     intvec& operator>>=(intvec n) { return *this=*this>>n; }
     intvec& operator<<=(intvec n) { return *this=*this<<n; }
     
+    intvec_t clz() const { return __builtin_clz(v); }
+    intvec_t popcount() const { return __builtin_popcount(v); }  // compiler builtin: number of set bits in v
     
     
-    boolvec_t signbit() const
-    {
-      return *this < IV(I(0));
-    }
     
     boolvec_t operator==(intvec const& x) const { return v==x.v; }
     boolvec_t operator!=(intvec const& x) const { return v!=x.v; }
@@ -214,6 +216,11 @@ namespace vecmathlib {
     boolvec_t operator<=(intvec const& x) const { return v<=x.v; }
     boolvec_t operator>(intvec const& x) const { return v>x.v; }
     boolvec_t operator>=(intvec const& x) const { return v>=x.v; }
+    
+    intvec_t abs() const { return std::abs(v); }  // single lane: defer to std::abs
+    boolvec_t isignbit() const { return v<0; }  // true iff the lane is negative
+    intvec_t max(intvec_t x) const { return std::max(v, x.v); }  // larger of v and x.v
+    intvec_t min(intvec_t x) const { return std::min(v, x.v); }  // smaller of v and x.v
   };
   
   
@@ -546,12 +553,28 @@ namespace vecmathlib {
     return FP::as_float(v);
   }
   
+  inline intvec<float,1> intvec<float,1>::bitifthen(intvec_t x,
+                                                    intvec_t y) const
+  {
+    return MF::vml_bitifthen(*this, x, y);  // generic MF implementation; no SSE-specific code here
+  }
+  
   inline realvec<float,1> intvec<float,1>::convert_float() const
   {
     // return FP::convert_float(v);
     return _mm_cvtss_f32(_mm_cvtsi32_ss(_mm_setzero_ps(), v));
   }
   
+  inline intvec<float,1> intvec<float,1>::rotate(int_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // scalar count; generic MF implementation
+  }
+  
+  inline intvec<float,1> intvec<float,1>::rotate(intvec_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // vector count; generic MF implementation
+  }
+  
 } // namespace vecmathlib
 
 #endif  // #ifndef VEC_SSE_FLOAT1_H
diff --git a/vec_sse_float4.h b/vec_sse_float4.h
index 720cbcd..c8aa593 100644
--- a/vec_sse_float4.h
+++ b/vec_sse_float4.h
@@ -239,22 +239,26 @@ namespace vecmathlib {
     intvec& operator|=(intvec const& x) { return *this=*this|x; }
     intvec& operator^=(intvec const& x) { return *this=*this^x; }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const;  // defined out of line; forwards to MF::vml_bitifthen
     
     
-    intvec lsr(int_t n) const { return _mm_srli_epi32(v, n); }
+    
+    intvec_t lsr(int_t n) const { return _mm_srli_epi32(v, n); }  // logical right shift of each 32-bit lane by n
+    intvec_t rotate(int_t n) const;  // defined out of line; forwards to MF::vml_rotate
     intvec operator>>(int_t n) const { return _mm_srai_epi32(v, n); }
     intvec operator<<(int_t n) const { return _mm_slli_epi32(v, n); }
     intvec& operator>>=(int_t n) { return *this=*this>>n; }
     intvec& operator<<=(int_t n) { return *this=*this<<n; }
     
-    intvec lsr(intvec n) const
+    intvec_t lsr(intvec_t n) const  // per-lane shift counts: handled one element at a time
     {
-      intvec r;
+      intvec_t r;  // same type as before, spelled via the intvec_t alias
       for (int i=0; i<size; ++i) {
         r.set_elt(i, U((*this)[i]) >> U(n[i]));
       }
       return r;
     }
+    intvec_t rotate(intvec_t n) const;  // defined out of line; forwards to MF::vml_rotate
     intvec operator>>(intvec n) const
     {
       intvec r;
@@ -274,12 +278,10 @@ namespace vecmathlib {
     intvec& operator>>=(intvec n) { return *this=*this>>n; }
     intvec& operator<<=(intvec n) { return *this=*this<<n; }
     
+    intvec_t clz() const;  // defined out of line; forwards to MF::vml_clz
+    intvec_t popcount() const;  // defined out of line; forwards to MF::vml_popcount
     
     
-    boolvec_t signbit() const
-    {
-      return as_bool();
-    }
     
     boolvec_t operator==(intvec const& x) const
     {
@@ -310,6 +312,11 @@ namespace vecmathlib {
     {
       return ! (*this < x);
     }
+    
+    intvec_t abs() const { return _mm_abs_epi32(v); }  // NOTE(review): _mm_abs_epi32 is an SSSE3 intrinsic - confirm this header is only built with SSSE3 enabled
+    boolvec_t isignbit() const { return as_bool(); }  // same body as the removed signbit(): delegates to as_bool()
+    intvec_t max(intvec_t x) const { return _mm_max_epi32(v, x.v); }  // NOTE(review): _mm_max_epi32 is an SSE4.1 intrinsic - confirm the build guarantees SSE4.1
+    intvec_t min(intvec_t x) const { return _mm_min_epi32(v, x.v); }  // NOTE(review): _mm_min_epi32 is an SSE4.1 intrinsic - confirm the build guarantees SSE4.1
   };
   
   
@@ -693,11 +700,37 @@ namespace vecmathlib {
     return _mm_castsi128_ps(v);
   }
   
+  inline intvec<float,4> intvec<float,4>::bitifthen(intvec_t x,
+                                                    intvec_t y) const
+  {
+    return MF::vml_bitifthen(*this, x, y);  // generic MF implementation
+  }
+  
+  inline intvec<float,4> intvec<float,4>::clz() const
+  {
+    return MF::vml_clz(*this);  // generic MF implementation
+  }
+  
   inline realvec<float,4> intvec<float,4>::convert_float() const
   {
     return _mm_cvtepi32_ps(v);
   }
   
+  inline intvec<float,4> intvec<float,4>::popcount() const
+  {
+    return MF::vml_popcount(*this);  // generic MF implementation
+  }
+  
+  inline intvec<float,4> intvec<float,4>::rotate(int_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // scalar count; generic MF implementation
+  }
+  
+  inline intvec<float,4> intvec<float,4>::rotate(intvec_t n) const
+  {
+    return MF::vml_rotate(*this, n);  // vector count; generic MF implementation
+  }
+  
 } // namespace vecmathlib
 
 #endif  // #ifndef VEC_SSE_FLOAT4_H
diff --git a/vec_test.h b/vec_test.h
index 52d3442..5b557ae 100644
--- a/vec_test.h
+++ b/vec_test.h
@@ -292,14 +292,20 @@ namespace vecmathlib {
       return res ^= x;
     }
     
+    intvec_t bitifthen(intvec_t x, intvec_t y) const  // test vector uses the generic MF routine
+    {
+      return MF::vml_bitifthen(*this, x, y);
+    }
     
     
-    inttestvec lsr(int_t n) const
+    
+    intvec_t lsr(int_t n) const  // every lane is shifted as U(v[d]) >> U(n) and converted back through I(...)
     {
-      inttestvec res;
+      intvec_t res;  // same type as before, spelled via the intvec_t alias
       for (int d=0; d<size; ++d) res.v[d] = I(U(v[d]) >> U(n));
       return res;
     }
+    intvec_t rotate(int_t n) const { return MF::vml_rotate(*this, n); }  // scalar count; generic MF routine
     inttestvec& operator>>=(int_t n)
     {
       for (int d=0; d<size; ++d) v[d] >>= n;
@@ -321,12 +327,13 @@ namespace vecmathlib {
       return res <<= n;
     }
     
-    inttestvec lsr(inttestvec n) const
+    intvec_t lsr(intvec_t n) const  // lane d is shifted as U(v[d]) >> U(n.v[d]) and converted back through I(...)
     {
       inttestvec res;
       for (int d=0; d<size; ++d) res.v[d] = I(U(v[d]) >> U(n.v[d]));
       return res;
     }
+    intvec_t rotate(intvec_t n) const { return MF::vml_rotate(*this, n); }  // vector count; generic MF routine
     inttestvec& operator>>=(inttestvec n)
     {
       for (int d=0; d<size; ++d) v[d] >>= n.v[d];
@@ -348,14 +355,10 @@ namespace vecmathlib {
       return res <<= n;
     }
     
+    intvec_t clz() const { return MF::vml_clz(*this); }  // generic MF routine
+    intvec_t popcount() const { return MF::vml_popcount(*this); }  // generic MF routine
     
     
-    boolvec_t signbit() const
-    {
-      boolvec_t res;
-      for (int d=0; d<size; ++d) res.set_elt(d, v[d] < 0);
-      return res;
-    }
     
     boolvec_t operator==(inttestvec const& x) const
     {
@@ -393,6 +396,11 @@ namespace vecmathlib {
       for (int d=0; d<size; ++d) res.v[d] = v[d] >= x.v[d];
       return res;
     }
+    
+    intvec_t abs() const { return MF::vml_abs(*this); }  // generic MF routine
+    boolvec_t isignbit() const { return MF::vml_isignbit(*this); }  // generic MF routine; replaces the removed per-lane signbit() loop
+    intvec_t max(intvec_t x) const { return MF::vml_max(*this, x); }  // generic MF routine
+    intvec_t min(intvec_t x) const { return MF::vml_min(*this, x); }  // generic MF routine
   };
   
   
@@ -862,15 +870,15 @@ namespace vecmathlib {
   // inttestvec wrappers
   
   template<typename real_t, int size>
-  inline booltestvec<real_t, size> as_bool(inttestvec<real_t, size> x)
+  inline inttestvec<real_t, size> abs(inttestvec<real_t, size> x)  // free-function form of x.abs()
   {
-    return x.as_bool();
+    return x.abs();
   }
   
   template<typename real_t, int size>
-  inline booltestvec<real_t, size> convert_bool(inttestvec<real_t, size> x)
+  inline booltestvec<real_t, size> as_bool(inttestvec<real_t, size> x)  // free-function form of x.as_bool()
   {
-    return x.convert_bool();
+    return x.as_bool();
   }
   
   template<typename real_t, int size>
@@ -880,12 +888,38 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline inttestvec<real_t, size> bitifthen(inttestvec<real_t, size> x,
+                                            inttestvec<real_t, size> y,
+                                            inttestvec<real_t, size> z)
+  {
+    return x.bitifthen(y, z);  // the first argument is the object the member is called on
+  }
+  
+  template<typename real_t, int size>
+  inline inttestvec<real_t, size> clz(inttestvec<real_t, size> x)  // free-function form of x.clz()
+  {
+    return x.clz();
+  }
+  
+  template<typename real_t, int size>
+  inline booltestvec<real_t, size> convert_bool(inttestvec<real_t, size> x)
+  {
+    return x.convert_bool();  // free-function form of the member convert_bool()
+  }
+  
+  template<typename real_t, int size>
   inline realtestvec<real_t, size> convert_float(inttestvec<real_t, size> x)
   {
     return x.convert_float();
   }
   
   template<typename real_t, int size>
+  inline booltestvec<real_t, size> isignbit(inttestvec<real_t, size> x)
+  {
+    return x.isignbit();  // integer sign test; replaces the former signbit() wrapper for inttestvec
+  }
+  
+  template<typename real_t, int size>
   inline
   inttestvec<real_t, size> lsr(inttestvec<real_t, size> x,
                                typename inttestvec<real_t, size>::int_t n)
@@ -901,9 +935,38 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
-  inline booltestvec<real_t, size> signbit(inttestvec<real_t, size> x)
+  inline inttestvec<real_t, size> max(inttestvec<real_t, size> x,
+                                      inttestvec<real_t, size> y)
   {
-    return x.signbit();
+    return x.max(y);  // free-function form of the member max()
+  }
+  
+  template<typename real_t, int size>
+  inline inttestvec<real_t, size> min(inttestvec<real_t, size> x,
+                                      inttestvec<real_t, size> y)
+  {
+    return x.min(y);  // free-function form of the member min()
+  }
+  
+  template<typename real_t, int size>
+  inline inttestvec<real_t, size> popcount(inttestvec<real_t, size> x)
+  {
+    return x.popcount();  // free-function form of the member popcount()
+  }
+  
+  template<typename real_t, int size>
+  inline
+  inttestvec<real_t, size> rotate(inttestvec<real_t, size> x,
+                                  typename inttestvec<real_t, size>::int_t n)
+  {
+    return x.rotate(n);  // scalar-count overload: n is a single int_t
+  }
+  
+  template<typename real_t, int size>
+  inline inttestvec<real_t, size> rotate(inttestvec<real_t, size> x,
+                                         inttestvec<real_t, size> n)
+  {
+    return x.rotate(n);  // vector-count overload: n is an integer vector
   }
   
   
diff --git a/vec_vsx_double2.h b/vec_vsx_double2.h
index 5931286..6505cdb 100644
--- a/vec_vsx_double2.h
+++ b/vec_vsx_double2.h
@@ -322,7 +322,7 @@ namespace vecmathlib {
     
     
     
-    boolvec_t signbit() const
+    boolvec_t isignbit() const  // renamed from signbit(); shifts right by bits-1, then converts with as_bool()
     {
       return (*this >> (bits-1)).as_bool();
     }