10 files changed, 424 insertions, 29 deletions
diff --git a/IDEAS b/IDEAS
index 6a2f385..692e69a 100644
--- a/IDEAS
+++ b/IDEAS
@@ -1,7 +1,9 @@
+Some ideas for improving accuracy or speed:
+
 implement cosh, sinh via expansion, similar to asin
-   https://en.wikipedia.org/wiki/Hyperbolic_function
+   <https://en.wikipedia.org/wiki/Hyperbolic_function>
 
 implement exp via cosh, sinh
 
 implement acosh, asinh via expansion, similar to asin
-   https://en.wikipedia.org/wiki/Inverse_hyperbolic_function
+   <https://en.wikipedia.org/wiki/Inverse_hyperbolic_function>
diff --git a/build.ninja b/build.ninja
index 23566c4..fbfd02b 100644
--- a/build.ninja
+++ b/build.ninja
@@ -2,7 +2,8 @@ ar  = ar
 cxx = g++
 
 cppflags =
-cxxflags = -std=gnu++11 -Wall -g -march=native -Ofast
+cxxflags = -std=gnu++11 -Wall -g -march=native
+# -Ofast
 ldflags = -L.
 
 rule cxx
diff --git a/mathfuncs_convert.h b/mathfuncs_convert.h
index ea39e05..e729fce 100644
--- a/mathfuncs_convert.h
+++ b/mathfuncs_convert.h
@@ -58,6 +58,10 @@ namespace vecmathlib {
   {
     // Handle zero
     boolvec_t is_zero = x == RV(0.0);
+    // Handle overflow
+    int_t min_int = FP::sign_mask;
+    int_t max_int = ~FP::sign_mask;
+    boolvec_t is_overflow = x < RV(R(min_int)) || x > RV(R(max_int));
     // Handle negative numbers
     boolvec_t is_negative = signbit(x);
     x = fabs(x);
@@ -82,6 +86,8 @@ namespace vecmathlib {
     
     // Handle negative numbers
     ix = ifthen(is_negative, -ix, ix);
+    // Handle overflow
+    ix = ifthen(is_overflow, IV(min_int), ix);
     // Handle zero
     ix = ifthen(is_zero, IV(I(0)), ix);
     
@@ -93,26 +99,28 @@ namespace vecmathlib {
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x)
   {
-    realvec_t r = fabs(x);
+    realvec_t r = x;
     // Round by adding a large number, destroying all excess precision
-    real_t offset = RV(std::scalbn(R(1.0), FP::mantissa_bits));
+    realvec_t offset = copysign(RV(std::scalbn(R(1.0), FP::mantissa_bits)), x);
     r += offset;
     // Ensure the rounding is not optimised away
     r.barrier();
     r -= offset;
-    return copysign(r, x);
+    return r;
   }
   
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x)
   {
-    return round(x + RV(0.5));
+    real_t offset = R(0.5) - std::scalbn(fabs(x), -FP::mantissa_bits);
+    return round(x + RV(offset));
   }
   
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x)
   {
-    return round(x - RV(0.5));
+    real_t offset = R(0.5) - std::scalbn(fabs(x), -FP::mantissa_bits);
+    return round(x - RV(offset));
   }
   
 }; // namespace vecmathlib
diff --git a/mathfuncs_exp.h b/mathfuncs_exp.h
index bad3959..851e6f9 100644
--- a/mathfuncs_exp.h
+++ b/mathfuncs_exp.h
@@ -17,25 +17,24 @@ namespace vecmathlib {
   {
     // Rescale
     realvec_t x0 = x;
-    realvec_t floor_x = floor(x);
-    x -= floor_x;
-    intvec_t ifloor_x = convert_int(floor_x);
+    realvec_t round_x = round(x);
+    x -= round_x;
+    intvec_t iround_x = convert_int(round_x);
     
     // Approximate
-    assert(all(x >= RV(0.0) && x < RV(1.0)));
+    assert(all(x >= RV(-0.5) && x <= RV(0.5)));
     // exp(x) = Sum[n=0,nmax] x^n / n!
     int const nmax = 15;
-    x -= RV(0.5);               // shift range to increase accuracy
     x *= RV(M_LN2);
-    realvec_t y = RV(M_SQRT2); // x^n / n!, compensated for range shift
-    realvec_t r = y;
-    for (int n=1; n<nmax; ++n) {
+    realvec_t y = x;            // x^n / n!
+    realvec_t r = RV(1.0) + y;
+    for (int n=2; n<nmax; ++n) {
       y *= x * RV(R(1.0) / R(n));
       r += y;
     }
     
     // Undo rescaling
-    r = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), scalbn(r, ifloor_x));
+    r = ifthen(x0 < RV(R(FP::min_exponent)), RV(0.0), scalbn(r, iround_x));
     
     return r;
   }
@@ -61,6 +60,10 @@ namespace vecmathlib {
   realvec_t mathfuncs<realvec_t>::vml_expm1(realvec_t x)
   {
     return exp(x) - RV(1.0);
+#if 0
+    r = exp(x) - RV(1.0);
+    return ifthen(r == RV(0.0), x, r);
+#endif
   }
   
 }; // namespace vecmathlib
diff --git a/mathfuncs_fabs.h b/mathfuncs_fabs.h
index 034fedb..2d1a91f 100644
--- a/mathfuncs_fabs.h
+++ b/mathfuncs_fabs.h
@@ -37,7 +37,7 @@ namespace vecmathlib {
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_scalbn(realvec_t x, intvec_t n)
   {
-    return as_float(as_int(x) + (n << FP::mantissa_bits));
+    return as_float(as_int(x) + (n << IV(FP::mantissa_bits)));
     // return x * as_float((n + exponent_offset) << mantissa_bits);
   }
   
diff --git a/mathfuncs_log.h b/mathfuncs_log.h
index 8fb3204..aaa15b9 100644
--- a/mathfuncs_log.h
+++ b/mathfuncs_log.h
@@ -63,6 +63,12 @@ namespace vecmathlib {
   realvec_t mathfuncs<realvec_t>::vml_log1p(realvec_t x)
   {
     return log(RV(1.0) + x);
+#if 0
+    // Goldberg, theorem 4
+    realvec_t x1 = RV(1.0) + x;
+    x1.barrier();
+    return ifthen(x1 == x, x, x * log(x1) / (x1 - RV(1.0)));
+#endif
   }
   
 }; // namespace vecmathlib
diff --git a/mathfuncs_pow.h b/mathfuncs_pow.h
index 985117e..96b2ce7 100644
--- a/mathfuncs_pow.h
+++ b/mathfuncs_pow.h
@@ -15,10 +15,20 @@ namespace vecmathlib {
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_pow(realvec_t x, realvec_t y)
   {
+    // Handle zero
+    boolvec_t is_zero = x == RV(0.0);
+    x = ifthen(is_zero, RV(1.0), x);
+    
     realvec_t r = exp(log(fabs(x)) * y);
+    
     // The result is negative if x<0 and if y is integer and odd
-    realvec_t sign = fmod(x, RV(2.0)) + RV(1.0);
-    return ifthen(x == RV(0.0), RV(0.0), copysign(r, sign));
+    realvec_t sign = fmod(copysign(y, x), RV(2.0)) + RV(0.5);
+    r = copysign(r, sign);
+    
+    // Handle zero
+    r = ifthen(is_zero, RV(0.0), r);
+    
+    return r;
   }
   
 }; // namespace vecmathlib
diff --git a/test.cc b/test.cc
index 8991c64..9d45069 100644
--- a/test.cc
+++ b/test.cc
@@ -40,7 +40,7 @@ struct vecmathlib_test {
   
   
   // Test each function with this many random values
-  static int const imax = 1000;
+  static int const imax = 100000;
   // Require that 3/4 of the digits are correct
   static real_t constexpr accuracy = pow(realvec_t::epsilon(), R(0.75));
   
@@ -63,7 +63,7 @@ struct vecmathlib_test {
     for (int i=0; i<intvec_t::size; ++i) {
       real_t r =
         R(nmax - nmin + 1) *
-        R(rand()) / (real_t(RAND_MAX) + R(1.0));
+        R(rand()) / (R(RAND_MAX) + R(1.0));
       n.set_elt(i, nmin + FP::convert_int(std::floor(r)));
     }
     return n;
@@ -199,15 +199,20 @@ struct vecmathlib_test {
   static void test_convert()
   {
     cout << "   testing ceil convert_float convert_int floor round...\n";
+    
     for (int i=0; i<imax; ++i) {
-      realvec_t const x = random(R(-10.0), R(+10.0));
-      intvec_t const n = random(int_t(-10), int_t(+10));
-      check("ceil", ceil, vecmathlib::ceil, x, accuracy);
+      realvec_t const x = random(R(-1.0e+10), R(+1.0e+10));
+      intvec_t const n = random(int_t(-1000000), int_t(+1000000));
+      realvec_t const fn = vecmathlib::convert_float(n);
       check("convert_float",
             FP::convert_float, vecmathlib::convert_float, n, accuracy);
       check("convert_int", FP::convert_int, vecmathlib::convert_int, x);
+      check("ceil", ceil, vecmathlib::ceil, x, accuracy);
+      check("ceil", ceil, vecmathlib::ceil, fn, accuracy);
       check("floor", floor, vecmathlib::floor, x, accuracy);
+      check("floor", floor, vecmathlib::floor, fn, accuracy);
       check("round", round, vecmathlib::round, x, accuracy);
+      check("round", round, vecmathlib::round, fn, accuracy);
     }
   }
   
@@ -275,7 +280,12 @@ struct vecmathlib_test {
     for (int i=0; i<imax; ++i) {
       realvec_t const x = random(R(0.001), R(1000.0));
       realvec_t const y = random(R(-10.0), R(+10.0));
+      intvec_t const n = random(I(-10), I(+10));
+      realvec_t const fn = vecmathlib::convert_float(n);
+      check("pow", pow, vecmathlib::pow, RV(0.0), y, accuracy);
+      check("pow", pow, vecmathlib::pow, x, RV(0.0), accuracy);
       check("pow", pow, vecmathlib::pow, x, y, accuracy);
+      check("pow", pow, vecmathlib::pow, -x, fn, accuracy);
     }
   }
   
@@ -284,11 +294,21 @@ struct vecmathlib_test {
   {
     cout << "   testing fmod rcp remainder...\n";
     for (int i=0; i<imax; ++i) {
-      realvec_t const x = random(R(-10.0), R(+10.0));
-      realvec_t const y = random(R(-10.0), R(+10.0));
-      check("fmod", fmod, vecmathlib::fmod, x, y, accuracy);
+      realvec_t const x = random(R(-100.0), R(+100.0));
+      realvec_t const y = random(R(-100.0), R(+100.0));
+      intvec_t const n = random(I(-100), I(+100));
+      intvec_t const m = random(I(-100), I(+100));
+      realvec_t const fn = vecmathlib::convert_float(n);
+      realvec_t const fm = vecmathlib::convert_float(m);
       check("rcp", rcp, vecmathlib::rcp, x, accuracy);
+      check("fmod", fmod, vecmathlib::fmod, x, y, accuracy);
+      check("fmod", fmod, vecmathlib::fmod, x, fm, accuracy);
+      check("fmod", fmod, vecmathlib::fmod, fn, y, accuracy);
+      check("fmod", fmod, vecmathlib::fmod, fn, y, accuracy);
       check("remainder", remainder, vecmathlib::remainder, x, y, accuracy);
+      check("remainder", remainder, vecmathlib::remainder, x, fm, accuracy);
+      check("remainder", remainder, vecmathlib::remainder, fn, y, accuracy);
+      check("remainder", remainder, vecmathlib::remainder, fn, y, accuracy);
     }
   }
   
@@ -300,7 +320,7 @@ struct vecmathlib_test {
       check("sin", sin, vecmathlib::sin, x, accuracy);
       check("cos", cos, vecmathlib::cos, x, accuracy);
       // tan loses accuracy (by definition, not by implementation?)
-      check("tan", tan, vecmathlib::tan, x, R(10.0)*accuracy);
+      check("tan", tan, vecmathlib::tan, x, R(100.0)*accuracy);
     }
   }
   
@@ -360,6 +380,7 @@ int main(int argc, char** argv)
   cout << "Testing math functions:\n";
   
   vecmathlib_test<realvec<float,1>>::test();
+  vecmathlib_test<realvec<double,1>>::test();
   vecmathlib_test<realvec<double,4>>::test();
   
   cout << "\n";
diff --git a/vec_double.h b/vec_double.h
new file mode 100644
index 0000000..1f1a1dd
--- /dev/null
+++ b/vec_double.h
@@ -0,0 +1,343 @@
+// -*-C++-*-
+
+#ifndef VEC_DOUBLE_H
+#define VEC_DOUBLE_H
+
+#include "floatprops.h"
+#include "mathfuncs.h"
+#include "vec_base.h"
+
+#include <cmath>
+
+
+
+namespace vecmathlib {
+  
+  template<> struct boolvec<double,1>;
+  template<> struct intvec<double,1>;
+  template<> struct realvec<double,1>;
+  
+  
+  
+  template<>
+  struct boolvec<double,1>: floatprops<double>
+  {
+    static int const size = 1;
+    typedef bool scalar_t;
+    typedef bool bvector_t;
+    
+    typedef boolvec boolvec_t;
+    typedef intvec<real_t, size> intvec_t;
+    typedef realvec<real_t, size> realvec_t;
+    
+    // Short names for type casts
+    typedef real_t R;
+    typedef int_t I;
+    typedef uint_t U;
+    typedef realvec_t RV;
+    typedef intvec_t IV;
+    typedef boolvec_t BV;
+    typedef floatprops<real_t> FP;
+    typedef mathfuncs<realvec_t> MF;
+    // static real_t R(double a) { return real_t(a); }
+    // static int_t I(int a) { return int_t(a); }
+    // static uint_t U(int a) { return uint_t(a); }
+    // static realvec_t RV(real_t a) { return realvec_t(a); }
+    // static intvec_t IV(int_t a) { return intvec_t(a); }
+    // static boolvec_t BV(bool a) { return boolvec_t(a); }
+    
+    
+    
+    bvector_t v;
+    
+    boolvec() {}
+    boolvec(boolvec const& x): v(x.v) {}
+    boolvec& operator=(boolvec const& x) { return v=x.v, *this; }
+    //boolvec(vector_t x): v(x) {}
+    boolvec(bool a): v(a) {}
+    boolvec(bool const* as): v(as[0]) {}
+    
+    operator bvector_t() const { return v; }
+    bool operator[](int n) const { return v; }
+    boolvec& set_elt(int n, bool a) { return v=a, *this; }
+    
+    
+    
+    auto as_int() const -> intvec_t;      // defined after intvec
+    auto convert_int() const -> intvec_t; // defined after intvec
+    
+    
+    
+    boolvec operator!() const { return !v; }
+    
+    boolvec operator&&(boolvec x) const { return v&&x.v; }
+    boolvec operator||(boolvec x) const { return v||x.v; }
+    boolvec operator==(boolvec x) const { return v==x.v; }
+    boolvec operator!=(boolvec x) const { return v!=x.v; }
+    
+    bool all() const { return v; }
+    bool any() const { return v; }
+    
+    
+    
+    // ifthen(condition, then-value, else-value)
+    auto ifthen(intvec_t x,
+                intvec_t y) const -> intvec_t; // defined after intvec
+    auto ifthen(realvec_t x,
+                realvec_t y) const -> realvec_t; // defined after realvec
+    
+  };
+  
+  
+  
+  template<>
+  struct intvec<double,1>: floatprops<double>
+  {
+    static int const size = 1;
+    typedef int_t scalar_t;
+    typedef int_t ivector_t;
+    
+    typedef boolvec<real_t, size> boolvec_t;
+    typedef intvec intvec_t;
+    typedef realvec<real_t, size> realvec_t;
+    
+    // Short names for type casts
+    typedef real_t R;
+    typedef int_t I;
+    typedef uint_t U;
+    typedef realvec_t RV;
+    typedef intvec_t IV;
+    typedef boolvec_t BV;
+    typedef floatprops<real_t> FP;
+    typedef mathfuncs<realvec_t> MF;
+    // static real_t R(double a) { return real_t(a); }
+    // static int_t I(int a) { return int_t(a); }
+    // static uint_t U(int a) { return uint_t(a); }
+    // static realvec_t RV(real_t a) { return realvec_t(a); }
+    // static intvec_t IV(int_t a) { return intvec_t(a); }
+    // static boolvec_t BV(bool a) { return boolvec_t(a); }
+    
+    
+    
+    ivector_t v;
+    
+    intvec() {}
+    intvec(intvec const& x): v(x.v) {}
+    intvec& operator=(intvec const& x) { return v=x.v, *this; }
+    //intvec(vector_t x): v(x) {}
+    intvec(int_t a): v(a) {}
+    intvec(int_t const* as): v(as[0]) {}
+    
+    operator ivector_t() const { return v; }
+    int_t operator[](int n) const { return v; }
+    intvec& set_elt(int n, int_t a) { return v=a, *this; }
+    
+    
+    
+    auto as_bool() const -> boolvec_t { return v; }
+    auto convert_bool() const -> boolvec_t { return v; }
+    auto as_float() const -> realvec_t; // defined after realvec
+    auto convert_float() const -> realvec_t; // defined after realvec
+    
+    
+    
+    // Note: not all arithmetic operations are supported!
+    
+    intvec operator+() const { return +v; }
+    intvec operator-() const { return -v; }
+    
+    intvec operator+(intvec x) const { return v+x.v; }
+    intvec operator-(intvec x) const { return v-x.v; }
+    
+    intvec& operator+=(intvec const& x) { return *this=*this+x; }
+    intvec& operator-=(intvec const& x) { return *this=*this-x; }
+
+
+    
+    intvec operator~() const { return ~v; }
+    
+    intvec operator&(intvec x) const { return v&x.v; }
+    intvec operator|(intvec x) const { return v|x.v; }
+    intvec operator^(intvec x) const { return v^x.v; }
+    
+    intvec& operator&=(intvec const& x) { return *this=*this&x; }
+    intvec& operator|=(intvec const& x) { return *this=*this|x; }
+    intvec& operator^=(intvec const& x) { return *this=*this^x; }
+    
+    
+    
+    intvec lsr(int_t n) const { return U(v)>>n; }
+    intvec operator>>(int_t n) const { return v>>n; }
+    intvec operator<<(int_t n) const { return v<<n; }
+    intvec& operator>>=(int_t n) { return *this=*this>>n; }
+    intvec& operator<<=(int_t n) { return *this=*this<<n; }
+    
+    intvec lsr(intvec n) const { return U(v)>>n; }
+    intvec operator>>(intvec n) const { return v>>n; }
+    intvec operator<<(intvec n) const { return v<<n; }
+    intvec& operator>>=(intvec n) { return *this=*this>>n; }
+    intvec& operator<<=(intvec n) { return *this=*this<<n; }
+  };
+  
+  
+  
+  template<>
+  struct realvec<double,1>: floatprops<double>
+  {
+    static int const size = 1;
+    typedef real_t scalar_t;
+    typedef real_t vector_t;
+    
+    static constexpr char const* const name = "<1*double>";
+    inline void barrier() { asm("": "+x" (v)); }
+    
+    typedef boolvec<real_t, size> boolvec_t;
+    typedef intvec<real_t, size> intvec_t;
+    typedef realvec realvec_t;
+    
+    // Short names for type casts
+    typedef real_t R;
+    typedef int_t I;
+    typedef uint_t U;
+    typedef realvec_t RV;
+    typedef intvec_t IV;
+    typedef boolvec_t BV;
+    typedef floatprops<real_t> FP;
+    typedef mathfuncs<realvec_t> MF;
+    // static real_t R(double a) { return real_t(a); }
+    // static int_t I(int a) { return int_t(a); }
+    // static uint_t U(int a) { return uint_t(a); }
+    // static realvec_t RV(real_t a) { return realvec_t(a); }
+    // static intvec_t IV(int_t a) { return intvec_t(a); }
+    // static boolvec_t BV(bool a) { return boolvec_t(a); }
+    
+    
+    
+    vector_t v;
+    
+    realvec() {}
+    realvec(realvec const& x): v(x.v) {}
+    realvec& operator=(realvec const& x) { return v=x.v, *this; }
+    //realvec(vector_t x): v(x) {}
+    realvec(real_t a): v(a) {}
+    realvec(real_t const* as): v(as[0]) {}
+    
+    operator vector_t() const { return v; }
+    real_t operator[](int n) const { return v; }
+    realvec& set_elt(int n, real_t a) { return v=a, *this; }
+    
+    
+    
+    intvec_t as_int() const { return FP::as_int(v); }
+    intvec_t convert_int() const { return MF::vml_convert_int(v); }
+    
+    
+    
+    realvec operator+() const { return +v; }
+    realvec operator-() const { return -v; }
+    
+    realvec operator+(realvec x) const { return v+x.v; }
+    realvec operator-(realvec x) const { return v-x.v; }
+    realvec operator*(realvec x) const { return v*x.v; }
+    realvec operator/(realvec x) const { return v/x.v; }
+    
+    realvec& operator+=(realvec const& x) { return *this=*this+x; }
+    realvec& operator-=(realvec const& x) { return *this=*this-x; }
+    realvec& operator*=(realvec const& x) { return *this=*this*x; }
+    realvec& operator/=(realvec const& x) { return *this=*this/x; }
+    
+    real_t prod() const { return v; }
+    real_t sum() const { return v; }
+    
+    
+    
+    boolvec_t operator==(realvec const& x) const { return v==x.v; }
+    boolvec_t operator!=(realvec const& x) const { return v!=x.v; }
+    boolvec_t operator<(realvec const& x) const { return v<x.v; }
+    boolvec_t operator<=(realvec const& x) const { return v<=x.v; }
+    boolvec_t operator>(realvec const& x) const { return v>x.v; }
+    boolvec_t operator>=(realvec const& x) const { return v>=x.v; }
+    
+    
+    
+    realvec acos() const { return MF::vml_acos(*this); }
+    realvec acosh() const { return MF::vml_acosh(*this); }
+    realvec asin() const { return MF::vml_asin(*this); }
+    realvec asinh() const { return MF::vml_asinh(*this); }
+    realvec atan() const { return MF::vml_atan(*this); }
+    realvec atanh() const { return MF::vml_atanh(*this); }
+    realvec ceil() const { return MF::vml_ceil(*this); }
+    realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
+    realvec cos() const { return MF::vml_cos(*this); }
+    realvec cosh() const { return MF::vml_cosh(*this); }
+    realvec exp() const { return MF::vml_exp(*this); }
+    realvec exp10() const { return MF::vml_exp10(*this); }
+    realvec exp2() const { return MF::vml_exp2(*this); }
+    realvec expm1() const { return MF::vml_expm1(*this); }
+    realvec fabs() const { return MF::vml_fabs(*this); }
+    realvec floor() const { return MF::vml_floor(*this); }
+    realvec fmod(realvec y) const { return MF::vml_fmod(*this, y); }
+    intvec_t ilogb() const { return MF::vml_ilogb(*this); }
+    realvec log() const { return MF::vml_log(*this); }
+    realvec log10() const { return MF::vml_log10(*this); }
+    realvec log1p() const { return MF::vml_log1p(*this); }
+    realvec log2() const { return MF::vml_log2(*this); }
+    realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
+    realvec rcp() const { return MF::vml_rcp(*this); }
+    realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); }
+    realvec round() const { return MF::vml_round(*this); }
+    realvec rsqrt() const { return MF::vml_rsqrt(*this); }
+    realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
+    boolvec_t signbit() const { return MF::vml_signbit(*this); }
+    realvec sin() const { return MF::vml_sin(*this); }
+    realvec sinh() const { return MF::vml_sinh(*this); }
+    realvec sqrt() const { return MF::vml_sqrt(*this); }
+    realvec tan() const { return MF::vml_tan(*this); }
+    realvec tanh() const { return MF::vml_tanh(*this); }
+  };
+  
+  
+  
+  // boolvec definitions
+  
+  inline
+  auto boolvec<double,1>::as_int() const -> intvec_t
+  {
+    return v;
+  }
+  
+  inline
+  auto boolvec<double,1>::convert_int() const -> intvec_t
+  {
+    return v;
+  }
+  
+  inline
+  auto boolvec<double,1>::ifthen(intvec_t x, intvec_t y) const -> intvec_t
+  {
+    return v ? x.v : y.v;
+  }
+  
+  inline
+  auto boolvec<double,1>::ifthen(realvec_t x, realvec_t y) const -> realvec_t
+  {
+    return v ? x.v : y.v;
+  }
+
+  
+  
+  // intvec definitions
+  
+  inline auto intvec<double,1>::as_float() const -> realvec_t
+  {
+    return FP::as_float(v);
+  }
+  
+  inline auto intvec<double,1>::convert_float() const -> realvec_t
+  {
+    return MF::vml_convert_float(v);
+  }
+  
+} // namespace vecmathlib
+
+#endif  // #ifndef VEC_DOUBLE_H
diff --git a/vecmathlib.h b/vecmathlib.h
index 2264545..b5703bb 100644
--- a/vecmathlib.h
+++ b/vecmathlib.h
@@ -4,6 +4,7 @@
 #define VECMATHLIB_H
 
 #include "vec_float.h"
+#include "vec_double.h"
 #if defined __AVX__             // Intel AVX
 #  include "vec_double_avx.h"
 #endif