19 files changed, 109 insertions, 7 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h
index 14f1051..9cb6eb2 100644
--- a/mathfuncs_base.h
+++ b/mathfuncs_base.h
@@ -88,6 +88,7 @@ namespace vecmathlib {
     static boolvec_t vml_isnan(realvec_t x);
     static boolvec_t vml_isnormal(realvec_t x);
     static realvec_t vml_ldexp(realvec_t x, intvec_t n);
+    static realvec_t vml_mad(realvec_t x, realvec_t y, realvec_t z);
     static boolvec_t vml_signbit(realvec_t x);
     
     // exp
diff --git a/mathfuncs_fabs.h b/mathfuncs_fabs.h
index 857694f..baf5f33 100644
--- a/mathfuncs_fabs.h
+++ b/mathfuncs_fabs.h
@@ -170,6 +170,12 @@ namespace vecmathlib {
   }
   
   template<typename realvec_t>
+  realvec_t mathfuncs<realvec_t>::vml_mad(realvec_t x, realvec_t y, realvec_t z)
+  {
+    return x * y + z;           // generic fallback: multiply, then add
+  }
+  
+  template<typename realvec_t>
   typename realvec_t::boolvec_t mathfuncs<realvec_t>::vml_signbit(realvec_t x)
   {
     return convert_bool(as_int(x) & IV(FP::signbit_mask));
diff --git a/test.cc b/test.cc
index e8aaefc..f07b572 100644
--- a/test.cc
+++ b/test.cc
@@ -1166,9 +1166,10 @@ struct vecmathlib_test {
     return vml_std::ilogb(x);
   }
   static real_t local_ldexp(real_t x, int_t n) { return ldexp(x, n); }
+  static real_t local_mad(real_t x, real_t y, real_t z) { return x*y+z; }
   static void test_fabs()
   {
-    cout << "   testing + - + - * == != < <= > >= copysign fabs fdim fma fmax fmin frexp ilogb isfinite isinf isnan isnormal ldexp nextafter signbit...\n" << flush;
+    cout << "   testing + - + - * == != < <= > >= copysign fabs fdim fma fmax fmin frexp ilogb isfinite isinf isnan isnormal ldexp mad nextafter signbit...\n" << flush;
     
     const real_t eps = FP::epsilon();
     const real_t int_min = R(std::numeric_limits<int_t>::min());
@@ -1295,6 +1296,9 @@ struct vecmathlib_test {
 #endif
       check_real<RV,I>("ldexp", local_ldexp, vecmathlib::ldexp, x, n[0], 0.0);
       check_real<RV,IV>("ldexp", local_ldexp, vecmathlib::ldexp, x, n, 0.0);
+      check_real<RV,RV,RV>("mad",
+                           local_mad, vecmathlib::mad,
+                           x, y, z, R(10.0)*accuracy());
       check_real<RV,RV>("nextafter",
                         vml_std::nextafter, vecmathlib::nextafter, x, y, 0.0);
       check_bool<RV>("signbit", vml_std::signbit, vecmathlib::signbit, x);
diff --git a/vec_altivec_float4.h b/vec_altivec_float4.h
index 55f3830..14e0308 100644
--- a/vec_altivec_float4.h
+++ b/vec_altivec_float4.h
@@ -529,6 +529,10 @@ namespace vecmathlib {
     realvec log10() const { return MF::vml_log10(*this); }
     realvec log1p() const { return MF::vml_log1p(*this); }
     realvec log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return vec_madd(v, y.v, z.v); // element-wise v*y + z
+    }
     realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const
diff --git a/vec_avx_double4.h b/vec_avx_double4.h
index adc8c48..37fd73b 100644
--- a/vec_avx_double4.h
+++ b/vec_avx_double4.h
@@ -639,6 +639,10 @@ namespace vecmathlib {
     realvec_t log10() const { return MF::vml_log10(*this); }
     realvec_t log1p() const { return MF::vml_log1p(*this); }
     realvec_t log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no intrinsic used; defer to MF
+    }
     realvec_t nextafter(realvec_t y) const
     {
       return MF::vml_nextafter(*this, y);
diff --git a/vec_avx_float8.h b/vec_avx_float8.h
index 4367969..bba77cb 100644
--- a/vec_avx_float8.h
+++ b/vec_avx_float8.h
@@ -637,6 +637,10 @@ namespace vecmathlib {
     realvec_t log10() const { return MF::vml_log10(*this); }
     realvec_t log1p() const { return MF::vml_log1p(*this); }
     realvec_t log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no intrinsic used; defer to MF
+    }
     realvec_t nextafter(realvec_t y) const
     {
       return MF::vml_nextafter(*this, y);
diff --git a/vec_base.h b/vec_base.h
index 3db2e24..702a405 100644
--- a/vec_base.h
+++ b/vec_base.h
@@ -519,6 +519,14 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline realvec<real_t, size> mad(realvec<real_t, size> x,
+                                   realvec<real_t, size> y,
+                                   realvec<real_t, size> z)
+  {
+    return x.mad(y, z);  // dispatch to the vector type's own mad()
+  }
+  
+  template<typename real_t, int size>
   inline realvec<real_t, size> nextafter(realvec<real_t, size> x,
                                          realvec<real_t, size> y)
   {
diff --git a/vec_builtin.h b/vec_builtin.h
index decab46..a9a87cb 100644
--- a/vec_builtin.h
+++ b/vec_builtin.h
@@ -686,6 +686,10 @@ namespace vecmathlib {
     realvec_t log10() const { return map(builtin_log10); }
     realvec_t log1p() const { return map(builtin_log1p); }
     realvec_t log2() const { return map(builtin_log2); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no builtin mapped; defer to MF
+    }
     realvec_t nextafter(realvec_t y) const { return map(builtin_nextafter, y); }
     realvec_t pow(realvec_t y) const { return map(builtin_pow, y); }
     realvec_t rcp() const { return RV(1.0) / *this; }
@@ -1294,6 +1298,14 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline realbuiltinvec<real_t, size> mad(realbuiltinvec<real_t, size> x,
+                                          realbuiltinvec<real_t, size> y,
+                                          realbuiltinvec<real_t, size> z)
+  {
+    return x.mad(y, z);  // dispatch to the vector type's own mad()
+  }
+  
+  template<typename real_t, int size>
   inline realbuiltinvec<real_t, size> nextafter(realbuiltinvec<real_t, size> x,
                                                 realbuiltinvec<real_t, size> y)
   {
diff --git a/vec_mic_double8.h b/vec_mic_double8.h
index b4dad21..68dd5aa 100644
--- a/vec_mic_double8.h
+++ b/vec_mic_double8.h
@@ -594,6 +594,10 @@ namespace vecmathlib {
     realvec log10() const { return MF::vml_log10(*this); }
     realvec log1p() const { return MF::vml_log1p(*this); }
     realvec log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return _mm512_fmadd_pd(v, y.v, z.v); // (*this) * y + z
+    }
     realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const { return _mm512_div_pd(_mm512_set1_pd(1.0), v); }
diff --git a/vec_neon_float2.h b/vec_neon_float2.h
index b5f874f..5b01d7b 100644
--- a/vec_neon_float2.h
+++ b/vec_neon_float2.h
@@ -496,10 +496,9 @@ namespace vecmathlib {
       // return vrndm_f32(v);
       return MF::vml_floor(*this);
     }
-    realvec fma(realvec y, realvec z) const
+    realvec_t fma(realvec_t y, realvec_t z) const
     {
-      // TODO: vfma_f32
-      return vmla_f32(z.v, v, y.v);
+      return vfma_f32(z.v, v, y.v);
     }
     realvec fmax(realvec y) const { return vmax_f32(v, y.v); }
     realvec fmin(realvec y) const { return vmin_f32(v, y.v); }
@@ -517,6 +516,11 @@ namespace vecmathlib {
     realvec log10() const { return MF::vml_log10(*this); }
     realvec log1p() const { return MF::vml_log1p(*this); }
     realvec log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      // accumulator goes first: z + v*y (fma() uses vfma_f32 instead)
+      return vmla_f32(z.v, v, y.v);
+    }
     realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const
diff --git a/vec_neon_float4.h b/vec_neon_float4.h
index 3944d83..94a77b9 100644
--- a/vec_neon_float4.h
+++ b/vec_neon_float4.h
@@ -517,10 +517,9 @@ namespace vecmathlib {
       // return vrndmq_f32(v);
       return MF::vml_floor(*this);
     }
-    realvec fma(realvec y, realvec z) const
+    realvec_t fma(realvec_t y, realvec_t z) const
     {
-      // TODO: vfmaq_f32
-      return vmlaq_f32(z.v, v, y.v);
+      return vfmaq_f32(z.v, v, y.v);
     }
     realvec fmax(realvec y) const { return vmaxq_f32(v, y.v); }
     realvec fmin(realvec y) const { return vminq_f32(v, y.v); }
@@ -538,6 +537,10 @@ namespace vecmathlib {
     realvec log10() const { return MF::vml_log10(*this); }
     realvec log1p() const { return MF::vml_log1p(*this); }
     realvec log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return vmlaq_f32(z.v, v, y.v); // accumulator first: z + v*y
+    }
     realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const
diff --git a/vec_pseudo.h b/vec_pseudo.h
index ec3bcb2..5f84d3c 100644
--- a/vec_pseudo.h
+++ b/vec_pseudo.h
@@ -887,6 +887,10 @@ namespace vecmathlib {
     realvec_t log10() const { return map(vml_std::log10); }
     realvec_t log1p() const { return map(vml_std::log1p); }
     realvec_t log2() const { return map(vml_std::log2); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // not mapped element-wise; defer to MF
+    }
     realvec_t nextafter(realvec_t y) const
     {
       return map(vml_std::nextafter, y);
@@ -1503,6 +1507,14 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline realpseudovec<real_t, size> mad(realpseudovec<real_t, size> x,
+                                         realpseudovec<real_t, size> y,
+                                         realpseudovec<real_t, size> z)
+  {
+    return x.mad(y, z);  // dispatch to the vector type's own mad()
+  }
+  
+  template<typename real_t, int size>
   inline realpseudovec<real_t, size> nextafter(realpseudovec<real_t, size> x,
                                                realpseudovec<real_t, size> y)
   {
diff --git a/vec_qpx_double4.h b/vec_qpx_double4.h
index b13d607..9fa6bd0 100644
--- a/vec_qpx_double4.h
+++ b/vec_qpx_double4.h
@@ -644,6 +644,10 @@ namespace vecmathlib {
     realvec log10() const { return log10d4(v); }
     realvec log1p() const { return log1pd4(v); }
     realvec log2() const { return log2d4(v); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no intrinsic used; defer to MF
+    }
     realvec nextafter(realvec y) const
     {
       return MF::vml_nextafter(*this, y);
diff --git a/vec_sse_double1.h b/vec_sse_double1.h
index b993ba7..5558356 100644
--- a/vec_sse_double1.h
+++ b/vec_sse_double1.h
@@ -484,6 +484,10 @@ namespace vecmathlib {
     realvec_t log10() const { return MF::vml_log10(*this); }
     realvec_t log1p() const { return MF::vml_log1p(*this); }
     realvec_t log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no intrinsic used; defer to MF
+    }
     realvec_t nextafter(realvec_t y) const
     {
       return MF::vml_nextafter(*this, y);
diff --git a/vec_sse_double2.h b/vec_sse_double2.h
index 5728ea5..11790c3 100644
--- a/vec_sse_double2.h
+++ b/vec_sse_double2.h
@@ -606,6 +606,10 @@ namespace vecmathlib {
     realvec_t log10() const { return MF::vml_log10(*this); }
     realvec_t log1p() const { return MF::vml_log1p(*this); }
     realvec_t log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no intrinsic used; defer to MF
+    }
     realvec_t nextafter(realvec_t y) const
     {
       return MF::vml_nextafter(*this, y);
diff --git a/vec_sse_float1.h b/vec_sse_float1.h
index f038cd6..9cee891 100644
--- a/vec_sse_float1.h
+++ b/vec_sse_float1.h
@@ -485,6 +485,10 @@ namespace vecmathlib {
     realvec_t log10() const { return MF::vml_log10(*this); }
     realvec_t log1p() const { return MF::vml_log1p(*this); }
     realvec_t log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no intrinsic used; defer to MF
+    }
     realvec_t nextafter(realvec_t y) const
     {
       return MF::vml_nextafter(*this, y);
diff --git a/vec_sse_float4.h b/vec_sse_float4.h
index 75e2421..34ac64f 100644
--- a/vec_sse_float4.h
+++ b/vec_sse_float4.h
@@ -619,6 +619,10 @@ namespace vecmathlib {
     realvec_t log10() const { return MF::vml_log10(*this); }
     realvec_t log1p() const { return MF::vml_log1p(*this); }
     realvec_t log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no intrinsic used; defer to MF
+    }
     realvec_t nextafter(realvec_t y) const
     {
       return MF::vml_nextafter(*this, y);
diff --git a/vec_test.h b/vec_test.h
index d619e0a..93ae551 100644
--- a/vec_test.h
+++ b/vec_test.h
@@ -722,6 +722,10 @@ namespace vecmathlib {
     realvec_t log10() const { return MF::vml_log10(*this); }
     realvec_t log1p() const { return MF::vml_log1p(*this); }
     realvec_t log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // test vector exercises MF's version
+    }
     realvec_t nextafter(realvec_t y) const
     {
       return MF::vml_nextafter(*this, y);
@@ -1324,6 +1328,14 @@ namespace vecmathlib {
   }
   
   template<typename real_t, int size>
+  inline realtestvec<real_t, size> mad(realtestvec<real_t, size> x,
+                                       realtestvec<real_t, size> y,
+                                       realtestvec<real_t, size> z)
+  {
+    return x.mad(y, z);  // dispatch to the vector type's own mad()
+  }
+  
+  template<typename real_t, int size>
   inline realtestvec<real_t, size> nextafter(realtestvec<real_t, size> x,
                                              realtestvec<real_t, size> y)
   {
diff --git a/vec_vsx_double2.h b/vec_vsx_double2.h
index 7ee0974..6725859 100644
--- a/vec_vsx_double2.h
+++ b/vec_vsx_double2.h
@@ -568,6 +568,10 @@ namespace vecmathlib {
     realvec log10() const { return MF::vml_log10(*this); }
     realvec log1p() const { return MF::vml_log1p(*this); }
     realvec log2() const { return MF::vml_log2(*this); }
+    realvec_t mad(realvec_t y, realvec_t z) const
+    {
+      return MF::vml_mad(*this, y, z); // no intrinsic used; defer to MF
+    }
     realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
     realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
     realvec rcp() const