summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- mathfuncs_base.h 1
-rw-r--r-- mathfuncs_fabs.h 6
-rw-r--r-- test.cc 6
-rw-r--r-- vec_altivec_float4.h 4
-rw-r--r-- vec_avx_double4.h 4
-rw-r--r-- vec_avx_float8.h 4
-rw-r--r-- vec_base.h 8
-rw-r--r-- vec_builtin.h 12
-rw-r--r-- vec_mic_double8.h 4
-rw-r--r-- vec_neon_float2.h 10
-rw-r--r-- vec_neon_float4.h 9
-rw-r--r-- vec_pseudo.h 12
-rw-r--r-- vec_qpx_double4.h 4
-rw-r--r-- vec_sse_double1.h 4
-rw-r--r-- vec_sse_double2.h 4
-rw-r--r-- vec_sse_float1.h 4
-rw-r--r-- vec_sse_float4.h 4
-rw-r--r-- vec_test.h 12
-rw-r--r-- vec_vsx_double2.h 4
19 files changed, 109 insertions, 7 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h
index 14f1051..9cb6eb2 100644
--- a/mathfuncs_base.h
+++ b/mathfuncs_base.h
@@ -88,6 +88,7 @@ namespace vecmathlib {
static boolvec_t vml_isnan(realvec_t x);
static boolvec_t vml_isnormal(realvec_t x);
static realvec_t vml_ldexp(realvec_t x, intvec_t n);
+ static realvec_t vml_mad(realvec_t x, realvec_t y, realvec_t z);
static boolvec_t vml_signbit(realvec_t x);
// exp
diff --git a/mathfuncs_fabs.h b/mathfuncs_fabs.h
index 857694f..baf5f33 100644
--- a/mathfuncs_fabs.h
+++ b/mathfuncs_fabs.h
@@ -170,6 +170,12 @@ namespace vecmathlib {
}
template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_mad(realvec_t x, realvec_t y, realvec_t z)
+ {
+ return x * y + z;
+ }
+
+ template<typename realvec_t>
typename realvec_t::boolvec_t mathfuncs<realvec_t>::vml_signbit(realvec_t x)
{
return convert_bool(as_int(x) & IV(FP::signbit_mask));
diff --git a/test.cc b/test.cc
index e8aaefc..f07b572 100644
--- a/test.cc
+++ b/test.cc
@@ -1166,9 +1166,10 @@ struct vecmathlib_test {
return vml_std::ilogb(x);
}
static real_t local_ldexp(real_t x, int_t n) { return ldexp(x, n); }
+ static real_t local_mad(real_t x, real_t y, real_t z) { return x*y+z; }
static void test_fabs()
{
- cout << " testing + - + - * == != < <= > >= copysign fabs fdim fma fmax fmin frexp ilogb isfinite isinf isnan isnormal ldexp nextafter signbit...\n" << flush;
+ cout << " testing + - + - * == != < <= > >= copysign fabs fdim fma fmax fmin frexp ilogb isfinite isinf isnan isnormal ldexp mad nextafter signbit...\n" << flush;
const real_t eps = FP::epsilon();
const real_t int_min = R(std::numeric_limits<int_t>::min());
@@ -1295,6 +1296,9 @@ struct vecmathlib_test {
#endif
check_real<RV,I>("ldexp", local_ldexp, vecmathlib::ldexp, x, n[0], 0.0);
check_real<RV,IV>("ldexp", local_ldexp, vecmathlib::ldexp, x, n, 0.0);
+ check_real<RV,RV,RV>("mad",
+ local_mad, vecmathlib::mad,
+ x, y, z, R(10.0)*accuracy());
check_real<RV,RV>("nextafter",
vml_std::nextafter, vecmathlib::nextafter, x, y, 0.0);
check_bool<RV>("signbit", vml_std::signbit, vecmathlib::signbit, x);
diff --git a/vec_altivec_float4.h b/vec_altivec_float4.h
index 55f3830..14e0308 100644
--- a/vec_altivec_float4.h
+++ b/vec_altivec_float4.h
@@ -529,6 +529,10 @@ namespace vecmathlib {
realvec log10() const { return MF::vml_log10(*this); }
realvec log1p() const { return MF::vml_log1p(*this); }
realvec log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return vec_madd(v, y.v, z.v);
+ }
realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
realvec rcp() const
diff --git a/vec_avx_double4.h b/vec_avx_double4.h
index adc8c48..37fd73b 100644
--- a/vec_avx_double4.h
+++ b/vec_avx_double4.h
@@ -639,6 +639,10 @@ namespace vecmathlib {
realvec_t log10() const { return MF::vml_log10(*this); }
realvec_t log1p() const { return MF::vml_log1p(*this); }
realvec_t log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const
{
return MF::vml_nextafter(*this, y);
diff --git a/vec_avx_float8.h b/vec_avx_float8.h
index 4367969..bba77cb 100644
--- a/vec_avx_float8.h
+++ b/vec_avx_float8.h
@@ -637,6 +637,10 @@ namespace vecmathlib {
realvec_t log10() const { return MF::vml_log10(*this); }
realvec_t log1p() const { return MF::vml_log1p(*this); }
realvec_t log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const
{
return MF::vml_nextafter(*this, y);
diff --git a/vec_base.h b/vec_base.h
index 3db2e24..702a405 100644
--- a/vec_base.h
+++ b/vec_base.h
@@ -519,6 +519,14 @@ namespace vecmathlib {
}
template<typename real_t, int size>
+ inline realvec<real_t, size> mad(realvec<real_t, size> x,
+ realvec<real_t, size> y,
+ realvec<real_t, size> z)
+ {
+ return x.mad(y, z);
+ }
+
+ template<typename real_t, int size>
inline realvec<real_t, size> nextafter(realvec<real_t, size> x,
realvec<real_t, size> y)
{
diff --git a/vec_builtin.h b/vec_builtin.h
index decab46..a9a87cb 100644
--- a/vec_builtin.h
+++ b/vec_builtin.h
@@ -686,6 +686,10 @@ namespace vecmathlib {
realvec_t log10() const { return map(builtin_log10); }
realvec_t log1p() const { return map(builtin_log1p); }
realvec_t log2() const { return map(builtin_log2); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const { return map(builtin_nextafter, y); }
realvec_t pow(realvec_t y) const { return map(builtin_pow, y); }
realvec_t rcp() const { return RV(1.0) / *this; }
@@ -1294,6 +1298,14 @@ namespace vecmathlib {
}
template<typename real_t, int size>
+ inline realbuiltinvec<real_t, size> mad(realbuiltinvec<real_t, size> x,
+ realbuiltinvec<real_t, size> y,
+ realbuiltinvec<real_t, size> z)
+ {
+ return x.mad(y, z);
+ }
+
+ template<typename real_t, int size>
inline realbuiltinvec<real_t, size> nextafter(realbuiltinvec<real_t, size> x,
realbuiltinvec<real_t, size> y)
{
diff --git a/vec_mic_double8.h b/vec_mic_double8.h
index b4dad21..68dd5aa 100644
--- a/vec_mic_double8.h
+++ b/vec_mic_double8.h
@@ -594,6 +594,10 @@ namespace vecmathlib {
realvec log10() const { return MF::vml_log10(*this); }
realvec log1p() const { return MF::vml_log1p(*this); }
realvec log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return _mm512_fmadd_pd(v, y.v, z.v); // (*this) * y + z; operands are the parameters y and z (no x in scope)
+ }
realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
realvec rcp() const { return _mm512_div_pd(_mm512_set1_pd(1.0), v); }
diff --git a/vec_neon_float2.h b/vec_neon_float2.h
index b5f874f..5b01d7b 100644
--- a/vec_neon_float2.h
+++ b/vec_neon_float2.h
@@ -496,10 +496,9 @@ namespace vecmathlib {
// return vrndm_f32(v);
return MF::vml_floor(*this);
}
- realvec fma(realvec y, realvec z) const
+ realvec_t fma(realvec_t y, realvec_t z) const
{
- // TODO: vfma_f32
- return vmla_f32(z.v, v, y.v);
+ return vfma_f32(z.v, v, y.v);
}
realvec fmax(realvec y) const { return vmax_f32(v, y.v); }
realvec fmin(realvec y) const { return vmin_f32(v, y.v); }
@@ -517,6 +516,11 @@ namespace vecmathlib {
realvec log10() const { return MF::vml_log10(*this); }
realvec log1p() const { return MF::vml_log1p(*this); }
realvec log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ // vmla_f32(a, b, c) yields a + b*c, so this returns (*this) * y + z
+ return vmla_f32(z.v, v, y.v);
+ }
realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
realvec rcp() const
diff --git a/vec_neon_float4.h b/vec_neon_float4.h
index 3944d83..94a77b9 100644
--- a/vec_neon_float4.h
+++ b/vec_neon_float4.h
@@ -517,10 +517,9 @@ namespace vecmathlib {
// return vrndmq_f32(v);
return MF::vml_floor(*this);
}
- realvec fma(realvec y, realvec z) const
+ realvec_t fma(realvec_t y, realvec_t z) const
{
- // TODO: vfmaq_f32
- return vmlaq_f32(z.v, v, y.v);
+ return vfmaq_f32(z.v, v, y.v);
}
realvec fmax(realvec y) const { return vmaxq_f32(v, y.v); }
realvec fmin(realvec y) const { return vminq_f32(v, y.v); }
@@ -538,6 +537,10 @@ namespace vecmathlib {
realvec log10() const { return MF::vml_log10(*this); }
realvec log1p() const { return MF::vml_log1p(*this); }
realvec log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return vmlaq_f32(z.v, v, y.v);
+ }
realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
realvec rcp() const
diff --git a/vec_pseudo.h b/vec_pseudo.h
index ec3bcb2..5f84d3c 100644
--- a/vec_pseudo.h
+++ b/vec_pseudo.h
@@ -887,6 +887,10 @@ namespace vecmathlib {
realvec_t log10() const { return map(vml_std::log10); }
realvec_t log1p() const { return map(vml_std::log1p); }
realvec_t log2() const { return map(vml_std::log2); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const
{
return map(vml_std::nextafter, y);
@@ -1503,6 +1507,14 @@ namespace vecmathlib {
}
template<typename real_t, int size>
+ inline realpseudovec<real_t, size> mad(realpseudovec<real_t, size> x,
+ realpseudovec<real_t, size> y,
+ realpseudovec<real_t, size> z)
+ {
+ return x.mad(y, z);
+ }
+
+ template<typename real_t, int size>
inline realpseudovec<real_t, size> nextafter(realpseudovec<real_t, size> x,
realpseudovec<real_t, size> y)
{
diff --git a/vec_qpx_double4.h b/vec_qpx_double4.h
index b13d607..9fa6bd0 100644
--- a/vec_qpx_double4.h
+++ b/vec_qpx_double4.h
@@ -644,6 +644,10 @@ namespace vecmathlib {
realvec log10() const { return log10d4(v); }
realvec log1p() const { return log1pd4(v); }
realvec log2() const { return log2d4(v); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec nextafter(realvec y) const
{
return MF::vml_nextafter(*this, y);
diff --git a/vec_sse_double1.h b/vec_sse_double1.h
index b993ba7..5558356 100644
--- a/vec_sse_double1.h
+++ b/vec_sse_double1.h
@@ -484,6 +484,10 @@ namespace vecmathlib {
realvec_t log10() const { return MF::vml_log10(*this); }
realvec_t log1p() const { return MF::vml_log1p(*this); }
realvec_t log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const
{
return MF::vml_nextafter(*this, y);
diff --git a/vec_sse_double2.h b/vec_sse_double2.h
index 5728ea5..11790c3 100644
--- a/vec_sse_double2.h
+++ b/vec_sse_double2.h
@@ -606,6 +606,10 @@ namespace vecmathlib {
realvec_t log10() const { return MF::vml_log10(*this); }
realvec_t log1p() const { return MF::vml_log1p(*this); }
realvec_t log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const
{
return MF::vml_nextafter(*this, y);
diff --git a/vec_sse_float1.h b/vec_sse_float1.h
index f038cd6..9cee891 100644
--- a/vec_sse_float1.h
+++ b/vec_sse_float1.h
@@ -485,6 +485,10 @@ namespace vecmathlib {
realvec_t log10() const { return MF::vml_log10(*this); }
realvec_t log1p() const { return MF::vml_log1p(*this); }
realvec_t log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const
{
return MF::vml_nextafter(*this, y);
diff --git a/vec_sse_float4.h b/vec_sse_float4.h
index 75e2421..34ac64f 100644
--- a/vec_sse_float4.h
+++ b/vec_sse_float4.h
@@ -619,6 +619,10 @@ namespace vecmathlib {
realvec_t log10() const { return MF::vml_log10(*this); }
realvec_t log1p() const { return MF::vml_log1p(*this); }
realvec_t log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const
{
return MF::vml_nextafter(*this, y);
diff --git a/vec_test.h b/vec_test.h
index d619e0a..93ae551 100644
--- a/vec_test.h
+++ b/vec_test.h
@@ -722,6 +722,10 @@ namespace vecmathlib {
realvec_t log10() const { return MF::vml_log10(*this); }
realvec_t log1p() const { return MF::vml_log1p(*this); }
realvec_t log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec_t nextafter(realvec_t y) const
{
return MF::vml_nextafter(*this, y);
@@ -1324,6 +1328,14 @@ namespace vecmathlib {
}
template<typename real_t, int size>
+ inline realtestvec<real_t, size> mad(realtestvec<real_t, size> x,
+ realtestvec<real_t, size> y,
+ realtestvec<real_t, size> z)
+ {
+ return x.mad(y, z);
+ }
+
+ template<typename real_t, int size>
inline realtestvec<real_t, size> nextafter(realtestvec<real_t, size> x,
realtestvec<real_t, size> y)
{
diff --git a/vec_vsx_double2.h b/vec_vsx_double2.h
index 7ee0974..6725859 100644
--- a/vec_vsx_double2.h
+++ b/vec_vsx_double2.h
@@ -568,6 +568,10 @@ namespace vecmathlib {
realvec log10() const { return MF::vml_log10(*this); }
realvec log1p() const { return MF::vml_log1p(*this); }
realvec log2() const { return MF::vml_log2(*this); }
+ realvec_t mad(realvec_t y, realvec_t z) const
+ {
+ return MF::vml_mad(*this, y, z);
+ }
realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); }
realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
realvec rcp() const
OpenPOWER on IntegriCloud