diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-09-10 14:22:35 -0400 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-09-10 14:22:35 -0400 |
commit | 80d472d7531d541d953adf94228e0555f38296fe (patch) | |
tree | b6943e23b21cf4f697ed94b15e3ab9b9cbfab39c | |
parent | 05b92eee2c5f5792d148b671e67fc92392e99eee (diff) | |
download | vecmathlib-80d472d7531d541d953adf94228e0555f38296fe.zip vecmathlib-80d472d7531d541d953adf94228e0555f38296fe.tar.gz |
Introduce mad() as a low-accuracy version of fma()
-rw-r--r-- | mathfuncs_base.h | 1 | ||||
-rw-r--r-- | mathfuncs_fabs.h | 6 | ||||
-rw-r--r-- | test.cc | 6 | ||||
-rw-r--r-- | vec_altivec_float4.h | 4 | ||||
-rw-r--r-- | vec_avx_double4.h | 4 | ||||
-rw-r--r-- | vec_avx_float8.h | 4 | ||||
-rw-r--r-- | vec_base.h | 8 | ||||
-rw-r--r-- | vec_builtin.h | 12 | ||||
-rw-r--r-- | vec_mic_double8.h | 4 | ||||
-rw-r--r-- | vec_neon_float2.h | 10 | ||||
-rw-r--r-- | vec_neon_float4.h | 9 | ||||
-rw-r--r-- | vec_pseudo.h | 12 | ||||
-rw-r--r-- | vec_qpx_double4.h | 4 | ||||
-rw-r--r-- | vec_sse_double1.h | 4 | ||||
-rw-r--r-- | vec_sse_double2.h | 4 | ||||
-rw-r--r-- | vec_sse_float1.h | 4 | ||||
-rw-r--r-- | vec_sse_float4.h | 4 | ||||
-rw-r--r-- | vec_test.h | 12 | ||||
-rw-r--r-- | vec_vsx_double2.h | 4 |
19 files changed, 109 insertions, 7 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h index 14f1051..9cb6eb2 100644 --- a/mathfuncs_base.h +++ b/mathfuncs_base.h @@ -88,6 +88,7 @@ namespace vecmathlib { static boolvec_t vml_isnan(realvec_t x); static boolvec_t vml_isnormal(realvec_t x); static realvec_t vml_ldexp(realvec_t x, intvec_t n); + static realvec_t vml_mad(realvec_t x, realvec_t y, realvec_t z); static boolvec_t vml_signbit(realvec_t x); // exp diff --git a/mathfuncs_fabs.h b/mathfuncs_fabs.h index 857694f..baf5f33 100644 --- a/mathfuncs_fabs.h +++ b/mathfuncs_fabs.h @@ -170,6 +170,12 @@ namespace vecmathlib { } template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_mad(realvec_t x, realvec_t y, realvec_t z) + { + return x * y + z; + } + + template<typename realvec_t> typename realvec_t::boolvec_t mathfuncs<realvec_t>::vml_signbit(realvec_t x) { return convert_bool(as_int(x) & IV(FP::signbit_mask)); @@ -1166,9 +1166,10 @@ struct vecmathlib_test { return vml_std::ilogb(x); } static real_t local_ldexp(real_t x, int_t n) { return ldexp(x, n); } + static real_t local_mad(real_t x, real_t y, real_t z) { return x*y+z; } static void test_fabs() { - cout << " testing + - + - * == != < <= > >= copysign fabs fdim fma fmax fmin frexp ilogb isfinite isinf isnan isnormal ldexp nextafter signbit...\n" << flush; + cout << " testing + - + - * == != < <= > >= copysign fabs fdim fma fmax fmin frexp ilogb isfinite isinf isnan isnormal ldexp mad nextafter signbit...\n" << flush; const real_t eps = FP::epsilon(); const real_t int_min = R(std::numeric_limits<int_t>::min()); @@ -1295,6 +1296,9 @@ struct vecmathlib_test { #endif check_real<RV,I>("ldexp", local_ldexp, vecmathlib::ldexp, x, n[0], 0.0); check_real<RV,IV>("ldexp", local_ldexp, vecmathlib::ldexp, x, n, 0.0); + check_real<RV,RV,RV>("mad", + local_mad, vecmathlib::mad, + x, y, z, R(10.0)*accuracy()); check_real<RV,RV>("nextafter", vml_std::nextafter, vecmathlib::nextafter, x, y, 0.0); check_bool<RV>("signbit", vml_std::signbit, 
vecmathlib::signbit, x); diff --git a/vec_altivec_float4.h b/vec_altivec_float4.h index 55f3830..14e0308 100644 --- a/vec_altivec_float4.h +++ b/vec_altivec_float4.h @@ -529,6 +529,10 @@ namespace vecmathlib { realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } realvec log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return vec_madd(v, y.v, z.v); + } realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); } realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const diff --git a/vec_avx_double4.h b/vec_avx_double4.h index adc8c48..37fd73b 100644 --- a/vec_avx_double4.h +++ b/vec_avx_double4.h @@ -639,6 +639,10 @@ namespace vecmathlib { realvec_t log10() const { return MF::vml_log10(*this); } realvec_t log1p() const { return MF::vml_log1p(*this); } realvec_t log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return MF::vml_nextafter(*this, y); diff --git a/vec_avx_float8.h b/vec_avx_float8.h index 4367969..bba77cb 100644 --- a/vec_avx_float8.h +++ b/vec_avx_float8.h @@ -637,6 +637,10 @@ namespace vecmathlib { realvec_t log10() const { return MF::vml_log10(*this); } realvec_t log1p() const { return MF::vml_log1p(*this); } realvec_t log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return MF::vml_nextafter(*this, y); @@ -519,6 +519,14 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realvec<real_t, size> mad(realvec<real_t, size> x, + realvec<real_t, size> y, + realvec<real_t, size> z) + { + return x.mad(y, z); + } + + template<typename real_t, int size> inline realvec<real_t, size> nextafter(realvec<real_t, size> x, realvec<real_t, size> y) { diff 
--git a/vec_builtin.h b/vec_builtin.h index decab46..a9a87cb 100644 --- a/vec_builtin.h +++ b/vec_builtin.h @@ -686,6 +686,10 @@ namespace vecmathlib { realvec_t log10() const { return map(builtin_log10); } realvec_t log1p() const { return map(builtin_log1p); } realvec_t log2() const { return map(builtin_log2); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return map(builtin_nextafter, y); } realvec_t pow(realvec_t y) const { return map(builtin_pow, y); } realvec_t rcp() const { return RV(1.0) / *this; } @@ -1294,6 +1298,14 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realbuiltinvec<real_t, size> mad(realbuiltinvec<real_t, size> x, + realbuiltinvec<real_t, size> y, + realbuiltinvec<real_t, size> z) + { + return x.mad(y, z); + } + + template<typename real_t, int size> inline realbuiltinvec<real_t, size> nextafter(realbuiltinvec<real_t, size> x, realbuiltinvec<real_t, size> y) { diff --git a/vec_mic_double8.h b/vec_mic_double8.h index b4dad21..68dd5aa 100644 --- a/vec_mic_double8.h +++ b/vec_mic_double8.h @@ -594,6 +594,10 @@ namespace vecmathlib { realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } realvec log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return _mm512_fmadd_pd(v, y.v, z.v); + } realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); } realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const { return _mm512_div_pd(_mm512_set1_pd(1.0), v); } diff --git a/vec_neon_float2.h b/vec_neon_float2.h index b5f874f..5b01d7b 100644 --- a/vec_neon_float2.h +++ b/vec_neon_float2.h @@ -496,10 +496,9 @@ namespace vecmathlib { // return vrndm_f32(v); return MF::vml_floor(*this); } - realvec fma(realvec y, realvec z) const + realvec_t fma(realvec_t y, realvec_t z) const { - // TODO: vfma_f32 - return 
vmla_f32(z.v, v, y.v); + return vfma_f32(z.v, v, y.v); } realvec fmax(realvec y) const { return vmax_f32(v, y.v); } realvec fmin(realvec y) const { return vmin_f32(v, y.v); } @@ -517,6 +516,11 @@ namespace vecmathlib { realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } realvec log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + // TODO: vfma_f32 + return vmla_f32(z.v, v, y.v); + } realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); } realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const diff --git a/vec_neon_float4.h b/vec_neon_float4.h index 3944d83..94a77b9 100644 --- a/vec_neon_float4.h +++ b/vec_neon_float4.h @@ -517,10 +517,9 @@ namespace vecmathlib { // return vrndmq_f32(v); return MF::vml_floor(*this); } - realvec fma(realvec y, realvec z) const + realvec_t fma(realvec_t y, realvec_t z) const { - // TODO: vfmaq_f32 - return vmlaq_f32(z.v, v, y.v); + return vfmaq_f32(z.v, v, y.v); } realvec fmax(realvec y) const { return vmaxq_f32(v, y.v); } realvec fmin(realvec y) const { return vminq_f32(v, y.v); } @@ -538,6 +537,10 @@ namespace vecmathlib { realvec log10() const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } realvec log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return vmlaq_f32(z.v, v, y.v); + } realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); } realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const diff --git a/vec_pseudo.h b/vec_pseudo.h index ec3bcb2..5f84d3c 100644 --- a/vec_pseudo.h +++ b/vec_pseudo.h @@ -887,6 +887,10 @@ namespace vecmathlib { realvec_t log10() const { return map(vml_std::log10); } realvec_t log1p() const { return map(vml_std::log1p); } realvec_t log2() const { return map(vml_std::log2); } + realvec_t mad(realvec_t y, realvec_t z) const 
+ { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return map(vml_std::nextafter, y); @@ -1503,6 +1507,14 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realpseudovec<real_t, size> mad(realpseudovec<real_t, size> x, + realpseudovec<real_t, size> y, + realpseudovec<real_t, size> z) + { + return x.mad(y, z); + } + + template<typename real_t, int size> inline realpseudovec<real_t, size> nextafter(realpseudovec<real_t, size> x, realpseudovec<real_t, size> y) { diff --git a/vec_qpx_double4.h b/vec_qpx_double4.h index b13d607..9fa6bd0 100644 --- a/vec_qpx_double4.h +++ b/vec_qpx_double4.h @@ -644,6 +644,10 @@ namespace vecmathlib { realvec log10() const { return log10d4(v); } realvec log1p() const { return log1pd4(v); } realvec log2() const { return log2d4(v); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); diff --git a/vec_sse_double1.h b/vec_sse_double1.h index b993ba7..5558356 100644 --- a/vec_sse_double1.h +++ b/vec_sse_double1.h @@ -484,6 +484,10 @@ namespace vecmathlib { realvec_t log10() const { return MF::vml_log10(*this); } realvec_t log1p() const { return MF::vml_log1p(*this); } realvec_t log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return MF::vml_nextafter(*this, y); diff --git a/vec_sse_double2.h b/vec_sse_double2.h index 5728ea5..11790c3 100644 --- a/vec_sse_double2.h +++ b/vec_sse_double2.h @@ -606,6 +606,10 @@ namespace vecmathlib { realvec_t log10() const { return MF::vml_log10(*this); } realvec_t log1p() const { return MF::vml_log1p(*this); } realvec_t log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return 
MF::vml_nextafter(*this, y); diff --git a/vec_sse_float1.h b/vec_sse_float1.h index f038cd6..9cee891 100644 --- a/vec_sse_float1.h +++ b/vec_sse_float1.h @@ -485,6 +485,10 @@ namespace vecmathlib { realvec_t log10() const { return MF::vml_log10(*this); } realvec_t log1p() const { return MF::vml_log1p(*this); } realvec_t log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return MF::vml_nextafter(*this, y); diff --git a/vec_sse_float4.h b/vec_sse_float4.h index 75e2421..34ac64f 100644 --- a/vec_sse_float4.h +++ b/vec_sse_float4.h @@ -619,6 +619,10 @@ namespace vecmathlib { realvec_t log10() const { return MF::vml_log10(*this); } realvec_t log1p() const { return MF::vml_log1p(*this); } realvec_t log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return MF::vml_nextafter(*this, y); @@ -722,6 +722,10 @@ namespace vecmathlib { realvec_t log10() const { return MF::vml_log10(*this); } realvec_t log1p() const { return MF::vml_log1p(*this); } realvec_t log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec_t nextafter(realvec_t y) const { return MF::vml_nextafter(*this, y); @@ -1324,6 +1328,14 @@ namespace vecmathlib { } template<typename real_t, int size> + inline realtestvec<real_t, size> mad(realtestvec<real_t, size> x, + realtestvec<real_t, size> y, + realtestvec<real_t, size> z) + { + return x.mad(y, z); + } + + template<typename real_t, int size> inline realtestvec<real_t, size> nextafter(realtestvec<real_t, size> x, realtestvec<real_t, size> y) { diff --git a/vec_vsx_double2.h b/vec_vsx_double2.h index 7ee0974..6725859 100644 --- a/vec_vsx_double2.h +++ b/vec_vsx_double2.h @@ -568,6 +568,10 @@ namespace vecmathlib { realvec log10() 
const { return MF::vml_log10(*this); } realvec log1p() const { return MF::vml_log1p(*this); } realvec log2() const { return MF::vml_log2(*this); } + realvec_t mad(realvec_t y, realvec_t z) const + { + return MF::vml_mad(*this, y, z); + } realvec nextafter(realvec y) const { return MF::vml_nextafter(*this, y); } realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const |