diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-02-11 13:14:28 -0500 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-02-11 13:14:28 -0500 |
commit | ea98bf5fa2bc59b0380a14881dfb7e168c331fc3 (patch) | |
tree | 3cf082fcb412a0b8aa7b443dda581e1b0116ec29 | |
parent | 710ad2c75462b9b77784773c396e9a80b92672fe (diff) | |
parent | a7b829d470c87403ac37cf433e21d99b29b311d8 (diff) | |
download | vecmathlib-ea98bf5fa2bc59b0380a14881dfb7e168c331fc3.zip vecmathlib-ea98bf5fa2bc59b0380a14881dfb7e168c331fc3.tar.gz |
Merged in upcaste/vecmathlib (pull request #2)
Re-generate pull request
-rw-r--r-- | mathfuncs_base.h | 4 | ||||
-rw-r--r-- | mathfuncs_sin.h | 154 | ||||
-rw-r--r-- | vec_double.h | 4 | ||||
-rw-r--r-- | vec_double_avx.h | 4 | ||||
-rw-r--r-- | vec_double_sse2.h | 4 | ||||
-rw-r--r-- | vec_float.h | 4 | ||||
-rw-r--r-- | vec_float_avx.h | 4 | ||||
-rw-r--r-- | vec_float_sse2.h | 4 |
8 files changed, 165 insertions, 17 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h index ea552fc..99ff9fc 100644 --- a/mathfuncs_base.h +++ b/mathfuncs_base.h @@ -94,6 +94,10 @@ namespace vecmathlib { static realvec_t vml_cos(realvec_t x); static realvec_t vml_sin(realvec_t x); static realvec_t vml_tan(realvec_t x); + static realvec_t vml_cos_chebyshev_single(realvec_t x); + static realvec_t vml_cos_chebyshev_double(realvec_t x); + static realvec_t vml_sin_chebyshev_single(realvec_t x); + static realvec_t vml_sin_chebyshev_double(realvec_t x); // sinh static realvec_t vml_cosh(realvec_t x); diff --git a/mathfuncs_sin.h b/mathfuncs_sin.h index 2079603..107e6eb 100644 --- a/mathfuncs_sin.h +++ b/mathfuncs_sin.h @@ -55,13 +55,11 @@ namespace vecmathlib { return r; } - - - + template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_cos(realvec_t x) { - return sin(x + RV(M_PI_2)); + return vml_sin(x + RV(M_PI_2)); } template<typename realvec_t> @@ -92,7 +90,153 @@ namespace vecmathlib { return r; } - + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_sin_chebyshev_single(realvec_t x) + { + typename realvec_t::scalar_t const eps __attribute__((__unused__)) = 0.000001; + realvec_t r, s, t; + + // Reduce range: sin(x) = sin(x + 2PI) + t = x + RV(M_PI); + t = t * RV(1.0 / (2.0 * M_PI)); + t = floor(t); + t = x - (t * RV(2.0 * M_PI)); + assert(all(t >= RV(-(1.0 + eps) * M_PI) && t <= RV((1.0 + eps) * M_PI))); + + // Reduce range: -sin(x) = -sin(-x) + s = copysign(RV(1.0), t); + t = fabs(t); + + // Reduce range: sin(x) = sin(pi-x) + x = ifthen(t > RV(M_PI_2), RV(M_PI) - t, t); + assert(all(x >= RV(0.0 - eps) && x <= RV(M_PI_2))); + + // Evaluate Chebyshev polynomial expansion r = s*T(x) + // where T(x) = x + c1*x^3 + c2*x^5 + c3*x^7 + ... + t = x * x; + x = x * s; + s = t * x; + r = fma(RV(-2.2636462059494963e-09), t, RV(1.9761453001926614e-08)); + r = fma(r, t, RV(-9.1621870444937360e-08)); + r = fma(r, t, RV(2.8673509297532770e-06)); + r = fma(r, t, RV(-1.9850777498822076e-04)); + r = fma(r, t, RV(8.3333708144611930e-03)); + r = fma(r, t, RV(-1.6666667163372040e-01)); + r = fma(r, s, x); + return r; + } + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_sin_chebyshev_double(realvec_t x) + { + typename realvec_t::scalar_t const eps __attribute__((__unused__)) = 0.000001; + realvec_t r, s, t; + + // Reduce range: sin(x) = sin(x + 2PI) + t = x + RV(M_PI); + t = t * RV(1.0 / (2.0 * M_PI)); + t = floor(t); + t = x - (t * RV(2.0 * M_PI)); + assert(all(t >= RV(-(1.0 + eps) * M_PI) && t <= RV((1.0 + eps) * M_PI))); + + // Reduce range: -sin(x) = -sin(-x) + s = copysign(RV(1.0), t); + t = fabs(t); + + // Reduce range: sin(x) = sin(pi-x) + x = ifthen(t > RV(M_PI_2), RV(M_PI) - t, t); + assert(all(x >= RV(0.0 - eps) && x <= RV(M_PI_2))); + + // Evaluate Chebyshev polynomial expansion r = s*T(x) + // where T(x) = x + c1*x^3 + c2*x^5 + c3*x^7 + ... + t = x * x; + x = x * s; + s = t * x; + r = fma(RV(1.1888871779171205e-23), t, RV(-1.6213346583948200e-21)); + r = fma(r, t, RV(9.4674830124704450e-19)); + r = fma(r, t, RV(-3.6586864533554100e-16)); + r = fma(r, t, RV(-7.5815669263036780e-13)); + r = fma(r, t, RV(1.6058175109947732e-10)); + r = fma(r, t, RV(-2.5052101017560582e-08)); + r = fma(r, t, RV(2.7557319185249400e-06)); + r = fma(r, t, RV(-1.9841269841152493e-04)); + r = fma(r, t, RV(8.3333333333331560e-03)); + r = fma(r, t, RV(-1.6666666666666666e-01)); + r = fma(r, s, x); + return r; + } + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_cos_chebyshev_single(realvec_t x) + { + typename realvec_t::boolvec_t m; + realvec_t r, s, t; + + // Reduce range: cos(x) = cos(x + 2PI) + t = x + RV(M_PI); + t = t * RV(1.0 / (2.0 * M_PI)); + t = floor(t); + t = x - (t * RV(2.0 * M_PI)); + + // Reduce range: cos(x) = sin(PI/2 - x) + t = fabs(t); + m = t > RV(M_PI_2); + x = RV(M_PI) - t; + s = ifthen(m, RV(-1.0), RV(1.0)); + x = ifthen(m, x, t); + + // Evaluate Chebyshev polynomial s*T(x) where + // T(x) = 1 + c1*x^2 + c2*x^4 + c3*x^6 + ... + t = x * x; + x = t * s; + r = fma(RV(-1.0986536764839979e-11), t, RV(2.0856702467901100e-09)); + r = fma(r, t, RV(-2.7556891974788950e-07)); + r = fma(r, t, RV(2.4801582421938170e-05)); + r = fma(r, t, RV(-1.3888888860984269e-03)); + r = fma(r, t, RV(4.1666666666056330e-02)); + r = fma(r, t, RV(-5.0000000000000000e-01)); + r = fma(r, x, s); + return r; + } + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_cos_chebyshev_double(realvec_t x) + { + typename realvec_t::boolvec_t m; + realvec_t r, s, t; + + // Reduce range: cos(x) = cos(x + 2PI) + t = x + RV(M_PI); + t = t * RV(1.0 / (2.0 * M_PI)); + t = floor(t); + t = x - (t * RV(2.0 * M_PI)); + + // Reduce range: cos(x) = sin(PI/2 - x) + t = fabs(t); + m = t > RV(M_PI_2); + x = RV(M_PI) - t; + s = ifthen(m, RV(-1.0), RV(1.0)); + x = ifthen(m, x, t); + + // Evaluate Chebyshev polynomial s*T(x) where + // T(x) = 1 + c1*x^2 + c2*x^4 + c3*x^6 + ... + t = x * x; + x = t * s; + r = fma(RV(-8.6512994843471700e-22), t, RV(4.1086675770914360e-19)); + r = fma(r, t, RV(-1.5619143199049570e-16)); + r = fma(r, t, RV(4.7794771764282040e-14)); + r = fma(r, t, RV(-1.1470745595224050e-11)); + r = fma(r, t, RV(2.0876756987841530e-09)); + r = fma(r, t, RV(-2.7557319223985710e-07)); + r = fma(r, t, RV(2.4801587301587300e-05)); + r = fma(r, t, RV(-1.3888888888888890e-03)); + r = fma(r, t, RV(4.1666666666666664e-02)); + r = fma(r, t, RV(-5.0000000000000000e-01)); + r = fma(r, x, s); + return r; + } + }; // namespace vecmathlib #endif // #ifndef MATHFUNCS_SIN_H diff --git a/vec_double.h b/vec_double.h index 2392756..d19126b 100644 --- a/vec_double.h +++ b/vec_double.h @@ -290,7 +290,7 @@ namespace vecmathlib { realvec atanh() const { return MF::vml_atanh(*this); } realvec ceil() const { return MF::vml_ceil(*this); } realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } - realvec cos() const { return MF::vml_cos(*this); } + realvec cos() const { return MF::vml_cos_chebyshev_double(*this); } realvec cosh() const { return MF::vml_cosh(*this); } realvec exp() const { return MF::vml_exp(*this); } realvec exp10() const { return MF::vml_exp10(*this); } @@ -319,7 +319,7 @@ namespace vecmathlib { realvec rsqrt() const { return MF::vml_rsqrt(*this); } realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return MF::vml_signbit(*this); } - realvec sin() const { return MF::vml_sin(*this); } + realvec sin() const { return MF::vml_sin_chebyshev_double(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return MF::vml_sqrt(*this); } realvec tan() const { return MF::vml_tan(*this); } diff --git a/vec_double_avx.h b/vec_double_avx.h index 214959b..5ecb2a5 100644 --- a/vec_double_avx.h +++ b/vec_double_avx.h @@ -598,7 +598,7 @@ namespace vecmathlib { realvec atanh() const { return MF::vml_atanh(*this); } realvec ceil() const { return _mm256_ceil_pd(v); } realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } - realvec cos() const { return MF::vml_cos(*this); } + realvec cos() const { return MF::vml_cos_chebyshev_double(*this); } realvec cosh() const { return MF::vml_cosh(*this); } realvec exp() const { return MF::vml_exp(*this); } realvec exp10() const { return MF::vml_exp10(*this); } @@ -627,7 +627,7 @@ namespace vecmathlib { realvec rsqrt() const { return MF::vml_rsqrt(*this); } realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return v; } - realvec sin() const { return MF::vml_sin(*this); } + realvec sin() const { return MF::vml_sin_chebyshev_double(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm256_sqrt_pd(v); } realvec tan() const { return MF::vml_tan(*this); } diff --git a/vec_double_sse2.h b/vec_double_sse2.h index 40c789e..636ef47 100644 --- a/vec_double_sse2.h +++ b/vec_double_sse2.h @@ -496,7 +496,7 @@ namespace vecmathlib { realvec atanh() const { return MF::vml_atanh(*this); } realvec ceil() const { return _mm_ceil_pd(v); } realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } - realvec cos() const { return MF::vml_cos(*this); } + realvec cos() const { return MF::vml_cos_chebyshev_double(*this); } realvec cosh() const { return MF::vml_cosh(*this); } realvec exp() const { return MF::vml_exp(*this); } realvec exp10() const { return MF::vml_exp10(*this); } @@ -525,7 +525,7 @@ namespace vecmathlib { realvec rsqrt() const { return MF::vml_rsqrt(*this); } realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return v; } - realvec sin() const { return MF::vml_sin(*this); } + realvec sin() const { return MF::vml_sin_chebyshev_double(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm_sqrt_pd(v); } realvec tan() const { return MF::vml_tan(*this); } diff --git a/vec_float.h b/vec_float.h index 5424b05..08bcd4f 100644 --- a/vec_float.h +++ b/vec_float.h @@ -290,7 +290,7 @@ namespace vecmathlib { realvec atanh() const { return MF::vml_atanh(*this); } realvec ceil() const { return MF::vml_ceil(*this); } realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } - realvec cos() const { return MF::vml_cos(*this); } + realvec cos() const { return MF::vml_cos_chebyshev_single(*this); } realvec cosh() const { return MF::vml_cosh(*this); } realvec exp() const { return MF::vml_exp(*this); } realvec exp10() const { return MF::vml_exp10(*this); } @@ -319,7 +319,7 @@ namespace vecmathlib { realvec rsqrt() const { return MF::vml_rsqrt(*this); } realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return MF::vml_signbit(*this); } - realvec sin() const { return MF::vml_sin(*this); } + realvec sin() const { return MF::vml_sin_chebyshev_single(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return MF::vml_sqrt(*this); } realvec tan() const { return MF::vml_tan(*this); } diff --git a/vec_float_avx.h b/vec_float_avx.h index 08cdb08..ae6ab40 100644 --- a/vec_float_avx.h +++ b/vec_float_avx.h @@ -569,7 +569,7 @@ namespace vecmathlib { realvec atanh() const { return MF::vml_atanh(*this); } realvec ceil() const { return _mm256_ceil_ps(v); } realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } - realvec cos() const { return MF::vml_cos(*this); } + realvec cos() const { return MF::vml_cos_chebyshev_single(*this); } realvec cosh() const { return MF::vml_cosh(*this); } realvec exp() const { return MF::vml_exp(*this); } realvec exp10() const { return MF::vml_exp10(*this); } @@ -610,7 +610,7 @@ namespace vecmathlib { } realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return v; } - realvec sin() const { return MF::vml_sin(*this); } + realvec sin() const { return MF::vml_sin_chebyshev_single(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm256_sqrt_ps(v); } realvec tan() const { return MF::vml_tan(*this); } diff --git a/vec_float_sse2.h b/vec_float_sse2.h index 38c42b3..c870751 100644 --- a/vec_float_sse2.h +++ b/vec_float_sse2.h @@ -472,7 +472,7 @@ namespace vecmathlib { realvec atanh() const { return MF::vml_atanh(*this); } realvec ceil() const { return _mm_ceil_ps(v); } realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } - realvec cos() const { return MF::vml_cos(*this); } + realvec cos() const { return MF::vml_cos_chebyshev_single(*this); } realvec cosh() const { return MF::vml_cosh(*this); } realvec exp() const { return MF::vml_exp(*this); } realvec exp10() const { return MF::vml_exp10(*this); } @@ -513,7 +513,7 @@ namespace vecmathlib { } realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } boolvec_t signbit() const { return v; } - realvec sin() const { return MF::vml_sin(*this); } + realvec sin() const { return MF::vml_sin_chebyshev_single(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm_sqrt_ps(v); } realvec tan() const { return MF::vml_tan(*this); } |