summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mathfuncs_base.h4
-rw-r--r--mathfuncs_sin.h154
-rw-r--r--vec_double.h4
-rw-r--r--vec_double_avx.h4
-rw-r--r--vec_double_sse2.h4
-rw-r--r--vec_float.h4
-rw-r--r--vec_float_avx.h4
-rw-r--r--vec_float_sse2.h4
8 files changed, 165 insertions, 17 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h
index ea552fc..99ff9fc 100644
--- a/mathfuncs_base.h
+++ b/mathfuncs_base.h
@@ -94,6 +94,10 @@ namespace vecmathlib {
static realvec_t vml_cos(realvec_t x);
static realvec_t vml_sin(realvec_t x);
static realvec_t vml_tan(realvec_t x);
+ static realvec_t vml_cos_chebyshev_single(realvec_t x);
+ static realvec_t vml_cos_chebyshev_double(realvec_t x);
+ static realvec_t vml_sin_chebyshev_single(realvec_t x);
+ static realvec_t vml_sin_chebyshev_double(realvec_t x);
// sinh
static realvec_t vml_cosh(realvec_t x);
diff --git a/mathfuncs_sin.h b/mathfuncs_sin.h
index 2079603..107e6eb 100644
--- a/mathfuncs_sin.h
+++ b/mathfuncs_sin.h
@@ -55,13 +55,11 @@ namespace vecmathlib {
return r;
}
-
-
-
+
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_cos(realvec_t x)
{
- return sin(x + RV(M_PI_2));
+ return vml_sin(x + RV(M_PI_2));
}
template<typename realvec_t>
@@ -92,7 +90,153 @@ namespace vecmathlib {
return r;
}
-
+
+ template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_sin_chebyshev_single(realvec_t x)
+ {
+ typename realvec_t::scalar_t const eps __attribute__((__unused__)) = 0.000001;
+ realvec_t r, s, t;
+
+ // Reduce range: sin(x) = sin(x + 2PI)
+ t = x + RV(M_PI);
+ t = t * RV(1.0 / (2.0 * M_PI));
+ t = floor(t);
+ t = x - (t * RV(2.0 * M_PI));
+ assert(all(t >= RV(-(1.0 + eps) * M_PI) && t <= RV((1.0 + eps) * M_PI)));
+
+ // Reduce range: -sin(x) = -sin(-x)
+ s = copysign(RV(1.0), t);
+ t = fabs(t);
+
+ // Reduce range: sin(x) = sin(pi-x)
+ x = ifthen(t > RV(M_PI_2), RV(M_PI) - t, t);
+ assert(all(x >= RV(0.0 - eps) && x <= RV(M_PI_2)));
+
+ // Evaluate Chebyshev polynomial expansion r = s*T(x)
+ // where T(x) = x + c1*x^3 + c2*x^5 + c3*x^7 + ...
+ t = x * x;
+ x = x * s;
+ s = t * x;
+ r = fma(RV(-2.2636462059494963e-09), t, RV(1.9761453001926614e-08));
+ r = fma(r, t, RV(-9.1621870444937360e-08));
+ r = fma(r, t, RV(2.8673509297532770e-06));
+ r = fma(r, t, RV(-1.9850777498822076e-04));
+ r = fma(r, t, RV(8.3333708144611930e-03));
+ r = fma(r, t, RV(-1.6666667163372040e-01));
+ r = fma(r, s, x);
+ return r;
+ }
+
+ template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_sin_chebyshev_double(realvec_t x)
+ {
+ typename realvec_t::scalar_t const eps __attribute__((__unused__)) = 0.000001;
+ realvec_t r, s, t;
+
+ // Reduce range: sin(x) = sin(x + 2PI)
+ t = x + RV(M_PI);
+ t = t * RV(1.0 / (2.0 * M_PI));
+ t = floor(t);
+ t = x - (t * RV(2.0 * M_PI));
+ assert(all(t >= RV(-(1.0 + eps) * M_PI) && t <= RV((1.0 + eps) * M_PI)));
+
+ // Reduce range: -sin(x) = -sin(-x)
+ s = copysign(RV(1.0), t);
+ t = fabs(t);
+
+ // Reduce range: sin(x) = sin(pi-x)
+ x = ifthen(t > RV(M_PI_2), RV(M_PI) - t, t);
+ assert(all(x >= RV(0.0 - eps) && x <= RV(M_PI_2)));
+
+ // Evaluate Chebyshev polynomial expansion r = s*T(x)
+ // where T(x) = x + c1*x^3 + c2*x^5 + c3*x^7 + ...
+ t = x * x;
+ x = x * s;
+ s = t * x;
+ r = fma(RV(1.1888871779171205e-23), t, RV(-1.6213346583948200e-21));
+ r = fma(r, t, RV(9.4674830124704450e-19));
+ r = fma(r, t, RV(-3.6586864533554100e-16));
+ r = fma(r, t, RV(-7.5815669263036780e-13));
+ r = fma(r, t, RV(1.6058175109947732e-10));
+ r = fma(r, t, RV(-2.5052101017560582e-08));
+ r = fma(r, t, RV(2.7557319185249400e-06));
+ r = fma(r, t, RV(-1.9841269841152493e-04));
+ r = fma(r, t, RV(8.3333333333331560e-03));
+ r = fma(r, t, RV(-1.6666666666666666e-01));
+ r = fma(r, s, x);
+ return r;
+ }
+
+ template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_cos_chebyshev_single(realvec_t x)
+ {
+ typename realvec_t::boolvec_t m;
+ realvec_t r, s, t;
+
+ // Reduce range: cos(x) = cos(x + 2PI)
+ t = x + RV(M_PI);
+ t = t * RV(1.0 / (2.0 * M_PI));
+ t = floor(t);
+ t = x - (t * RV(2.0 * M_PI));
+
+ // Reduce range: cos(x) = sin(PI/2 - x)
+ t = fabs(t);
+ m = t > RV(M_PI_2);
+ x = RV(M_PI) - t;
+ s = ifthen(m, RV(-1.0), RV(1.0));
+ x = ifthen(m, x, t);
+
+ // Evaluate Chebyshev polynomial s*T(x) where
+ // T(x) = 1 + c1*x^2 + c2*x^4 + c3*x^6 + ...
+ t = x * x;
+ x = t * s;
+ r = fma(RV(-1.0986536764839979e-11), t, RV(2.0856702467901100e-09));
+ r = fma(r, t, RV(-2.7556891974788950e-07));
+ r = fma(r, t, RV(2.4801582421938170e-05));
+ r = fma(r, t, RV(-1.3888888860984269e-03));
+ r = fma(r, t, RV(4.1666666666056330e-02));
+ r = fma(r, t, RV(-5.0000000000000000e-01));
+ r = fma(r, x, s);
+ return r;
+ }
+
+ template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_cos_chebyshev_double(realvec_t x)
+ {
+ typename realvec_t::boolvec_t m;
+ realvec_t r, s, t;
+
+ // Reduce range: cos(x) = cos(x + 2PI)
+ t = x + RV(M_PI);
+ t = t * RV(1.0 / (2.0 * M_PI));
+ t = floor(t);
+ t = x - (t * RV(2.0 * M_PI));
+
+ // Reduce range: cos(x) = sin(PI/2 - x)
+ t = fabs(t);
+ m = t > RV(M_PI_2);
+ x = RV(M_PI) - t;
+ s = ifthen(m, RV(-1.0), RV(1.0));
+ x = ifthen(m, x, t);
+
+ // Evaluate Chebyshev polynomial s*T(x) where
+ // T(x) = 1 + c1*x^2 + c2*x^4 + c3*x^6 + ...
+ t = x * x;
+ x = t * s;
+ r = fma(RV(-8.6512994843471700e-22), t, RV(4.1086675770914360e-19));
+ r = fma(r, t, RV(-1.5619143199049570e-16));
+ r = fma(r, t, RV(4.7794771764282040e-14));
+ r = fma(r, t, RV(-1.1470745595224050e-11));
+ r = fma(r, t, RV(2.0876756987841530e-09));
+ r = fma(r, t, RV(-2.7557319223985710e-07));
+ r = fma(r, t, RV(2.4801587301587300e-05));
+ r = fma(r, t, RV(-1.3888888888888890e-03));
+ r = fma(r, t, RV(4.1666666666666664e-02));
+ r = fma(r, t, RV(-5.0000000000000000e-01));
+ r = fma(r, x, s);
+ return r;
+ }
+
}; // namespace vecmathlib
#endif // #ifndef MATHFUNCS_SIN_H
diff --git a/vec_double.h b/vec_double.h
index 2392756..d19126b 100644
--- a/vec_double.h
+++ b/vec_double.h
@@ -290,7 +290,7 @@ namespace vecmathlib {
realvec atanh() const { return MF::vml_atanh(*this); }
realvec ceil() const { return MF::vml_ceil(*this); }
realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
- realvec cos() const { return MF::vml_cos(*this); }
+ realvec cos() const { return MF::vml_cos_chebyshev_double(*this); }
realvec cosh() const { return MF::vml_cosh(*this); }
realvec exp() const { return MF::vml_exp(*this); }
realvec exp10() const { return MF::vml_exp10(*this); }
@@ -319,7 +319,7 @@ namespace vecmathlib {
realvec rsqrt() const { return MF::vml_rsqrt(*this); }
realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
boolvec_t signbit() const { return MF::vml_signbit(*this); }
- realvec sin() const { return MF::vml_sin(*this); }
+ realvec sin() const { return MF::vml_sin_chebyshev_double(*this); }
realvec sinh() const { return MF::vml_sinh(*this); }
realvec sqrt() const { return MF::vml_sqrt(*this); }
realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_double_avx.h b/vec_double_avx.h
index 214959b..5ecb2a5 100644
--- a/vec_double_avx.h
+++ b/vec_double_avx.h
@@ -598,7 +598,7 @@ namespace vecmathlib {
realvec atanh() const { return MF::vml_atanh(*this); }
realvec ceil() const { return _mm256_ceil_pd(v); }
realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
- realvec cos() const { return MF::vml_cos(*this); }
+ realvec cos() const { return MF::vml_cos_chebyshev_double(*this); }
realvec cosh() const { return MF::vml_cosh(*this); }
realvec exp() const { return MF::vml_exp(*this); }
realvec exp10() const { return MF::vml_exp10(*this); }
@@ -627,7 +627,7 @@ namespace vecmathlib {
realvec rsqrt() const { return MF::vml_rsqrt(*this); }
realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
boolvec_t signbit() const { return v; }
- realvec sin() const { return MF::vml_sin(*this); }
+ realvec sin() const { return MF::vml_sin_chebyshev_double(*this); }
realvec sinh() const { return MF::vml_sinh(*this); }
realvec sqrt() const { return _mm256_sqrt_pd(v); }
realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_double_sse2.h b/vec_double_sse2.h
index 40c789e..636ef47 100644
--- a/vec_double_sse2.h
+++ b/vec_double_sse2.h
@@ -496,7 +496,7 @@ namespace vecmathlib {
realvec atanh() const { return MF::vml_atanh(*this); }
realvec ceil() const { return _mm_ceil_pd(v); }
realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
- realvec cos() const { return MF::vml_cos(*this); }
+ realvec cos() const { return MF::vml_cos_chebyshev_double(*this); }
realvec cosh() const { return MF::vml_cosh(*this); }
realvec exp() const { return MF::vml_exp(*this); }
realvec exp10() const { return MF::vml_exp10(*this); }
@@ -525,7 +525,7 @@ namespace vecmathlib {
realvec rsqrt() const { return MF::vml_rsqrt(*this); }
realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
boolvec_t signbit() const { return v; }
- realvec sin() const { return MF::vml_sin(*this); }
+ realvec sin() const { return MF::vml_sin_chebyshev_double(*this); }
realvec sinh() const { return MF::vml_sinh(*this); }
realvec sqrt() const { return _mm_sqrt_pd(v); }
realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_float.h b/vec_float.h
index 5424b05..08bcd4f 100644
--- a/vec_float.h
+++ b/vec_float.h
@@ -290,7 +290,7 @@ namespace vecmathlib {
realvec atanh() const { return MF::vml_atanh(*this); }
realvec ceil() const { return MF::vml_ceil(*this); }
realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
- realvec cos() const { return MF::vml_cos(*this); }
+ realvec cos() const { return MF::vml_cos_chebyshev_single(*this); }
realvec cosh() const { return MF::vml_cosh(*this); }
realvec exp() const { return MF::vml_exp(*this); }
realvec exp10() const { return MF::vml_exp10(*this); }
@@ -319,7 +319,7 @@ namespace vecmathlib {
realvec rsqrt() const { return MF::vml_rsqrt(*this); }
realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
boolvec_t signbit() const { return MF::vml_signbit(*this); }
- realvec sin() const { return MF::vml_sin(*this); }
+ realvec sin() const { return MF::vml_sin_chebyshev_single(*this); }
realvec sinh() const { return MF::vml_sinh(*this); }
realvec sqrt() const { return MF::vml_sqrt(*this); }
realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_float_avx.h b/vec_float_avx.h
index 08cdb08..ae6ab40 100644
--- a/vec_float_avx.h
+++ b/vec_float_avx.h
@@ -569,7 +569,7 @@ namespace vecmathlib {
realvec atanh() const { return MF::vml_atanh(*this); }
realvec ceil() const { return _mm256_ceil_ps(v); }
realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
- realvec cos() const { return MF::vml_cos(*this); }
+ realvec cos() const { return MF::vml_cos_chebyshev_single(*this); }
realvec cosh() const { return MF::vml_cosh(*this); }
realvec exp() const { return MF::vml_exp(*this); }
realvec exp10() const { return MF::vml_exp10(*this); }
@@ -610,7 +610,7 @@ namespace vecmathlib {
}
realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
boolvec_t signbit() const { return v; }
- realvec sin() const { return MF::vml_sin(*this); }
+ realvec sin() const { return MF::vml_sin_chebyshev_single(*this); }
realvec sinh() const { return MF::vml_sinh(*this); }
realvec sqrt() const { return _mm256_sqrt_ps(v); }
realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_float_sse2.h b/vec_float_sse2.h
index 38c42b3..c870751 100644
--- a/vec_float_sse2.h
+++ b/vec_float_sse2.h
@@ -472,7 +472,7 @@ namespace vecmathlib {
realvec atanh() const { return MF::vml_atanh(*this); }
realvec ceil() const { return _mm_ceil_ps(v); }
realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
- realvec cos() const { return MF::vml_cos(*this); }
+ realvec cos() const { return MF::vml_cos_chebyshev_single(*this); }
realvec cosh() const { return MF::vml_cosh(*this); }
realvec exp() const { return MF::vml_exp(*this); }
realvec exp10() const { return MF::vml_exp10(*this); }
@@ -513,7 +513,7 @@ namespace vecmathlib {
}
realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
boolvec_t signbit() const { return v; }
- realvec sin() const { return MF::vml_sin(*this); }
+ realvec sin() const { return MF::vml_sin_chebyshev_single(*this); }
realvec sinh() const { return MF::vml_sinh(*this); }
realvec sqrt() const { return _mm_sqrt_ps(v); }
realvec tan() const { return MF::vml_tan(*this); }
OpenPOWER on IntegriCloud