8 files changed, 165 insertions, 17 deletions
diff --git a/mathfuncs_base.h b/mathfuncs_base.h
index ea552fc..99ff9fc 100644
--- a/mathfuncs_base.h
+++ b/mathfuncs_base.h
@@ -94,6 +94,10 @@ namespace vecmathlib {
     static realvec_t vml_cos(realvec_t x);
     static realvec_t vml_sin(realvec_t x);
     static realvec_t vml_tan(realvec_t x);
+    static realvec_t vml_cos_chebyshev_single(realvec_t x);
+    static realvec_t vml_cos_chebyshev_double(realvec_t x);
+    static realvec_t vml_sin_chebyshev_single(realvec_t x);
+    static realvec_t vml_sin_chebyshev_double(realvec_t x);
     
     // sinh
     static realvec_t vml_cosh(realvec_t x);
diff --git a/mathfuncs_sin.h b/mathfuncs_sin.h
index 2079603..107e6eb 100644
--- a/mathfuncs_sin.h
+++ b/mathfuncs_sin.h
@@ -55,13 +55,11 @@ namespace vecmathlib {
     
     return r;
   }
-  
-  
-  
+
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_cos(realvec_t x)
   {
-    return sin(x + RV(M_PI_2));
+    return vml_sin(x + RV(M_PI_2));
   }
   
   template<typename realvec_t>
@@ -92,7 +90,153 @@ namespace vecmathlib {
     
     return r;
   }
-  
+
+  template<typename realvec_t>
+  realvec_t mathfuncs<realvec_t>::vml_sin_chebyshev_single(realvec_t x)
+  {
+    typename realvec_t::scalar_t const eps __attribute__((__unused__)) = 0.000001;
+    realvec_t r, s, t;
+
+    // Reduce range: sin(x) = sin(x + 2PI)
+    t = x + RV(M_PI);
+    t = t * RV(1.0 / (2.0 * M_PI));
+    t = floor(t);
+    t = x - (t * RV(2.0 * M_PI));
+    assert(all(t >= RV(-(1.0 + eps) * M_PI) && t <= RV((1.0 + eps) * M_PI)));
+
+    // Reduce range: -sin(x) = -sin(-x)
+    s = copysign(RV(1.0), t);
+    t = fabs(t);
+
+    // Reduce range: sin(x) = sin(pi-x)
+    x = ifthen(t > RV(M_PI_2), RV(M_PI) - t, t);
+    assert(all(x >= RV(0.0 - eps) && x <= RV(M_PI_2)));
+
+    // Evaluate Chebyshev polynomial expansion r = s*T(x)
+    // where T(x) = x + c1*x^3 + c2*x^5 + c3*x^7 + ...
+    t = x * x;
+    x = x * s;
+    s = t * x;
+    r = fma(RV(-2.2636462059494963e-09), t, RV(1.9761453001926614e-08));
+    r = fma(r, t, RV(-9.1621870444937360e-08));
+    r = fma(r, t, RV(2.8673509297532770e-06));
+    r = fma(r, t, RV(-1.9850777498822076e-04));
+    r = fma(r, t, RV(8.3333708144611930e-03));
+    r = fma(r, t, RV(-1.6666667163372040e-01));
+    r = fma(r, s, x);
+    return r;
+  }
+
+  template<typename realvec_t>
+  realvec_t mathfuncs<realvec_t>::vml_sin_chebyshev_double(realvec_t x)
+  {
+    typename realvec_t::scalar_t const eps __attribute__((__unused__)) = 0.000001;
+    realvec_t r, s, t;
+
+    // Reduce range: sin(x) = sin(x + 2PI)
+    t = x + RV(M_PI);
+    t = t * RV(1.0 / (2.0 * M_PI));
+    t = floor(t);
+    t = x - (t * RV(2.0 * M_PI));
+    assert(all(t >= RV(-(1.0 + eps) * M_PI) && t <= RV((1.0 + eps) * M_PI)));
+
+    // Reduce range: -sin(x) = -sin(-x)
+    s = copysign(RV(1.0), t);
+    t = fabs(t);
+
+    // Reduce range: sin(x) = sin(pi-x)
+    x = ifthen(t > RV(M_PI_2), RV(M_PI) - t, t);
+    assert(all(x >= RV(0.0 - eps) && x <= RV(M_PI_2)));
+
+    // Evaluate Chebyshev polynomial expansion r = s*T(x)
+    // where T(x) = x + c1*x^3 + c2*x^5 + c3*x^7 + ...
+    t = x * x;
+    x = x * s;
+    s = t * x;
+    r = fma(RV(1.1888871779171205e-23), t, RV(-1.6213346583948200e-21));
+    r = fma(r, t, RV(9.4674830124704450e-19));
+    r = fma(r, t, RV(-3.6586864533554100e-16));
+    r = fma(r, t, RV(-7.5815669263036780e-13));
+    r = fma(r, t, RV(1.6058175109947732e-10));
+    r = fma(r, t, RV(-2.5052101017560582e-08));
+    r = fma(r, t, RV(2.7557319185249400e-06));
+    r = fma(r, t, RV(-1.9841269841152493e-04));
+    r = fma(r, t, RV(8.3333333333331560e-03));
+    r = fma(r, t, RV(-1.6666666666666666e-01));
+    r = fma(r, s, x);
+    return r;
+  }
+
+  template<typename realvec_t>
+  realvec_t mathfuncs<realvec_t>::vml_cos_chebyshev_single(realvec_t x)
+  {
+    typename realvec_t::boolvec_t m;
+    realvec_t r, s, t;
+
+    // Reduce range: cos(x) = cos(x + 2PI)
+    t = x + RV(M_PI);
+    t = t * RV(1.0 / (2.0 * M_PI));
+    t = floor(t);
+    t = x - (t * RV(2.0 * M_PI));
+
+    // Reduce range: cos(x) = sin(PI/2 - x)
+    t = fabs(t);
+    m = t > RV(M_PI_2);
+    x = RV(M_PI) - t;
+    s = ifthen(m, RV(-1.0), RV(1.0));
+    x = ifthen(m, x, t);
+
+    // Evaluate Chebyshev polynomial s*T(x) where
+    // T(x) = 1 + c1*x^2 + c2*x^4 + c3*x^6 + ...
+    t = x * x;
+    x = t * s;
+    r = fma(RV(-1.0986536764839979e-11), t, RV(2.0856702467901100e-09));
+    r = fma(r, t, RV(-2.7556891974788950e-07));
+    r = fma(r, t, RV(2.4801582421938170e-05));
+    r = fma(r, t, RV(-1.3888888860984269e-03));
+    r = fma(r, t, RV(4.1666666666056330e-02));
+    r = fma(r, t, RV(-5.0000000000000000e-01));
+    r = fma(r, x, s);
+    return r;
+  }
+
+  template<typename realvec_t>
+  realvec_t mathfuncs<realvec_t>::vml_cos_chebyshev_double(realvec_t x)
+  {
+    typename realvec_t::boolvec_t m;
+    realvec_t r, s, t;
+
+    // Reduce range: cos(x) = cos(x + 2PI)
+    t = x + RV(M_PI);
+    t = t * RV(1.0 / (2.0 * M_PI));
+    t = floor(t);
+    t = x - (t * RV(2.0 * M_PI));
+
+    // Reduce range: cos(x) = sin(PI/2 - x)
+    t = fabs(t);
+    m = t > RV(M_PI_2);
+    x = RV(M_PI) - t;
+    s = ifthen(m, RV(-1.0), RV(1.0));
+    x = ifthen(m, x, t);
+
+    // Evaluate Chebyshev polynomial s*T(x) where
+    // T(x) = 1 + c1*x^2 + c2*x^4 + c3*x^6 + ...
+    t = x * x;
+    x = t * s;
+    r = fma(RV(-8.6512994843471700e-22), t, RV(4.1086675770914360e-19));
+    r = fma(r, t, RV(-1.5619143199049570e-16));
+    r = fma(r, t, RV(4.7794771764282040e-14));
+    r = fma(r, t, RV(-1.1470745595224050e-11));
+    r = fma(r, t, RV(2.0876756987841530e-09));
+    r = fma(r, t, RV(-2.7557319223985710e-07));
+    r = fma(r, t, RV(2.4801587301587300e-05));
+    r = fma(r, t, RV(-1.3888888888888890e-03));
+    r = fma(r, t, RV(4.1666666666666664e-02));
+    r = fma(r, t, RV(-5.0000000000000000e-01));
+    r = fma(r, x, s);
+    return r;
+  }
+
 }; // namespace vecmathlib
 
 #endif  // #ifndef MATHFUNCS_SIN_H
diff --git a/vec_double.h b/vec_double.h
index 2392756..d19126b 100644
--- a/vec_double.h
+++ b/vec_double.h
@@ -290,7 +290,7 @@ namespace vecmathlib {
     realvec atanh() const { return MF::vml_atanh(*this); }
     realvec ceil() const { return MF::vml_ceil(*this); }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos(*this); }
+    realvec cos() const { return MF::vml_cos_chebyshev_double(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -319,7 +319,7 @@ namespace vecmathlib {
     realvec rsqrt() const { return MF::vml_rsqrt(*this); }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return MF::vml_signbit(*this); }
-    realvec sin() const { return MF::vml_sin(*this); }
+    realvec sin() const { return MF::vml_sin_chebyshev_double(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return MF::vml_sqrt(*this); }
     realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_double_avx.h b/vec_double_avx.h
index 214959b..5ecb2a5 100644
--- a/vec_double_avx.h
+++ b/vec_double_avx.h
@@ -598,7 +598,7 @@ namespace vecmathlib {
     realvec atanh() const { return MF::vml_atanh(*this); }
     realvec ceil() const { return _mm256_ceil_pd(v); }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos(*this); }
+    realvec cos() const { return MF::vml_cos_chebyshev_double(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -627,7 +627,7 @@ namespace vecmathlib {
     realvec rsqrt() const { return MF::vml_rsqrt(*this); }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return v; }
-    realvec sin() const { return MF::vml_sin(*this); }
+    realvec sin() const { return MF::vml_sin_chebyshev_double(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return _mm256_sqrt_pd(v); }
     realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_double_sse2.h b/vec_double_sse2.h
index 40c789e..636ef47 100644
--- a/vec_double_sse2.h
+++ b/vec_double_sse2.h
@@ -496,7 +496,7 @@ namespace vecmathlib {
     realvec atanh() const { return MF::vml_atanh(*this); }
     realvec ceil() const { return _mm_ceil_pd(v); }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos(*this); }
+    realvec cos() const { return MF::vml_cos_chebyshev_double(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -525,7 +525,7 @@ namespace vecmathlib {
     realvec rsqrt() const { return MF::vml_rsqrt(*this); }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return v; }
-    realvec sin() const { return MF::vml_sin(*this); }
+    realvec sin() const { return MF::vml_sin_chebyshev_double(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return _mm_sqrt_pd(v); }
     realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_float.h b/vec_float.h
index 5424b05..08bcd4f 100644
--- a/vec_float.h
+++ b/vec_float.h
@@ -290,7 +290,7 @@ namespace vecmathlib {
     realvec atanh() const { return MF::vml_atanh(*this); }
     realvec ceil() const { return MF::vml_ceil(*this); }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos(*this); }
+    realvec cos() const { return MF::vml_cos_chebyshev_single(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -319,7 +319,7 @@ namespace vecmathlib {
     realvec rsqrt() const { return MF::vml_rsqrt(*this); }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return MF::vml_signbit(*this); }
-    realvec sin() const { return MF::vml_sin(*this); }
+    realvec sin() const { return MF::vml_sin_chebyshev_single(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return MF::vml_sqrt(*this); }
     realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_float_avx.h b/vec_float_avx.h
index 08cdb08..ae6ab40 100644
--- a/vec_float_avx.h
+++ b/vec_float_avx.h
@@ -569,7 +569,7 @@ namespace vecmathlib {
     realvec atanh() const { return MF::vml_atanh(*this); }
     realvec ceil() const { return _mm256_ceil_ps(v); }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos(*this); }
+    realvec cos() const { return MF::vml_cos_chebyshev_single(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -610,7 +610,7 @@ namespace vecmathlib {
     }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return v; }
-    realvec sin() const { return MF::vml_sin(*this); }
+    realvec sin() const { return MF::vml_sin_chebyshev_single(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return _mm256_sqrt_ps(v); }
     realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_float_sse2.h b/vec_float_sse2.h
index 38c42b3..c870751 100644
--- a/vec_float_sse2.h
+++ b/vec_float_sse2.h
@@ -472,7 +472,7 @@ namespace vecmathlib {
     realvec atanh() const { return MF::vml_atanh(*this); }
     realvec ceil() const { return _mm_ceil_ps(v); }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos(*this); }
+    realvec cos() const { return MF::vml_cos_chebyshev_single(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -513,7 +513,7 @@ namespace vecmathlib {
     }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return v; }
-    realvec sin() const { return MF::vml_sin(*this); }
+    realvec sin() const { return MF::vml_sin_chebyshev_single(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return _mm_sqrt_ps(v); }
     realvec tan() const { return MF::vml_tan(*this); }