From 078cc157a086a0f6b47813044ee16e9a842bce28 Mon Sep 17 00:00:00 2001
From: Erik Schnetter <schnetter@gmail.com>
Date: Thu, 14 Feb 2013 16:51:53 -0500
Subject: Fold Chebyshev versions of sin and cos back into the main versions

---
 mathfuncs_base.h  |   4 -
 mathfuncs_sin.h   | 233 +++++++++---------------------------------------------
 vec_double_avx.h  |   4 +-
 vec_double_sse2.h |   4 +-
 vec_float_avx.h   |   4 +-
 vec_float_sse2.h  |   4 +-
 6 files changed, 46 insertions(+), 207 deletions(-)
diff --git a/mathfuncs_base.h b/mathfuncs_base.h
index 8c5c99a..30f8321 100644
--- a/mathfuncs_base.h
+++ b/mathfuncs_base.h
@@ -94,10 +94,6 @@ namespace vecmathlib {
     static realvec_t vml_cos(realvec_t x);
     static realvec_t vml_sin(realvec_t x);
     static realvec_t vml_tan(realvec_t x);
-    static realvec_t vml_cos_chebyshev_single(realvec_t x);
-    static realvec_t vml_cos_chebyshev_double(realvec_t x);
-    static realvec_t vml_sin_chebyshev_single(realvec_t x);
-    static realvec_t vml_sin_chebyshev_double(realvec_t x);
     
     // sinh
     static realvec_t vml_cosh(realvec_t x);
diff --git a/mathfuncs_sin.h b/mathfuncs_sin.h
index 107e6eb..9e1b900 100644
--- a/mathfuncs_sin.h
+++ b/mathfuncs_sin.h
@@ -15,40 +15,52 @@ namespace vecmathlib {
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_sin(realvec_t x)
   {
-    typename realvec_t::scalar_t eps __attribute__((__unused__)) = 0.000001;
+    // Rescale input
+    x *= RV(1.0/(2.0*M_PI));
     
     // Reduce range: sin(x) = sin(x + 2pi)
-    x = remainder(x, RV(2.0*M_PI));
-    assert(all(x >= RV(-(1.0+eps)*M_PI) && x <= RV((1.0+eps)*M_PI)));
+    x -= round(x);
+    VML_ASSERT(all(x >= RV(-0.5) && x <= RV(+0.5)));
     
     // Reduce range: sin(x) = -sin(-x)
     realvec_t sign = x;
     x = fabs(x);
-    assert(all(x >= RV(0.0) && x <= RV((1.0+eps)*M_PI)));
+    VML_ASSERT(all(x >= RV(0.0) && x <= RV(0.5)));
     
-    // Reduce range: sin(x) = sin(pi-x)
-    x = ifthen(x > RV(M_PI_2), RV(M_PI) - x, x);
-    assert(all(x >= RV(0.0-eps) && x <= RV(M_PI_2)));
+    // Reduce range: sin(x) = sin(pi - x)
+    x = fmin(x, RV(0.5)-x);
+    VML_ASSERT(all(x >= RV(0.0) && x <= RV(0.25)));
     
-    // Reduce range: cos(x) = sin(pi/2 - x)
-    boolvec_t use_cos = x > RV(0.5*M_PI_2);
-    x = ifthen(use_cos, RV(M_PI_2) - x, x);
-    
-    // Taylor expansion
-    // sin(x) = Sum[n=0,nmax,n%2!=0] (-1)^((n-1)/2) x^n / n!
-    // cos(x) = Sum[n=0,nmax,n%2==0] (-1)^((n-1)/2) x^n / n!
-    realvec_t noffset = ifthen(use_cos, RV(-1.0), RV(0.0));
-    int const nmax = 15;
+    // Polynomial expansion
     realvec_t x2 = x*x;
-    realvec_t y = ifthen(use_cos, RV(1.0), x);
-    realvec_t r = y;
-    for (int n=3; n<nmax; n+=2) {
-      // sin: (n-1)*n
-      // cos: n*(n+1)
-      realvec_t rn = RV(FP::convert_float(n)) + noffset;
-      y *= - x2 * rcp((rn-RV(1.0))*rn);
-      r += y;
+    realvec_t r;
+    switch (sizeof(real_t)) {
+    case 4:
+      // float, error=9.63199952895688527467513447362e-9
+      r = RV(+39.6507114184650539767708004425);
+      r = fma(r, x2, RV(-76.564420291497946510082570076));
+      r = fma(r, x2, RV(+81.601622084783653475948476293)); 
+      r = fma(r, x2, RV(-41.3416645219970165527584702233));
+      r = fma(r, x2, RV(+6.2831851984651420764055506572));
+      break;
+    case 8:
+      // double, error=5.7727879332805926019333251099475986171199401686760527068393e-19
+      x*=RV(4.0);
+      x2*=RV(16.0);
+      r = RV(+5.86762142331525639276611151945042252894600631928465887664953969158e-12);
+      r = fma(r, x2, RV(-6.6841794237726945077103730471119313610778831290978967922327437499159e-10));
+      r = fma(r, x2, RV(+5.69213209691935763964803333647156625719770522961368e-8));
+      r = fma(r, x2, RV(-3.598842978568308467678745880795603458651065024316994e-6));
+      r = fma(r, x2, RV(+0.000160441184690020871131343239138998965356942754919962001));
+      r = fma(r, x2, RV(-0.0046817541352970526059161105189653385731022565875740295038));
+      r = fma(r, x2, RV(+0.0796926262461644478160657529010052899007494340748685273131));
+      r = fma(r, x2, RV(-0.6459640975062461124233071963502627016003287601029162984167));
+      r = fma(r, x2, RV(+1.5707963267948966169881546168049607518586729643258729850326));
+      break;
+    default:
+      __builtin_unreachable();
     }
+    r *= x;
     
     // Undo range reduction
     r = copysign(r, sign);
@@ -65,178 +77,9 @@ namespace vecmathlib {
   template<typename realvec_t>
   realvec_t mathfuncs<realvec_t>::vml_tan(realvec_t x)
   {
-    // Reduce range: tan(x) = tan(x + pi)
-    x = remainder(x, RV(M_PI));
-    assert(all(x >= RV(-M_PI_2) && x <= RV(M_PI_2)));
-    
-    // Reduce range: tan(x) = -tan(-x)
-    realvec_t sign = x;
-    x = fabs(x);
-    assert(all(x >= RV(0.0) && x <= RV(M_PI_2)));
-    
-    // Reduce range: cot(x) = -tan(x + pi/2)
-    boolvec_t use_cot = x > RV(0.5 * M_PI_2);
-    x = ifthen(use_cot, RV(M_PI_2) -x, x);
-    assert(all(x >= RV(0.0) && x <= RV(0.5 * M_PI_2)));
-    
-    // Calculate tan
-    realvec_t r = sin(x) / cos(x);
-    
-    // Undo range reduction
-    r = ifthen(use_cot, -rcp(r), r);
-    
-    // Undo range reducion
-    r = copysign(r, sign);
-    
-    return r;
-  }
-
-  template<typename realvec_t>
-  realvec_t mathfuncs<realvec_t>::vml_sin_chebyshev_single(realvec_t x)
-  {
-    typename realvec_t::scalar_t const eps __attribute__((__unused__)) = 0.000001;
-    realvec_t r, s, t;
-
-    // Reduce range: sin(x) = sin(x + 2PI)
-    t = x + RV(M_PI);
-    t = t * RV(1.0 / (2.0 * M_PI));
-    t = floor(t);
-    t = x - (t * RV(2.0 * M_PI));
-    assert(all(t >= RV(-(1.0 + eps) * M_PI) && t <= RV((1.0 + eps) * M_PI)));
-
-    // Reduce range: -sin(x) = -sin(-x)
-    s = copysign(RV(1.0), t);
-    t = fabs(t);
-
-    // Reduce range: sin(x) = sin(pi-x)
-    x = ifthen(t > RV(M_PI_2), RV(M_PI) - t, t);
-    assert(all(x >= RV(0.0 - eps) && x <= RV(M_PI_2)));
-
-    // Evaluate Chebyshev polynomial expansion r = s*T(x)
-    // where T(x) = x + c1*x^3 + c2*x^5 + c3*x^7 + ...
-    t = x * x;
-    x = x * s;
-    s = t * x;
-    r = fma(RV(-2.2636462059494963e-09), t, RV(1.9761453001926614e-08));
-    r = fma(r, t, RV(-9.1621870444937360e-08));
-    r = fma(r, t, RV(2.8673509297532770e-06));
-    r = fma(r, t, RV(-1.9850777498822076e-04));
-    r = fma(r, t, RV(8.3333708144611930e-03));
-    r = fma(r, t, RV(-1.6666667163372040e-01));
-    r = fma(r, s, x);
-    return r;
-  }
-
-  template<typename realvec_t>
-  realvec_t mathfuncs<realvec_t>::vml_sin_chebyshev_double(realvec_t x)
-  {
-    typename realvec_t::scalar_t const eps __attribute__((__unused__)) = 0.000001;
-    realvec_t r, s, t;
-
-    // Reduce range: sin(x) = sin(x + 2PI)
-    t = x + RV(M_PI);
-    t = t * RV(1.0 / (2.0 * M_PI));
-    t = floor(t);
-    t = x - (t * RV(2.0 * M_PI));
-    assert(all(t >= RV(-(1.0 + eps) * M_PI) && t <= RV((1.0 + eps) * M_PI)));
-
-    // Reduce range: -sin(x) = -sin(-x)
-    s = copysign(RV(1.0), t);
-    t = fabs(t);
-
-    // Reduce range: sin(x) = sin(pi-x)
-    x = ifthen(t > RV(M_PI_2), RV(M_PI) - t, t);
-    assert(all(x >= RV(0.0 - eps) && x <= RV(M_PI_2)));
-
-    // Evaluate Chebyshev polynomial expansion r = s*T(x)
-    // where T(x) = x + c1*x^3 + c2*x^5 + c3*x^7 + ...
-    t = x * x;
-    x = x * s;
-    s = t * x;
-    r = fma(RV(1.1888871779171205e-23), t, RV(-1.6213346583948200e-21));
-    r = fma(r, t, RV(9.4674830124704450e-19));
-    r = fma(r, t, RV(-3.6586864533554100e-16));
-    r = fma(r, t, RV(-7.5815669263036780e-13));
-    r = fma(r, t, RV(1.6058175109947732e-10));
-    r = fma(r, t, RV(-2.5052101017560582e-08));
-    r = fma(r, t, RV(2.7557319185249400e-06));
-    r = fma(r, t, RV(-1.9841269841152493e-04));
-    r = fma(r, t, RV(8.3333333333331560e-03));
-    r = fma(r, t, RV(-1.6666666666666666e-01));
-    r = fma(r, s, x);
-    return r;
-  }
-
-  template<typename realvec_t>
-  realvec_t mathfuncs<realvec_t>::vml_cos_chebyshev_single(realvec_t x)
-  {
-    typename realvec_t::boolvec_t m;
-    realvec_t r, s, t;
-
-    // Reduce range: cos(x) = cos(x + 2PI)
-    t = x + RV(M_PI);
-    t = t * RV(1.0 / (2.0 * M_PI));
-    t = floor(t);
-    t = x - (t * RV(2.0 * M_PI));
-
-    // Reduce range: cos(x) = sin(PI/2 - x)
-    t = fabs(t);
-    m = t > RV(M_PI_2);
-    x = RV(M_PI) - t;
-    s = ifthen(m, RV(-1.0), RV(1.0));
-    x = ifthen(m, x, t);
-
-    // Evaluate Chebyshev polynomial s*T(x) where
-    // T(x) = 1 + c1*x^2 + c2*x^4 + c3*x^6 + ...
-    t = x * x;
-    x = t * s;
-    r = fma(RV(-1.0986536764839979e-11), t, RV(2.0856702467901100e-09));
-    r = fma(r, t, RV(-2.7556891974788950e-07));
-    r = fma(r, t, RV(2.4801582421938170e-05));
-    r = fma(r, t, RV(-1.3888888860984269e-03));
-    r = fma(r, t, RV(4.1666666666056330e-02));
-    r = fma(r, t, RV(-5.0000000000000000e-01));
-    r = fma(r, x, s);
-    return r;
-  }
-
-  template<typename realvec_t>
-  realvec_t mathfuncs<realvec_t>::vml_cos_chebyshev_double(realvec_t x)
-  {
-    typename realvec_t::boolvec_t m;
-    realvec_t r, s, t;
-
-    // Reduce range: cos(x) = cos(x + 2PI)
-    t = x + RV(M_PI);
-    t = t * RV(1.0 / (2.0 * M_PI));
-    t = floor(t);
-    t = x - (t * RV(2.0 * M_PI));
-
-    // Reduce range: cos(x) = sin(PI/2 - x)
-    t = fabs(t);
-    m = t > RV(M_PI_2);
-    x = RV(M_PI) - t;
-    s = ifthen(m, RV(-1.0), RV(1.0));
-    x = ifthen(m, x, t);
-
-    // Evaluate Chebyshev polynomial s*T(x) where
-    // T(x) = 1 + c1*x^2 + c2*x^4 + c3*x^6 + ...
-    t = x * x;
-    x = t * s;
-    r = fma(RV(-8.6512994843471700e-22), t, RV(4.1086675770914360e-19));
-    r = fma(r, t, RV(-1.5619143199049570e-16));
-    r = fma(r, t, RV(4.7794771764282040e-14));
-    r = fma(r, t, RV(-1.1470745595224050e-11));
-    r = fma(r, t, RV(2.0876756987841530e-09));
-    r = fma(r, t, RV(-2.7557319223985710e-07));
-    r = fma(r, t, RV(2.4801587301587300e-05));
-    r = fma(r, t, RV(-1.3888888888888890e-03));
-    r = fma(r, t, RV(4.1666666666666664e-02));
-    r = fma(r, t, RV(-5.0000000000000000e-01));
-    r = fma(r, x, s);
-    return r;
+    return sin(x) / cos(x);
   }
-
+  
 }; // namespace vecmathlib
 
 #endif  // #ifndef MATHFUNCS_SIN_H
diff --git a/vec_double_avx.h b/vec_double_avx.h
index 8b09dc6..b2de135 100644
--- a/vec_double_avx.h
+++ b/vec_double_avx.h
@@ -598,7 +598,7 @@ namespace vecmathlib {
     realvec atanh() const { return MF::vml_atanh(*this); }
     realvec ceil() const { return _mm256_ceil_pd(v); }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos_chebyshev_double(*this); }
+    realvec cos() const { return MF::vml_cos(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -628,7 +628,7 @@ namespace vecmathlib {
     realvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return v; }
-    realvec sin() const { return MF::vml_sin_chebyshev_double(*this); }
+    realvec sin() const { return MF::vml_sin(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return _mm256_sqrt_pd(v); }
     realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_double_sse2.h b/vec_double_sse2.h
index 6f9c751..2c7f724 100644
--- a/vec_double_sse2.h
+++ b/vec_double_sse2.h
@@ -509,7 +509,7 @@ namespace vecmathlib {
 #endif
  }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos_chebyshev_double(*this); }
+    realvec cos() const { return MF::vml_cos(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -553,7 +553,7 @@ namespace vecmathlib {
     realvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return v; }
-    realvec sin() const { return MF::vml_sin_chebyshev_double(*this); }
+    realvec sin() const { return MF::vml_sin(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return _mm_sqrt_pd(v); }
     realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_float_avx.h b/vec_float_avx.h
index 56bdf99..4c3ec62 100644
--- a/vec_float_avx.h
+++ b/vec_float_avx.h
@@ -569,7 +569,7 @@ namespace vecmathlib {
     realvec atanh() const { return MF::vml_atanh(*this); }
     realvec ceil() const { return _mm256_ceil_ps(v); }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos_chebyshev_single(*this); }
+    realvec cos() const { return MF::vml_cos(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -611,7 +611,7 @@ namespace vecmathlib {
     realvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return v; }
-    realvec sin() const { return MF::vml_sin_chebyshev_single(*this); }
+    realvec sin() const { return MF::vml_sin(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return _mm256_sqrt_ps(v); }
     realvec tan() const { return MF::vml_tan(*this); }
diff --git a/vec_float_sse2.h b/vec_float_sse2.h
index 77eb632..605521d 100644
--- a/vec_float_sse2.h
+++ b/vec_float_sse2.h
@@ -485,7 +485,7 @@ namespace vecmathlib {
 #endif
     }
     realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); }
-    realvec cos() const { return MF::vml_cos_chebyshev_single(*this); }
+    realvec cos() const { return MF::vml_cos(*this); }
     realvec cosh() const { return MF::vml_cosh(*this); }
     realvec exp() const { return MF::vml_exp(*this); }
     realvec exp10() const { return MF::vml_exp10(*this); }
@@ -541,7 +541,7 @@ namespace vecmathlib {
     realvec scalbn(int_t n) const { return MF::vml_scalbn(*this, n); }
     realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); }
     boolvec_t signbit() const { return v; }
-    realvec sin() const { return MF::vml_sin_chebyshev_single(*this); }
+    realvec sin() const { return MF::vml_sin(*this); }
     realvec sinh() const { return MF::vml_sinh(*this); }
     realvec sqrt() const { return _mm_sqrt_ps(v); }
     realvec tan() const { return MF::vml_tan(*this); }
-- 
cgit v1.1