diff options
author | Erik Schnetter <schnetter@gmail.com> | 2012-12-01 16:08:57 -0500 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2012-12-01 16:08:57 -0500 |
commit | 471686ed980d1068aa5e5c8fb8289f8f81d33d39 (patch) | |
tree | c02940e1ca726573a20d117ca008fa50ea8c8039 | |
parent | af41d3bc35d082cb4d8e793071a801b589978ce8 (diff) | |
download | vecmathlib-471686ed980d1068aa5e5c8fb8289f8f81d33d39.zip vecmathlib-471686ed980d1068aa5e5c8fb8289f8f81d33d39.tar.gz |
Implement sin, make optimised builds work
-rw-r--r-- | build.ninja | 3 | ||||
-rw-r--r-- | floatprops.h | 28 | ||||
-rw-r--r-- | mathfuncs.h | 1 | ||||
-rw-r--r-- | mathfuncs_asinh.h | 12 | ||||
-rw-r--r-- | mathfuncs_base.h | 6 | ||||
-rw-r--r-- | mathfuncs_convert.h | 8 | ||||
-rw-r--r-- | mathfuncs_sin.h | 96 | ||||
-rw-r--r-- | test.cc | 90 | ||||
-rw-r--r-- | vec_double_avx.h | 3 | ||||
-rw-r--r-- | vec_float.h | 21 |
10 files changed, 218 insertions, 50 deletions
diff --git a/build.ninja b/build.ninja index fbfd02b..23566c4 100644 --- a/build.ninja +++ b/build.ninja @@ -2,8 +2,7 @@ ar = ar cxx = g++ cppflags = -cxxflags = -std=gnu++11 -Wall -g -march=native -# -Ofast +cxxflags = -std=gnu++11 -Wall -g -march=native -Ofast ldflags = -L. rule cxx diff --git a/floatprops.h b/floatprops.h index 7048112..9058c23 100644 --- a/floatprops.h +++ b/floatprops.h @@ -61,8 +61,18 @@ namespace vecmathlib { "error in masks"); // Re-interpret bit patterns - static inline real_t as_float(int_t x) { return *(real_t*)&x; } - static inline int_t as_int(real_t x) { return *(int_t*)&x; } + static inline real_t as_float(int_t x) + { + // return *(real_t*)&x; + union { int_t i; real_t r; } ir; + return ir.i=x, ir.r; + } + static inline int_t as_int(real_t x) + { + // return *(int_t*)&x; + union { real_t r; int_t i; } ri; + return ri.r=x, ri.i; + } // Convert values static inline real_t convert_float(int_t x) { return real_t(x); } @@ -114,8 +124,18 @@ namespace vecmathlib { "error in masks"); // Re-interpret bit patterns - static inline real_t as_float(int_t x) { return *(real_t*)&x; } - static inline int_t as_int(real_t x) { return *(int_t*)&x; } + static inline real_t as_float(int_t x) + { + // return *(real_t*)&x; + union { int_t i; real_t r; } ir; + return ir.i=x, ir.r; + } + static inline int_t as_int(real_t x) + { + // return *(int_t*)&x; + union { real_t r; int_t i; } ri; + return ri.r=x, ri.i; + } // Convert values static inline real_t convert_float(int_t x) { return real_t(x); } diff --git a/mathfuncs.h b/mathfuncs.h index 237c396..af494c8 100644 --- a/mathfuncs.h +++ b/mathfuncs.h @@ -13,6 +13,7 @@ #include "mathfuncs_log.h" #include "mathfuncs_pow.h" #include "mathfuncs_rcp.h" +#include "mathfuncs_sin.h" #include "mathfuncs_sinh.h" #include "mathfuncs_sqrt.h" diff --git a/mathfuncs_asinh.h b/mathfuncs_asinh.h index 6ea7efd..754f5d7 100644 --- a/mathfuncs_asinh.h +++ b/mathfuncs_asinh.h @@ -21,13 +21,21 @@ namespace vecmathlib { template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_asinh(realvec_t x) { - return log(x + sqrt(x*x + RV(1.0))); + // Reduce range + realvec_t r = fabs(x); + r = log(r + sqrt(r*r + RV(1.0))); + r = copysign(r, x); + return r; } template<typename realvec_t> realvec_t mathfuncs<realvec_t>::vml_atanh(realvec_t x) { - return RV(0.5) * log((RV(1.0) + x) / (RV(1.0) - x)); + // Reduce range + realvec_t r = fabs(x); + r = RV(0.5) * log((RV(1.0) + r) / (RV(1.0) - r)); + r = copysign(r, x); + return r; } }; // namespace vecmathlib diff --git a/mathfuncs_base.h b/mathfuncs_base.h index a7207f8..5dc3a27 100644 --- a/mathfuncs_base.h +++ b/mathfuncs_base.h @@ -31,6 +31,12 @@ namespace vecmathlib { typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; + // static real_t R(double a) { return real_t(a); } + // static int_t I(int a) { return int_t(a); } + // static uint_t U(int a) { return uint_t(a); } + // static realvec_t RV(real_t a) { return realvec_t(a); } + // static intvec_t IV(int_t a) { return intvec_t(a); } + // static boolvec_t BV(bool a) { return boolvec_t(a); } // asin static realvec_t vml_acos(realvec_t x); diff --git a/mathfuncs_convert.h b/mathfuncs_convert.h index 852c94e..ea39e05 100644 --- a/mathfuncs_convert.h +++ b/mathfuncs_convert.h @@ -42,6 +42,10 @@ namespace vecmathlib { realvec_t fhi = as_float(xhi) - RV(FP::as_float(exponent_hi)); // add largest negative number again fhi -= RV(R(FP::sign_mask)); + // Ensure that the converted low and high bits are calculated + // separately, since a real_t doesn't have enough precision to + // hold all the bits of an int_t + fhi.barrier(); // Combine results return flo + fhi; @@ -90,9 +94,11 @@ namespace vecmathlib { realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x) { realvec_t r = fabs(x); + // Round by adding a large number, destroying all excess precision real_t offset = RV(std::scalbn(R(1.0), FP::mantissa_bits)); r += offset; -#warning "TODO: don't optimise this away!" + // Ensure the rounding is not optimised away + r.barrier(); r -= offset; return copysign(r, x); } diff --git a/mathfuncs_sin.h b/mathfuncs_sin.h new file mode 100644 index 0000000..3e1a6d6 --- /dev/null +++ b/mathfuncs_sin.h @@ -0,0 +1,96 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_SIN_H +#define MATHFUNCS_SIN_H + +#include "mathfuncs_base.h" + +#include <cassert> +#include <cmath> + + + +namespace vecmathlib { + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_sin(realvec_t x) + { + // Reduce range: sin(x) = sin(x + 2pi) + x = remainder(x, RV(2.0*M_PI)); + assert(all(x >= RV(-M_PI) && x <= RV(M_PI))); + + // Reduce range: sin(x) = -sin(-x) + realvec_t sign = x; + x = fabs(x); + assert(all(x >= RV(0.0) && x <= RV(M_PI))); + + // Reduce range: sin(x) = sin(pi-x) + x = ifthen(x > RV(M_PI_2), RV(M_PI) - x, x); + assert(all(x >= RV(0.0) && x <= RV(M_PI_2))); + + // Reduce range: cos(x) = sin(pi/2 - x) + boolvec_t use_cos = x > RV(0.5*M_PI_2); + x = ifthen(use_cos, RV(M_PI_2) - x, x); + + // Taylor expansion + // sin(x) = Sum[n=0,nmax,n%2!=0] (-1)^((n-1)/2) x^n / n! + // cos(x) = Sum[n=0,nmax,n%2==0] (-1)^((n-1)/2) x^n / n! + realvec_t noffset = ifthen(use_cos, RV(-1.0), RV(0.0)); + int const nmax = 15; + realvec_t x2 = x*x; + realvec_t y = ifthen(use_cos, RV(1.0), x); + realvec_t r = y; + for (int n=3; n<nmax; n+=2) { + // sin: (n-1)*n + // cos: n*(n+1) + realvec_t rn = RV(FP::convert_float(n)) + noffset; + y *= - x2 * rcp((rn-RV(1.0))*rn); + r += y; + } + + // Undo range reduction + r = copysign(r, sign); + + return r; + } + + + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_cos(realvec_t x) + { + return sin(x + RV(M_PI_2)); + } + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_tan(realvec_t x) + { + // Reduce range: tan(x) = tan(x + pi) + x = remainder(x, RV(M_PI)); + assert(all(x >= RV(-M_PI_2) && x <= RV(M_PI_2))); + + // Reduce range: tan(x) = -tan(-x) + realvec_t sign = x; + x = fabs(x); + assert(all(x >= RV(0.0) && x <= RV(M_PI_2))); + + // Reduce range: cot(x) = -tan(x + pi/2) + boolvec_t use_cot = x > RV(0.5 * M_PI_2); + x = ifthen(use_cot, RV(M_PI_2) -x, x); + assert(all(x >= RV(0.0) && x <= RV(0.5 * M_PI_2))); + + // Calculate tan + realvec_t r = sin(x) / cos(x); + + // Undo range reduction + r = ifthen(use_cot, -rcp(r), r); + + // Undo range reducion + r = copysign(r, sign); + + return r; + } + +}; // namespace vecmathlib + +#endif // #ifndef MATHFUNCS_SIN_H @@ -24,14 +24,25 @@ struct vecmathlib_test { typedef typename realvec_t::intvec_t intvec_t; typedef typename realvec_t::int_t int_t; + typedef typename realvec_t::uint_t uint_t; typedef typename realvec_t::real_t real_t; + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef vecmathlib::floatprops<real_t> FP; + // Test each function with this many random values static int const imax = 1000; - static real_t constexpr accuracy = pow(realvec_t::epsilon(), real_t(0.75)); + // Require that 3/4 of the digits are correct + static real_t constexpr accuracy = pow(realvec_t::epsilon(), R(0.75)); @@ -39,7 +50,9 @@ struct vecmathlib_test { { realvec_t x; for (int i=0; i<realvec_t::size; ++i) { - x.set_elt(i, xmin + (xmax - xmin) * real_t(rand()) / real_t(RAND_MAX)); + real_t r = + (xmax - xmin) *FP::convert_float(rand()) / FP::convert_float(RAND_MAX); + x.set_elt(i, xmin + r); } return x; } @@ -48,10 +61,10 @@ struct vecmathlib_test { { intvec_t n; for (int i=0; i<intvec_t::size; ++i) { - real_t x = - real_t(nmax - nmin + 1) * - real_t(rand()) / (real_t(RAND_MAX) + real_t(1.0)); - n.set_elt(i, nmin + FP::convert_int(std::floor(x))); + real_t r = + R(nmax - nmin + 1) * + R(rand()) / (real_t(RAND_MAX) + R(1.0)); + n.set_elt(i, nmin + FP::convert_int(std::floor(r))); } return n; } @@ -74,7 +87,7 @@ struct vecmathlib_test { realvec_t(accuracy) * (fabs(rstd) + fabs(rvml) + realvec_t(1.0)))) { ++ num_errors; - cout << setprecision(realvec_t::digits10+1) + cout << setprecision(realvec_t::digits10+2) << "Error in " << func << "(" << x << "):\n" << " fstd(x)=" << rstd << "\n" << " fvml(x)=" << rvml << "\n"; @@ -172,8 +185,8 @@ struct vecmathlib_test { { cout << " testing copysign fabs ilogb scalbn signbit...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-10.0), real_t(+10.0)); - realvec_t const y = random(real_t(-10.0), real_t(+10.0)); + realvec_t const x = random(R(-10.0), R(+10.0)); + realvec_t const y = random(R(-10.0), R(+10.0)); intvec_t const n = random(int_t(-10), int_t(+10)); check("copysign", copysign, vecmathlib::copysign, x, y, 0.0); check("fabs", fabs, vecmathlib::fabs, x, 0.0); @@ -187,7 +200,7 @@ struct vecmathlib_test { { cout << " testing ceil convert_float convert_int floor round...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-10.0), real_t(+10.0)); + realvec_t const x = random(R(-10.0), R(+10.0)); intvec_t const n = random(int_t(-10), int_t(+10)); check("ceil", ceil, vecmathlib::ceil, x, accuracy); check("convert_float", @@ -204,12 +217,12 @@ struct vecmathlib_test { { cout << " testing asin acos atan...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-1.0), real_t(+1.0)); + realvec_t const x = random(R(-1.0), R(+1.0)); check("asin", asin, vecmathlib::asin, x, accuracy); check("acos", acos, vecmathlib::acos, x, accuracy); } for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-100.0), real_t(+100.0)); + realvec_t const x = random(R(-100.0), R(+100.0)); check("atan", atan, vecmathlib::atan, x, accuracy); } } @@ -218,26 +231,25 @@ struct vecmathlib_test { { cout << " testing asinh acosh atanh...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-1000.0), real_t(+1000.0)); - // asinh loses accuracy (by definition, not by implementation?) - check("asinh", asinh, vecmathlib::asinh, x, real_t(1.0e+3)*accuracy); + realvec_t const x = random(R(-1000.0), R(+1000.0)); + check("asinh", asinh, vecmathlib::asinh, x, accuracy); } for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(1.0), real_t(1000.0)); + realvec_t const x = random(R(1.0), R(1000.0)); check("acosh", acosh, vecmathlib::acosh, x, accuracy); } for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-1.0), real_t(+1.0)); + realvec_t const x = random(R(-1.0), R(+1.0)); check("atanh", atanh, vecmathlib::atanh, x, accuracy); } } - static real_t exp10(real_t x) { return pow(real_t(10.0), x); } + static real_t exp10(real_t x) { return pow(R(10.0), x); } static void test_exp() { cout << " testing exp exp10 exp2 expm1...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-100.0), real_t(+100.0)); + realvec_t const x = random(R(-100.0), R(+100.0)); check("exp", exp, vecmathlib::exp, x, accuracy); check("exp10", exp10, vecmathlib::exp10, x, accuracy); check("exp2", exp2, vecmathlib::exp2, x, accuracy); @@ -249,7 +261,7 @@ struct vecmathlib_test { { cout << " testing log log10 log1p log2...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(1.0e-10), real_t(1.0e+10)); + realvec_t const x = random(R(1.0e-10), R(1.0e+10)); check("log", log, vecmathlib::log, x, accuracy); check("log10", log10, vecmathlib::log10, x, accuracy); check("log1p", log1p, vecmathlib::log1p, x, accuracy); @@ -261,52 +273,54 @@ struct vecmathlib_test { { cout << " testing pow...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(0.001), real_t(1000.0)); - realvec_t const y = random(real_t(-10.0), real_t(+10.0)); + realvec_t const x = random(R(0.001), R(1000.0)); + realvec_t const y = random(R(-10.0), R(+10.0)); check("pow", pow, vecmathlib::pow, x, y, accuracy); } } - static real_t rcp(real_t x) { return real_t(1.0)/x; } + static real_t rcp(real_t x) { return R(1.0)/x; } static void test_rcp() { cout << " testing fmod rcp remainder...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-10.0), real_t(+10.0)); - realvec_t const y = random(real_t(-10.0), real_t(+10.0)); + realvec_t const x = random(R(-10.0), R(+10.0)); + realvec_t const y = random(R(-10.0), R(+10.0)); check("fmod", fmod, vecmathlib::fmod, x, y, accuracy); check("rcp", rcp, vecmathlib::rcp, x, accuracy); check("remainder", remainder, vecmathlib::remainder, x, y, accuracy); } } - // void test_sin() - // { - // for (int i=0; i<imax; ++i) { - // real_t const x = random(-10.0, 10.0); - // check("sin", sin, vecmathlib::sin, x, accuracy); - // check("cos", cos, vecmathlib::cos, x, accuracy); - // check("tan", tan, vecmathlib::tan, x, accuracy); - // } - // } + static void test_sin() + { + cout << " testing cos sin tan...\n"; + for (int i=0; i<imax; ++i) { + realvec_t const x = random(R(-10.0), R(+10.0)); + check("sin", sin, vecmathlib::sin, x, accuracy); + check("cos", cos, vecmathlib::cos, x, accuracy); + // tan loses accuracy (by definition, not by implementation?) + check("tan", tan, vecmathlib::tan, x, R(10.0)*accuracy); + } + } static void test_sinh() { cout << " testing cosh sinh tanh...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(-10.0), real_t(+10.0)); + realvec_t const x = random(R(-10.0), R(+10.0)); check("sinh", sinh, vecmathlib::sinh, x, accuracy); check("cosh", cosh, vecmathlib::cosh, x, accuracy); check("tanh", tanh, vecmathlib::tanh, x, accuracy); } } - static real_t rsqrt(real_t x) { return real_t(1.0)/sqrt(x); } + static real_t rsqrt(real_t x) { return R(1.0)/sqrt(x); } static void test_sqrt() { cout << " testing rsqrt sqrt...\n"; for (int i=0; i<imax; ++i) { - realvec_t const x = random(real_t(0.0), real_t(1.0e+3)); + realvec_t const x = random(R(0.0), R(1.0e+3)); check("rsqrt", rsqrt, vecmathlib::rsqrt, x, accuracy); check("sqrt", sqrt, vecmathlib::sqrt, x, accuracy); } @@ -331,7 +345,7 @@ struct vecmathlib_test { test_log(); test_pow(); test_rcp(); - // test_sin(); + test_sin(); test_sinh(); test_sqrt(); } diff --git a/vec_double_avx.h b/vec_double_avx.h index b3e8db2..f311095 100644 --- a/vec_double_avx.h +++ b/vec_double_avx.h @@ -361,6 +361,7 @@ namespace vecmathlib { typedef __m256d vector_t; static constexpr char const* const name = "<AVX:4*double>"; + inline void barrier() { asm("": "+x" (v)); } static_assert(size * sizeof(real_t) == sizeof(vector_t), "vector size is wrong"); @@ -476,7 +477,6 @@ namespace vecmathlib { realvec log2() const { return MF::vml_log2(*this); } realvec pow(realvec y) const { return MF::vml_pow(*this, y); } realvec rcp() const { return _mm256_div_pd(_mm256_set1_pd(1.0), v); } - // realvec rcp() const { return MF::vml_rcp(*this); } realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); } realvec round() const { return _mm256_round_pd(v, _MM_FROUND_NINT); } realvec rsqrt() const { return MF::vml_rsqrt(*this); } @@ -485,7 +485,6 @@ namespace vecmathlib { realvec sin() const { return MF::vml_sin(*this); } realvec sinh() const { return MF::vml_sinh(*this); } realvec sqrt() const { return _mm256_sqrt_pd(v); } - // realvec sqrt() const { return MF::vml_sqrt(*this); } realvec tan() const { return MF::vml_tan(*this); } realvec tanh() const { return MF::vml_tanh(*this); } }; diff --git a/vec_float.h b/vec_float.h index 4bdc398..60636ba 100644 --- a/vec_float.h +++ b/vec_float.h @@ -39,6 +39,12 @@ namespace vecmathlib { typedef boolvec_t BV; typedef floatprops<real_t> FP; typedef mathfuncs<realvec_t> MF; + // static real_t R(double a) { return real_t(a); } + // static int_t I(int a) { return int_t(a); } + // static uint_t U(int a) { return uint_t(a); } + // static realvec_t RV(real_t a) { return realvec_t(a); } + // static intvec_t IV(int_t a) { return intvec_t(a); } + // static boolvec_t BV(bool a) { return boolvec_t(a); } @@ -104,6 +110,12 @@ namespace vecmathlib { typedef boolvec_t BV; typedef floatprops<real_t> FP; typedef mathfuncs<realvec_t> MF; + // static real_t R(double a) { return real_t(a); } + // static int_t I(int a) { return int_t(a); } + // static uint_t U(int a) { return uint_t(a); } + // static realvec_t RV(real_t a) { return realvec_t(a); } + // static intvec_t IV(int_t a) { return intvec_t(a); } + // static boolvec_t BV(bool a) { return boolvec_t(a); } @@ -122,7 +134,7 @@ namespace vecmathlib { - auto as_bool() const -> boolvec_t { return *(bool const*)&v; } + auto as_bool() const -> boolvec_t { return v; } auto convert_bool() const -> boolvec_t { return v; } auto as_float() const -> realvec_t; // defined after realvec auto convert_float() const -> realvec_t; // defined after realvec @@ -177,6 +189,7 @@ namespace vecmathlib { typedef real_t vector_t; static constexpr char const* const name = "<1*float>"; + inline void barrier() { asm("": "+x" (v)); } typedef boolvec<real_t, size> boolvec_t; typedef intvec<real_t, size> intvec_t; @@ -191,6 +204,12 @@ namespace vecmathlib { typedef boolvec_t BV; typedef floatprops<real_t> FP; typedef mathfuncs<realvec_t> MF; + // static real_t R(double a) { return real_t(a); } + // static int_t I(int a) { return int_t(a); } + // static uint_t U(int a) { return uint_t(a); } + // static realvec_t RV(real_t a) { return realvec_t(a); } + // static intvec_t IV(int_t a) { return intvec_t(a); } + // static boolvec_t BV(bool a) { return boolvec_t(a); } |