summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorErik Schnetter <schnetter@gmail.com>2012-12-01 16:08:57 -0500
committerErik Schnetter <schnetter@gmail.com>2012-12-01 16:08:57 -0500
commit471686ed980d1068aa5e5c8fb8289f8f81d33d39 (patch)
treec02940e1ca726573a20d117ca008fa50ea8c8039
parentaf41d3bc35d082cb4d8e793071a801b589978ce8 (diff)
downloadvecmathlib-471686ed980d1068aa5e5c8fb8289f8f81d33d39.zip
vecmathlib-471686ed980d1068aa5e5c8fb8289f8f81d33d39.tar.gz
Implement sin, make optimised builds work
-rw-r--r--build.ninja3
-rw-r--r--floatprops.h28
-rw-r--r--mathfuncs.h1
-rw-r--r--mathfuncs_asinh.h12
-rw-r--r--mathfuncs_base.h6
-rw-r--r--mathfuncs_convert.h8
-rw-r--r--mathfuncs_sin.h96
-rw-r--r--test.cc90
-rw-r--r--vec_double_avx.h3
-rw-r--r--vec_float.h21
10 files changed, 218 insertions, 50 deletions
diff --git a/build.ninja b/build.ninja
index fbfd02b..23566c4 100644
--- a/build.ninja
+++ b/build.ninja
@@ -2,8 +2,7 @@ ar = ar
cxx = g++
cppflags =
-cxxflags = -std=gnu++11 -Wall -g -march=native
-# -Ofast
+cxxflags = -std=gnu++11 -Wall -g -march=native -Ofast
ldflags = -L.
rule cxx
diff --git a/floatprops.h b/floatprops.h
index 7048112..9058c23 100644
--- a/floatprops.h
+++ b/floatprops.h
@@ -61,8 +61,18 @@ namespace vecmathlib {
"error in masks");
// Re-interpret bit patterns
- static inline real_t as_float(int_t x) { return *(real_t*)&x; }
- static inline int_t as_int(real_t x) { return *(int_t*)&x; }
+ static inline real_t as_float(int_t x)
+ {
+ // return *(real_t*)&x;
+ union { int_t i; real_t r; } ir;
+ return ir.i=x, ir.r;
+ }
+ static inline int_t as_int(real_t x)
+ {
+ // return *(int_t*)&x;
+ union { real_t r; int_t i; } ri;
+ return ri.r=x, ri.i;
+ }
// Convert values
static inline real_t convert_float(int_t x) { return real_t(x); }
@@ -114,8 +124,18 @@ namespace vecmathlib {
"error in masks");
// Re-interpret bit patterns
- static inline real_t as_float(int_t x) { return *(real_t*)&x; }
- static inline int_t as_int(real_t x) { return *(int_t*)&x; }
+ static inline real_t as_float(int_t x)
+ {
+ // return *(real_t*)&x;
+ union { int_t i; real_t r; } ir;
+ return ir.i=x, ir.r;
+ }
+ static inline int_t as_int(real_t x)
+ {
+ // return *(int_t*)&x;
+ union { real_t r; int_t i; } ri;
+ return ri.r=x, ri.i;
+ }
// Convert values
static inline real_t convert_float(int_t x) { return real_t(x); }
diff --git a/mathfuncs.h b/mathfuncs.h
index 237c396..af494c8 100644
--- a/mathfuncs.h
+++ b/mathfuncs.h
@@ -13,6 +13,7 @@
#include "mathfuncs_log.h"
#include "mathfuncs_pow.h"
#include "mathfuncs_rcp.h"
+#include "mathfuncs_sin.h"
#include "mathfuncs_sinh.h"
#include "mathfuncs_sqrt.h"
diff --git a/mathfuncs_asinh.h b/mathfuncs_asinh.h
index 6ea7efd..754f5d7 100644
--- a/mathfuncs_asinh.h
+++ b/mathfuncs_asinh.h
@@ -21,13 +21,21 @@ namespace vecmathlib {
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_asinh(realvec_t x)
{
- return log(x + sqrt(x*x + RV(1.0)));
+ // Reduce range
+ realvec_t r = fabs(x);
+ r = log(r + sqrt(r*r + RV(1.0)));
+ r = copysign(r, x);
+ return r;
}
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_atanh(realvec_t x)
{
- return RV(0.5) * log((RV(1.0) + x) / (RV(1.0) - x));
+ // Reduce range
+ realvec_t r = fabs(x);
+ r = RV(0.5) * log((RV(1.0) + r) / (RV(1.0) - r));
+ r = copysign(r, x);
+ return r;
}
}; // namespace vecmathlib
diff --git a/mathfuncs_base.h b/mathfuncs_base.h
index a7207f8..5dc3a27 100644
--- a/mathfuncs_base.h
+++ b/mathfuncs_base.h
@@ -31,6 +31,12 @@ namespace vecmathlib {
typedef realvec_t RV;
typedef intvec_t IV;
typedef boolvec_t BV;
+ // static real_t R(double a) { return real_t(a); }
+ // static int_t I(int a) { return int_t(a); }
+ // static uint_t U(int a) { return uint_t(a); }
+ // static realvec_t RV(real_t a) { return realvec_t(a); }
+ // static intvec_t IV(int_t a) { return intvec_t(a); }
+ // static boolvec_t BV(bool a) { return boolvec_t(a); }
// asin
static realvec_t vml_acos(realvec_t x);
diff --git a/mathfuncs_convert.h b/mathfuncs_convert.h
index 852c94e..ea39e05 100644
--- a/mathfuncs_convert.h
+++ b/mathfuncs_convert.h
@@ -42,6 +42,10 @@ namespace vecmathlib {
realvec_t fhi = as_float(xhi) - RV(FP::as_float(exponent_hi));
// add largest negative number again
fhi -= RV(R(FP::sign_mask));
+ // Ensure that the converted low and high bits are calculated
+ // separately, since a real_t doesn't have enough precision to
+ // hold all the bits of an int_t
+ fhi.barrier();
// Combine results
return flo + fhi;
@@ -90,9 +94,11 @@ namespace vecmathlib {
realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x)
{
realvec_t r = fabs(x);
+ // Round by adding a large number, destroying all excess precision
real_t offset = RV(std::scalbn(R(1.0), FP::mantissa_bits));
r += offset;
-#warning "TODO: don't optimise this away!"
+ // Ensure the rounding is not optimised away
+ r.barrier();
r -= offset;
return copysign(r, x);
}
diff --git a/mathfuncs_sin.h b/mathfuncs_sin.h
new file mode 100644
index 0000000..3e1a6d6
--- /dev/null
+++ b/mathfuncs_sin.h
@@ -0,0 +1,96 @@
+// -*-C++-*-
+
+#ifndef MATHFUNCS_SIN_H
+#define MATHFUNCS_SIN_H
+
+#include "mathfuncs_base.h"
+
+#include <cassert>
+#include <cmath>
+
+
+
+namespace vecmathlib {
+
+ template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_sin(realvec_t x)
+ {
+ // Reduce range: sin(x) = sin(x + 2pi)
+ x = remainder(x, RV(2.0*M_PI));
+ assert(all(x >= RV(-M_PI) && x <= RV(M_PI)));
+
+ // Reduce range: sin(x) = -sin(-x)
+ realvec_t sign = x;
+ x = fabs(x);
+ assert(all(x >= RV(0.0) && x <= RV(M_PI)));
+
+ // Reduce range: sin(x) = sin(pi-x)
+ x = ifthen(x > RV(M_PI_2), RV(M_PI) - x, x);
+ assert(all(x >= RV(0.0) && x <= RV(M_PI_2)));
+
+ // Reduce range: cos(x) = sin(pi/2 - x)
+ boolvec_t use_cos = x > RV(0.5*M_PI_2);
+ x = ifthen(use_cos, RV(M_PI_2) - x, x);
+
+ // Taylor expansion
+ // sin(x) = Sum[n=0,nmax,n%2!=0] (-1)^((n-1)/2) x^n / n!
+ // cos(x) = Sum[n=0,nmax,n%2==0] (-1)^((n-1)/2) x^n / n!
+ realvec_t noffset = ifthen(use_cos, RV(-1.0), RV(0.0));
+ int const nmax = 15;
+ realvec_t x2 = x*x;
+ realvec_t y = ifthen(use_cos, RV(1.0), x);
+ realvec_t r = y;
+ for (int n=3; n<nmax; n+=2) {
+ // sin: (n-1)*n
+ // cos: n*(n+1)
+ realvec_t rn = RV(FP::convert_float(n)) + noffset;
+ y *= - x2 * rcp((rn-RV(1.0))*rn);
+ r += y;
+ }
+
+ // Undo range reduction
+ r = copysign(r, sign);
+
+ return r;
+ }
+
+
+
+ template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_cos(realvec_t x)
+ {
+ return sin(x + RV(M_PI_2));
+ }
+
+ template<typename realvec_t>
+ realvec_t mathfuncs<realvec_t>::vml_tan(realvec_t x)
+ {
+ // Reduce range: tan(x) = tan(x + pi)
+ x = remainder(x, RV(M_PI));
+ assert(all(x >= RV(-M_PI_2) && x <= RV(M_PI_2)));
+
+ // Reduce range: tan(x) = -tan(-x)
+ realvec_t sign = x;
+ x = fabs(x);
+ assert(all(x >= RV(0.0) && x <= RV(M_PI_2)));
+
+ // Reduce range: cot(x) = -tan(x + pi/2)
+ boolvec_t use_cot = x > RV(0.5 * M_PI_2);
+ x = ifthen(use_cot, RV(M_PI_2) -x, x);
+ assert(all(x >= RV(0.0) && x <= RV(0.5 * M_PI_2)));
+
+ // Calculate tan
+ realvec_t r = sin(x) / cos(x);
+
+ // Undo range reduction
+ r = ifthen(use_cot, -rcp(r), r);
+
+ // Undo range reducion
+ r = copysign(r, sign);
+
+ return r;
+ }
+
+}; // namespace vecmathlib
+
+#endif // #ifndef MATHFUNCS_SIN_H
diff --git a/test.cc b/test.cc
index 2fecda5..8991c64 100644
--- a/test.cc
+++ b/test.cc
@@ -24,14 +24,25 @@ struct vecmathlib_test {
typedef typename realvec_t::intvec_t intvec_t;
typedef typename realvec_t::int_t int_t;
+ typedef typename realvec_t::uint_t uint_t;
typedef typename realvec_t::real_t real_t;
+ // Short names for type casts
+ typedef real_t R;
+ typedef int_t I;
+ typedef uint_t U;
+ typedef realvec_t RV;
+ typedef intvec_t IV;
+ typedef boolvec_t BV;
+
typedef vecmathlib::floatprops<real_t> FP;
+ // Test each function with this many random values
static int const imax = 1000;
- static real_t constexpr accuracy = pow(realvec_t::epsilon(), real_t(0.75));
+ // Require that 3/4 of the digits are correct
+ static real_t constexpr accuracy = pow(realvec_t::epsilon(), R(0.75));
@@ -39,7 +50,9 @@ struct vecmathlib_test {
{
realvec_t x;
for (int i=0; i<realvec_t::size; ++i) {
- x.set_elt(i, xmin + (xmax - xmin) * real_t(rand()) / real_t(RAND_MAX));
+ real_t r =
+ (xmax - xmin) *FP::convert_float(rand()) / FP::convert_float(RAND_MAX);
+ x.set_elt(i, xmin + r);
}
return x;
}
@@ -48,10 +61,10 @@ struct vecmathlib_test {
{
intvec_t n;
for (int i=0; i<intvec_t::size; ++i) {
- real_t x =
- real_t(nmax - nmin + 1) *
- real_t(rand()) / (real_t(RAND_MAX) + real_t(1.0));
- n.set_elt(i, nmin + FP::convert_int(std::floor(x)));
+ real_t r =
+ R(nmax - nmin + 1) *
+ R(rand()) / (real_t(RAND_MAX) + R(1.0));
+ n.set_elt(i, nmin + FP::convert_int(std::floor(r)));
}
return n;
}
@@ -74,7 +87,7 @@ struct vecmathlib_test {
realvec_t(accuracy) * (fabs(rstd) + fabs(rvml) + realvec_t(1.0))))
{
++ num_errors;
- cout << setprecision(realvec_t::digits10+1)
+ cout << setprecision(realvec_t::digits10+2)
<< "Error in " << func << "(" << x << "):\n"
<< " fstd(x)=" << rstd << "\n"
<< " fvml(x)=" << rvml << "\n";
@@ -172,8 +185,8 @@ struct vecmathlib_test {
{
cout << " testing copysign fabs ilogb scalbn signbit...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-10.0), real_t(+10.0));
- realvec_t const y = random(real_t(-10.0), real_t(+10.0));
+ realvec_t const x = random(R(-10.0), R(+10.0));
+ realvec_t const y = random(R(-10.0), R(+10.0));
intvec_t const n = random(int_t(-10), int_t(+10));
check("copysign", copysign, vecmathlib::copysign, x, y, 0.0);
check("fabs", fabs, vecmathlib::fabs, x, 0.0);
@@ -187,7 +200,7 @@ struct vecmathlib_test {
{
cout << " testing ceil convert_float convert_int floor round...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-10.0), real_t(+10.0));
+ realvec_t const x = random(R(-10.0), R(+10.0));
intvec_t const n = random(int_t(-10), int_t(+10));
check("ceil", ceil, vecmathlib::ceil, x, accuracy);
check("convert_float",
@@ -204,12 +217,12 @@ struct vecmathlib_test {
{
cout << " testing asin acos atan...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-1.0), real_t(+1.0));
+ realvec_t const x = random(R(-1.0), R(+1.0));
check("asin", asin, vecmathlib::asin, x, accuracy);
check("acos", acos, vecmathlib::acos, x, accuracy);
}
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-100.0), real_t(+100.0));
+ realvec_t const x = random(R(-100.0), R(+100.0));
check("atan", atan, vecmathlib::atan, x, accuracy);
}
}
@@ -218,26 +231,25 @@ struct vecmathlib_test {
{
cout << " testing asinh acosh atanh...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-1000.0), real_t(+1000.0));
- // asinh loses accuracy (by definition, not by implementation?)
- check("asinh", asinh, vecmathlib::asinh, x, real_t(1.0e+3)*accuracy);
+ realvec_t const x = random(R(-1000.0), R(+1000.0));
+ check("asinh", asinh, vecmathlib::asinh, x, accuracy);
}
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(1.0), real_t(1000.0));
+ realvec_t const x = random(R(1.0), R(1000.0));
check("acosh", acosh, vecmathlib::acosh, x, accuracy);
}
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-1.0), real_t(+1.0));
+ realvec_t const x = random(R(-1.0), R(+1.0));
check("atanh", atanh, vecmathlib::atanh, x, accuracy);
}
}
- static real_t exp10(real_t x) { return pow(real_t(10.0), x); }
+ static real_t exp10(real_t x) { return pow(R(10.0), x); }
static void test_exp()
{
cout << " testing exp exp10 exp2 expm1...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-100.0), real_t(+100.0));
+ realvec_t const x = random(R(-100.0), R(+100.0));
check("exp", exp, vecmathlib::exp, x, accuracy);
check("exp10", exp10, vecmathlib::exp10, x, accuracy);
check("exp2", exp2, vecmathlib::exp2, x, accuracy);
@@ -249,7 +261,7 @@ struct vecmathlib_test {
{
cout << " testing log log10 log1p log2...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(1.0e-10), real_t(1.0e+10));
+ realvec_t const x = random(R(1.0e-10), R(1.0e+10));
check("log", log, vecmathlib::log, x, accuracy);
check("log10", log10, vecmathlib::log10, x, accuracy);
check("log1p", log1p, vecmathlib::log1p, x, accuracy);
@@ -261,52 +273,54 @@ struct vecmathlib_test {
{
cout << " testing pow...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(0.001), real_t(1000.0));
- realvec_t const y = random(real_t(-10.0), real_t(+10.0));
+ realvec_t const x = random(R(0.001), R(1000.0));
+ realvec_t const y = random(R(-10.0), R(+10.0));
check("pow", pow, vecmathlib::pow, x, y, accuracy);
}
}
- static real_t rcp(real_t x) { return real_t(1.0)/x; }
+ static real_t rcp(real_t x) { return R(1.0)/x; }
static void test_rcp()
{
cout << " testing fmod rcp remainder...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-10.0), real_t(+10.0));
- realvec_t const y = random(real_t(-10.0), real_t(+10.0));
+ realvec_t const x = random(R(-10.0), R(+10.0));
+ realvec_t const y = random(R(-10.0), R(+10.0));
check("fmod", fmod, vecmathlib::fmod, x, y, accuracy);
check("rcp", rcp, vecmathlib::rcp, x, accuracy);
check("remainder", remainder, vecmathlib::remainder, x, y, accuracy);
}
}
- // void test_sin()
- // {
- // for (int i=0; i<imax; ++i) {
- // real_t const x = random(-10.0, 10.0);
- // check("sin", sin, vecmathlib::sin, x, accuracy);
- // check("cos", cos, vecmathlib::cos, x, accuracy);
- // check("tan", tan, vecmathlib::tan, x, accuracy);
- // }
- // }
+ static void test_sin()
+ {
+ cout << " testing cos sin tan...\n";
+ for (int i=0; i<imax; ++i) {
+ realvec_t const x = random(R(-10.0), R(+10.0));
+ check("sin", sin, vecmathlib::sin, x, accuracy);
+ check("cos", cos, vecmathlib::cos, x, accuracy);
+ // tan loses accuracy (by definition, not by implementation?)
+ check("tan", tan, vecmathlib::tan, x, R(10.0)*accuracy);
+ }
+ }
static void test_sinh()
{
cout << " testing cosh sinh tanh...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(-10.0), real_t(+10.0));
+ realvec_t const x = random(R(-10.0), R(+10.0));
check("sinh", sinh, vecmathlib::sinh, x, accuracy);
check("cosh", cosh, vecmathlib::cosh, x, accuracy);
check("tanh", tanh, vecmathlib::tanh, x, accuracy);
}
}
- static real_t rsqrt(real_t x) { return real_t(1.0)/sqrt(x); }
+ static real_t rsqrt(real_t x) { return R(1.0)/sqrt(x); }
static void test_sqrt()
{
cout << " testing rsqrt sqrt...\n";
for (int i=0; i<imax; ++i) {
- realvec_t const x = random(real_t(0.0), real_t(1.0e+3));
+ realvec_t const x = random(R(0.0), R(1.0e+3));
check("rsqrt", rsqrt, vecmathlib::rsqrt, x, accuracy);
check("sqrt", sqrt, vecmathlib::sqrt, x, accuracy);
}
@@ -331,7 +345,7 @@ struct vecmathlib_test {
test_log();
test_pow();
test_rcp();
- // test_sin();
+ test_sin();
test_sinh();
test_sqrt();
}
diff --git a/vec_double_avx.h b/vec_double_avx.h
index b3e8db2..f311095 100644
--- a/vec_double_avx.h
+++ b/vec_double_avx.h
@@ -361,6 +361,7 @@ namespace vecmathlib {
typedef __m256d vector_t;
static constexpr char const* const name = "<AVX:4*double>";
+ inline void barrier() { asm("": "+x" (v)); }
static_assert(size * sizeof(real_t) == sizeof(vector_t),
"vector size is wrong");
@@ -476,7 +477,6 @@ namespace vecmathlib {
realvec log2() const { return MF::vml_log2(*this); }
realvec pow(realvec y) const { return MF::vml_pow(*this, y); }
realvec rcp() const { return _mm256_div_pd(_mm256_set1_pd(1.0), v); }
- // realvec rcp() const { return MF::vml_rcp(*this); }
realvec remainder(realvec y) const { return MF::vml_remainder(*this, y); }
realvec round() const { return _mm256_round_pd(v, _MM_FROUND_NINT); }
realvec rsqrt() const { return MF::vml_rsqrt(*this); }
@@ -485,7 +485,6 @@ namespace vecmathlib {
realvec sin() const { return MF::vml_sin(*this); }
realvec sinh() const { return MF::vml_sinh(*this); }
realvec sqrt() const { return _mm256_sqrt_pd(v); }
- // realvec sqrt() const { return MF::vml_sqrt(*this); }
realvec tan() const { return MF::vml_tan(*this); }
realvec tanh() const { return MF::vml_tanh(*this); }
};
diff --git a/vec_float.h b/vec_float.h
index 4bdc398..60636ba 100644
--- a/vec_float.h
+++ b/vec_float.h
@@ -39,6 +39,12 @@ namespace vecmathlib {
typedef boolvec_t BV;
typedef floatprops<real_t> FP;
typedef mathfuncs<realvec_t> MF;
+ // static real_t R(double a) { return real_t(a); }
+ // static int_t I(int a) { return int_t(a); }
+ // static uint_t U(int a) { return uint_t(a); }
+ // static realvec_t RV(real_t a) { return realvec_t(a); }
+ // static intvec_t IV(int_t a) { return intvec_t(a); }
+ // static boolvec_t BV(bool a) { return boolvec_t(a); }
@@ -104,6 +110,12 @@ namespace vecmathlib {
typedef boolvec_t BV;
typedef floatprops<real_t> FP;
typedef mathfuncs<realvec_t> MF;
+ // static real_t R(double a) { return real_t(a); }
+ // static int_t I(int a) { return int_t(a); }
+ // static uint_t U(int a) { return uint_t(a); }
+ // static realvec_t RV(real_t a) { return realvec_t(a); }
+ // static intvec_t IV(int_t a) { return intvec_t(a); }
+ // static boolvec_t BV(bool a) { return boolvec_t(a); }
@@ -122,7 +134,7 @@ namespace vecmathlib {
- auto as_bool() const -> boolvec_t { return *(bool const*)&v; }
+ auto as_bool() const -> boolvec_t { return v; }
auto convert_bool() const -> boolvec_t { return v; }
auto as_float() const -> realvec_t; // defined after realvec
auto convert_float() const -> realvec_t; // defined after realvec
@@ -177,6 +189,7 @@ namespace vecmathlib {
typedef real_t vector_t;
static constexpr char const* const name = "<1*float>";
+ inline void barrier() { asm("": "+x" (v)); }
typedef boolvec<real_t, size> boolvec_t;
typedef intvec<real_t, size> intvec_t;
@@ -191,6 +204,12 @@ namespace vecmathlib {
typedef boolvec_t BV;
typedef floatprops<real_t> FP;
typedef mathfuncs<realvec_t> MF;
+ // static real_t R(double a) { return real_t(a); }
+ // static int_t I(int a) { return int_t(a); }
+ // static uint_t U(int a) { return uint_t(a); }
+ // static realvec_t RV(real_t a) { return realvec_t(a); }
+ // static intvec_t IV(int_t a) { return intvec_t(a); }
+ // static boolvec_t BV(bool a) { return boolvec_t(a); }
OpenPOWER on IntegriCloud