// -*-C++-*- #include "vecmathlib.h" #include #include #include #include #include #include #include #include #include using namespace std; int num_errors = 0; template struct vecmathlib_test { typedef typename realvec_t::boolvec_t boolvec_t; typedef typename realvec_t::intvec_t intvec_t; typedef typename realvec_t::int_t int_t; typedef typename realvec_t::uint_t uint_t; typedef typename realvec_t::real_t real_t; // Short names for type casts typedef real_t R; typedef int_t I; typedef uint_t U; typedef realvec_t RV; typedef intvec_t IV; typedef boolvec_t BV; typedef vecmathlib::floatprops FP; typedef vecmathlib::mathfuncs MF; // Test each function with this many random values static const int imax = 10000; static real_t accuracy(real_t ulp = R(0.5)) { #ifdef VML_HAVE_FP_CONTRACT // Require that 100% of the digits are correct // real_t digit_fraction = 1.0; // We can't do that yet -- require fewer digits real_t digit_fraction = 0.9; #else // Require that 80% of the digits are correct real_t digit_fraction = 0.8; #endif digit_fraction *= 0.95; // some lenience for testing (why?) return pow(ulp * realvec_t::epsilon(), digit_fraction); } static realvec_t random(const real_t xmin, const real_t xmax) { realvec_t x; for (int i = 0; i < realvec_t::size; ++i) { const real_t r = (xmax - xmin) * FP::convert_float(rand()) / FP::convert_float(RAND_MAX); x.set_elt(i, xmin + r); } return x; } static intvec_t random(const int_t nmin, const int_t nmax) { intvec_t n; for (int i = 0; i < intvec_t::size; ++i) { const real_t r = R(nmax - nmin + 1) * R(rand()) / (R(RAND_MAX) + R(1.0)); n.set_elt(i, nmin + FP::convert_int(floor(r))); } return n; } static bool is_big_endian() { const int i = 1; unsigned char cs[sizeof i]; memcpy(cs, &i, sizeof i); return cs[0] == 0; } template static string hex(const T x) { unsigned char cs[sizeof x]; memcpy(cs, &x, sizeof x); ostringstream buf; buf << "0x"; const char *const hexdigits = "0123456789abcdef"; const int n0 = is_big_endian() ? 0 : sizeof x - 1; const int dn = is_big_endian() ? +1 : -1; const int n1 = n0 + sizeof x * dn; for (int n = n0; n != n1; n += dn) { buf << hexdigits[cs[n] >> 4] << hexdigits[cs[n] & 15]; } return buf.str(); } static boolvec_t supported(realvec_t x) { return x == RV(0.0) || MF::vml_ieee_isnormal(x) #ifdef VML_HAVE_DENORMALS || MF::vml_ieee_isfinite(x) #endif #ifdef VML_HAVE_INF || MF::vml_ieee_isinf(x) #endif #ifdef VML_HAVE_NAN || MF::vml_ieee_isnan(x) #endif ; } static boolvec_t supported(intvec_t x) { return true; } static boolvec_t supported(boolvec_t x) { return true; } // Check load memory access static void check_mem(const char *const func, const realvec_t x, const real_t *const p, const realvec_t xold, const int mval) { realvec_t xwant; for (int i = 0; i < realvec_t::size; ++i) { xwant.set_elt(i, mval & (1 << i) ? p[i] : xold[i]); } const boolvec_t isbad = x != xwant; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " found=" << x << " [" << hex(x) << "]\n" << " expected=" << xwant << " [" << hex(xwant) << "]\n" << " mval=" << mval << " [" << hex(mval) << "]\n" << " isbad=" << isbad << "\n" << flush; } } // Check store memory access static void check_mem(const char *const func, const real_t *const p, const realvec_t x, const real_t *const pold, const int mval) { realvec_t pv, pvwant; for (int i = 0; i < realvec_t::size; ++i) { pv.set_elt(i, p[i]); pvwant.set_elt(i, mval & (1 << i) ? x[i] : pold[i]); } const boolvec_t isbad = pv != pvwant; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " found=" << pv << " [" << hex(pv) << "]\n" << " expected=" << pvwant << " [" << hex(pvwant) << "]\n" << " isbad=" << isbad << "\n" << flush; } } static void check_bool(const char *const func, const bool rstd, const bool rvml) { const bool dr = rstd ^ rvml; const bool isbad = dr; if (isbad) { ++num_errors; cout << "Error in " << func << ":\n" << " fstd()=" << rstd << " [" << hex(rstd) << "]\n" << " fvml()=" << rvml << " [" << hex(rvml) << "]\n" << " isbad()=" << isbad << "\n" << flush; } } template static void check_bool(const char *const func, const bool rstd, const bool rvml, const A x) { const bool dr = rstd ^ rvml; const bool isbad = dr; if (isbad) { ++num_errors; cout << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " fstd(x)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x)=" << rvml << " [" << hex(rvml) << "]\n" << " isbad(x)=" << isbad << "\n" << flush; } } template static void check_bool(const char *const func, const boolvec_t rstd, const boolvec_t rvml, const A x) { boolvec_t dr; bool isbad = false; for (int i = 0; i < realvec_t::size; ++i) { dr.set_elt(i, rstd[i] ^ rvml[i]); isbad |= dr[i]; } if (isbad) { ++num_errors; cout << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " fstd(x)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x)=" << dr << " [" << hex(rvml) << "]\n" << " isbad(x)=" << isbad << "\n" << flush; } } template static void check_bool(const char *const func, const boolvec_t rstd, const boolvec_t rvml, const A x, const B y) { boolvec_t dr; bool isbad = false; for (int i = 0; i < realvec_t::size; ++i) { dr.set_elt(i, rstd[i] ^ rvml[i]); isbad |= dr[i]; } if (isbad) { ++num_errors; cout << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x,y)=" << dr << " [" << hex(rvml) << "]\n" << " isbad(x,y)=" << isbad << "\n" << flush; } } template static void check_bool(const char *const func, bool fstd(typename A::scalar_t x), boolvec_t fvml(A x), const A x) { boolvec_t rstd; for (int i = 0; i < boolvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i])); } const boolvec_t rvml = fvml(x); const boolvec_t dr = rstd != rvml; const boolvec_t isbad = supported(x) && supported(rstd) && dr; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " fstd(x)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x)=" << dr << " [" << hex(dr) << "]\n" << " isbad(x)=" << isbad << "\n" << flush; } } template static void check_bool(const char *const func, bool fstd(typename A::scalar_t x, typename B::scalar_t y), boolvec_t fvml(A x, B y), const A x, const B y) { boolvec_t rstd; for (int i = 0; i < boolvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i], y[i])); } const boolvec_t rvml = fvml(x, y); const boolvec_t dr = rstd != rvml; const boolvec_t isbad = supported(x) && supported(rstd) && dr; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x,y)=" << dr << " [" << hex(dr) << "]\n" << " isbad(x,y)=" << isbad << "\n" << flush; } } template static void check_bool(const char *const func, bool fstd(typename A::scalar_t x, typename B::scalar_t y, typename C::scalar_t z), boolvec_t fvml(A x, B y, C z), const A x, const B y, const C z) { boolvec_t rstd; for (int i = 0; i < boolvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i], y[i], z[i])); } const boolvec_t rvml = fvml(x, y, z); const boolvec_t dr = rstd != rvml; const boolvec_t isbad = supported(x) && supported(rstd) && dr; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " z=" << z << " [" << hex(z) << "]\n" << " fstd(x,y,z)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y,z)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x,y,z)=" << dr << " [" << hex(dr) << "]\n" << " isbad(x,y,z)=" << isbad << "\n" << flush; } } static void check_int(const char *const func, const int_t rstd, const int_t rvml) { const int_t dr = rstd - rvml; const bool isbad = dr; if (isbad) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " fstd()=" << rstd << " [" << hex(rstd) << "]\n" << " fvml()=" << rvml << " [" << hex(rvml) << "]\n" << " error()=" << dr << " [" << hex(dr) << "]\n" << " isbad()=" << isbad << "\n" << flush; } } template static void check_int(const char *const func, int_t fstd(typename A::scalar_t x), intvec_t fvml(A x), const A x) { intvec_t rstd; for (int i = 0; i < intvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i])); } const intvec_t rvml = fvml(x); const intvec_t dr = rstd - rvml; const boolvec_t isbad = supported(x) && supported(rstd) && convert_bool(dr); if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " fstd(x)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x)=" << dr << " [" << hex(dr) << "]\n" << " isbad(x)=" << isbad << "\n" << flush; } } template static void check_int(const char *const func, int_t fstd(typename A::scalar_t x, B y), intvec_t fvml(A x, B y), const A x, const B y) { intvec_t rstd; for (int i = 0; i < intvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i], y)); } const intvec_t rvml = fvml(x, y); const intvec_t dr = rstd - rvml; const boolvec_t isbad = supported(x) && supported(rstd) && convert_bool(dr); if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x,y)=" << dr << " [" << hex(dr) << "]\n" << " isbad(x,y)=" << isbad << "\n" << flush; } } template static void check_int(const char *const func, int_t fstd(typename A::scalar_t x, typename B::scalar_t y), intvec_t fvml(A x, B y), const A x, const B y) { intvec_t rstd; for (int i = 0; i < intvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i], y[i])); } const intvec_t rvml = fvml(x, y); const intvec_t dr = rstd - rvml; const boolvec_t isbad = supported(x) && supported(y) && supported(rstd) && convert_bool(dr); if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x,y)=" << dr << " [" << hex(dr) << "]\n" << " isbad(x,y)=" << isbad << "\n" << flush; } } template static void check_int(const char *const func, int_t fstd(typename A::scalar_t x, typename B::scalar_t y, typename C::scalar_t z), intvec_t fvml(A x, B y, C z), const A x, const B y, const C z) { intvec_t rstd; for (int i = 0; i < intvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i], y[i], z[i])); } const intvec_t rvml = fvml(x, y, z); const intvec_t dr = rstd - rvml; const boolvec_t isbad = supported(x) && supported(y) && supported(z) && supported(rstd) && convert_bool(dr); if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " z=" << z << " [" << hex(z) << "]\n" << " fstd(x,y,z)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y,z)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x,y,z)=" << dr << " [" << hex(dr) << "]\n" << " isbad(x,y,z)=" << isbad << "\n" << flush; } } static void check_real(const char *const func, const real_t rstd, const real_t rvml) { const real_t dr = rstd - rvml; const bool isbad = dr != R(0.0); if (isbad) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << "():\n" << " fstd()=" << rstd << " [" << hex(rstd) << "]\n" << " fvml()=" << rvml << " [" << hex(rvml) << "]\n" << " error()=" << dr << "\n" << " isbad()=" << isbad << "\n" << flush; } } template static void check_real(const char *const func, const real_t rstd, const real_t rvml, const A x, const real_t accuracy) { const real_t dr = rstd - rvml; real_t maxabs = 0.0; for (int i = 0; i < realvec_t::size; ++i) { maxabs = vml_std::fmax(maxabs, vml_std::fabs(x[i])); } const real_t scale = fabs(rstd) + fabs(rvml) + fabs(maxabs) + R(1.0); const bool isbad = fabs(dr) > accuracy * scale; if (isbad) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << "():\n" << " x=" << x << " [" << hex(x) << "]\n" << " fstd(x)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x)=" << rvml << " [" << hex(rvml) << "]\n" << " error(x)=" << dr << "\n" << " isbad(x)=" << isbad << "\n" << flush; } } template static void check_real(const char *const func, real_t fstd(typename A::scalar_t x), realvec_t fvml(A x), const A x, const real_t accuracy) { realvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i])); } const realvec_t rvml = fvml(x); const realvec_t dr = rstd - rvml; const realvec_t scale = fabs(rstd) + fabs(rvml) + realvec_t(1.0); const boolvec_t isbad = supported(x) && supported(rstd) && fabs(dr) > realvec_t(accuracy) * scale; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " fstd(x)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x)=" << rvml << " [" << hex(rvml) << "]\n" << " abs-error(x)=" << fabs(dr) << "\n" << " rel-error(x)=" << fabs(dr) / scale << "\n" << " isbad(x)=" << isbad << "\n" << " accuracy=" << accuracy << "\n" << flush; } } template static void check_real(const char *const func, real_t fstd(typename A::scalar_t x, B y), realvec_t fvml(A x, B y), const A x, const B y, const real_t accuracy) { realvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i], y)); } const realvec_t rvml = fvml(x, y); const realvec_t dr = rstd - rvml; const realvec_t scale = fabs(rstd) + fabs(rvml) + realvec_t(1.0); const boolvec_t isbad = supported(x) && supported(rstd) && fabs(dr) > realvec_t(accuracy) * scale; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n" << " abs-error(x,y)=" << fabs(dr) << "\n" << " rel-error(x,y)=" << fabs(dr) / scale << "\n" << " isbad(x,y)=" << isbad << "\n" << " accuracy=" << accuracy << "\n" << flush; } } template static void check_real(const char *const func, real_t fstd(typename A::scalar_t x, typename B::scalar_t y), realvec_t fvml(A x, B y), const A x, const B y, const real_t accuracy, const realvec_t offset = RV(0.0)) { realvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i], y[i])); } realvec_t rvml = fvml(x, y); // Fix up rvml by adding/subtracting the offset rvml = ifthen(fabs(rstd - rvml) > fabs(offset / RV(2.0)), rvml + copysign(offset, rstd - rvml), rvml); const realvec_t dr = rstd - rvml; const realvec_t scale = fabs(rstd) + fabs(rvml) + realvec_t(1.0); const boolvec_t isbad = supported(x) && supported(y) && supported(rstd) && fabs(dr) > realvec_t(accuracy) * scale; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " fstd(x,y)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y)=" << rvml << " [" << hex(rvml) << "]\n" << " abs-error(x,y)=" << fabs(dr) << "\n" << " rel-error(x,y)=" << fabs(dr) / scale << "\n" << " isbad(x,y)=" << isbad << "\n" << " accuracy=" << accuracy << "\n" << flush; } } template static void check_real(const char *const func, real_t fstd(typename A::scalar_t x, typename B::scalar_t y, typename C::scalar_t z), realvec_t fvml(A x, B y, C z), const A x, const B y, C const z, const real_t accuracy) { realvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, fstd(x[i], y[i], z[i])); } const realvec_t rvml = fvml(x, y, z); const realvec_t dr = rstd - rvml; const realvec_t scale = fabs(rstd) + fabs(rvml) + realvec_t(1.0); const boolvec_t isbad = supported(x) && supported(y) && supported(z) && supported(rstd) && fabs(dr) > realvec_t(accuracy) * scale; if (any(isbad)) { ++num_errors; cout << setprecision(realvec_t::digits10 + 2) << "Error in " << func << ":\n" << " x=" << x << " [" << hex(x) << "]\n" << " y=" << y << " [" << hex(y) << "]\n" << " z=" << z << " [" << hex(z) << "]\n" << " fstd(x,y,z)=" << rstd << " [" << hex(rstd) << "]\n" << " fvml(x,y,z)=" << rvml << " [" << hex(rvml) << "]\n" << " abs-error(x,y,z)=" << fabs(dr) << "\n" << " rel-error(x,y,z)=" << fabs(dr) / scale << "\n" << " isbad(x,y,z)=" << isbad << "\n" << " accuracy=" << accuracy << "\n" << flush; } } static real_t *align_mem(real_t *p) { const ptrdiff_t alignment = sizeof(realvec_t); p = (real_t *)((intptr_t(p) + alignment - 1) & -alignment); assert(intptr_t(p) % alignment == 0); return p; } static string add_suffix(const char *str, int i) { ostringstream buf; buf << str << "." << i; return buf.str(); } static void test_mem() { cout << " testing loada loadu storea storeu (errors may lead to " "segfaults)...\n" << flush; const int n = 4; const int sz = realvec_t::size; const int nbytes = n * sz * sizeof(real_t); real_t *const x = align_mem(new real_t[(n + 1) * sz]); real_t *const xnew = align_mem(new real_t[(n + 1) * sz]); for (int i = 0; i < n; ++i) { realvec_t xv = random(R(-10.0), R(+10.0)); memcpy(&x[i * sz], &xv, sizeof xv); } const realvec_t z = random(R(-10.0), R(+10.0)); // loada { const real_t *p = &x[sz]; realvec_t y = realvec_t::loada(p); check_mem("loada", y, p, z, ~0); } // loadu for (ptrdiff_t i = 0; i < realvec_t::size; ++i) { const real_t *p = &x[sz]; realvec_t y = realvec_t::loadu(p + i); check_mem(add_suffix("loadu", i).c_str(), y, p + i, z, ~0); } // loadu(ioff) for (ptrdiff_t ioff = 0; ioff < realvec_t::size; ++ioff) { const real_t *p = &x[sz]; realvec_t y = realvec_t::loadu(p, ioff); check_mem(add_suffix("loadu(ioff)", ioff).c_str(), y, p + ioff, z, ~0); } // storea { memcpy(xnew, x, nbytes); real_t *p = &xnew[sz]; storea(z, p); check_mem("storea", p, z, &x[sz], ~0); } // storeu for (ptrdiff_t i = 0; i < realvec_t::size; ++i) { memcpy(xnew, x, nbytes); real_t *p = &xnew[sz]; storeu(z, p + i); check_mem(add_suffix("storeu", i).c_str(), p + i, z, &x[sz + i], ~0); } // storeu for (ptrdiff_t ioff = 0; ioff < realvec_t::size; ++ioff) { memcpy(xnew, x, nbytes); real_t *p = &xnew[sz]; storeu(z, p, ioff); check_mem(add_suffix("storeu(ioff)", ioff).c_str(), p + ioff, z, &x[sz + ioff], ~0); } for (int mval = 0; mval < (1 << realvec_t::size); ++mval) { boolvec_t mbool; for (int i = 0; i < realvec_t::size; ++i) mbool.set_elt(i, mval & (1 << i)); typename realvec_t::mask_t mask(mbool); // loada(mask) { const real_t *p = &x[sz]; realvec_t y = loada(p, z, mask); check_mem("loada(mask)", y, p, z, mval); } // loadu(mask) for (ptrdiff_t i = 0; i < realvec_t::size; ++i) { const real_t *p = &x[sz]; realvec_t y = loadu(p + i, z, mask); check_mem("loadu(mask)", y, p + i, z, mval); } // loadu(ioff, mask) for (ptrdiff_t ioff = 0; ioff < realvec_t::size; ++ioff) { const real_t *p = &x[sz]; realvec_t y = loadu(p, ioff, z, mask); check_mem("loadu(ioff,mask)", y, p + ioff, z, mval); } // storea { memcpy(xnew, x, nbytes); real_t *p = &xnew[sz]; storea(z, p, mask); check_mem("storea(mask)", p, z, &x[sz], mval); } // storeu for (ptrdiff_t i = 0; i < realvec_t::size; ++i) { memcpy(xnew, x, nbytes); real_t *p = &xnew[sz]; storeu(z, p + i, mask); check_mem("storeu(mask)", p + i, z, &x[sz + i], mval); } // storeu for (ptrdiff_t ioff = 0; ioff < realvec_t::size; ++ioff) { memcpy(xnew, x, nbytes); real_t *p = &xnew[sz]; storeu(z, p, ioff, mask); check_mem("storeu(ioff,mask)", p + ioff, z, &x[sz + ioff], mval); } } // for mval } template static T local_ifthen(bool b, T x, T y) { return b ? x : y; } static void test_bool() { cout << " testing boolean operations...\n" << flush; const boolvec_t bf = boolvec_t(false); const boolvec_t bt = boolvec_t(true); for (int i = 0; i < realvec_t::size; ++i) { check_bool("false", false, bf[i]); check_bool("true", true, bt[i]); } check_bool("all", false, all(bf), false); check_bool("all", true, all(bt), true); check_bool("any", false, any(bf), false); check_bool("any", true, any(bt), true); boolvec_t b0 = bt; boolvec_t b1 = bf; for (int n = 0; n < realvec_t::size; ++n) { b0.set_elt(n, false); b1.set_elt(n, true); for (int i = 0; i < realvec_t::size; ++i) { check_bool("set_elt", i <= n ? false : true, b0[i], false); check_bool("set_elt", i <= n ? true : false, b1[i], true); } } for (int n = 0; n < (1 << realvec_t::size); ++n) { boolvec_t x; for (int i = 0; i < realvec_t::size; ++i) { x.set_elt(i, n & (1 << i)); } for (int i = 0; i < realvec_t::size; ++i) { bool rstd = n & (1 << i); bool rvml = x[i]; check_bool("[]", rstd, rvml, x); } { boolvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, !x[i]); } boolvec_t rvml = !x; check_bool("!", rstd, rvml, x); } { bool rstd = x[0]; for (int i = 1; i < realvec_t::size; ++i) { rstd &= x[i]; } bool rvml = all(x); check_bool("all", rstd, rvml, x); } { bool rstd = x[0]; for (int i = 1; i < realvec_t::size; ++i) { rstd |= x[i]; } bool rvml = any(x); check_bool("any", rstd, rvml, x); } check_bool( "ifthen(bool)", local_ifthen, (boolvec_t (*)(boolvec_t, boolvec_t, boolvec_t))vecmathlib::ifthen, x, BV(false), BV(true)); check_int("ifthen(int)", local_ifthen, (intvec_t (*)(boolvec_t, intvec_t, intvec_t))vecmathlib::ifthen, x, IV(I(1)), IV(I(2))); check_real( "ifthen(real)", local_ifthen, ((realvec_t (*)(boolvec_t, realvec_t, realvec_t))vecmathlib::ifthen), x, RV(1.0), RV(2.0), R(0.0)); } for (int n = 0; n < (1 << realvec_t::size); ++n) { for (int m = 0; m < (1 << realvec_t::size); ++m) { boolvec_t x, y; for (int i = 0; i < realvec_t::size; ++i) { x.set_elt(i, n & (1 << i)); y.set_elt(i, m & (1 << i)); } { boolvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, x[i] && y[i]); } boolvec_t rvml = x && y; check_bool("&&", rstd, rvml, x, y); } { boolvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, x[i] || y[i]); } boolvec_t rvml = x || y; check_bool("||", rstd, rvml, x, y); } { boolvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, x[i] == y[i]); } boolvec_t rvml = x == y; check_bool("==", rstd, rvml, x, y); } { boolvec_t rstd; for (int i = 0; i < realvec_t::size; ++i) { rstd.set_elt(i, x[i] != y[i]); } boolvec_t rvml = x != y; check_bool("!=", rstd, rvml, x, y); } } } } static bool local_convert_bool(int_t x) { return x; } static int_t local_convert_int(bool x) { return x; } template static T local_pos(T x) { return +x; } template static T local_neg(T x) { return -x; } template static T local_not(T x) { return ~x; } template static T local_add(T x, T y) { return x + y; } template static T local_sub(T x, T y) { return x - y; } template static T local_mul(T x, T y) { return x * y; } template static T local_div(T x, T y) { return x / y; } template static T local_mod(T x, T y) { return x % y; } template static T local_and(T x, T y) { return x & y; } template static T local_or(T x, T y) { return x | y; } template static T local_xor(T x, T y) { return x ^ y; } static int_t local_lsr(int_t x, int_t y) { return uint_t(x) >> uint_t(y); } template static T local_srs(T x, typename T::scalar_t y) { return x >> y; } template static T local_sls(T x, typename T::scalar_t y) { return x << y; } template static T local_sr(T x, T y) { return x >> y; } template static T local_sl(T x, T y) { return x << y; } template static bool local_isignbit(T x) { return x < 0; } template static bool local_eq(T x, T y) { return x == y; } template static bool local_ne(T x, T y) { return x != y; } template static bool local_lt(T x, T y) { return x < y; } template static bool local_le(T x, T y) { return x <= y; } template static bool local_gt(T x, T y) { return x > y; } template static bool local_ge(T x, T y) { return x >= y; } template static boolvec_t local_veq(T x, T y) { return x == y; } template static boolvec_t local_vne(T x, T y) { return x != y; } template static boolvec_t local_vlt(T x, T y) { return x < y; } template static boolvec_t local_vle(T x, T y) { return x <= y; } template static boolvec_t local_vgt(T x, T y) { return x > y; } template static boolvec_t local_vge(T x, T y) { return x >= y; } static void test_int() { cout << " testing integer operations...\n" << flush; intvec_t i0 = intvec_t(I(0)); intvec_t i1 = intvec_t(I(1)); intvec_t iiota = intvec_t::iota(); for (int i = 0; i < realvec_t::size; ++i) { check_int("0", 0, i0[i]); check_int("1", 1, i1[i]); check_int("iota", i, iiota[i]); } i0 = intvec_t(I(1)); i1 = intvec_t(I(0)); for (int n = 0; n < realvec_t::size; ++n) { i0.set_elt(n, 0); i1.set_elt(n, 1); for (int i = 0; i < realvec_t::size; ++i) { check_bool("set_elt", i <= n ? 0 : 1, i0[i], 0); check_bool("set_elt", i <= n ? 1 : 0, i1[i], 1); } } const int_t int_min = std::numeric_limits::min(); const int_t int_max = std::numeric_limits::max(); const int_t values[] = { 0, 1, 2, 3, -1, -2, -3, int_min, int_min + 1, int_min + 2, int_min + 3, int_max, int_max - 1, int_max - 2, int_max - 3, }; const int nvalues = sizeof values / sizeof *values; for (int i = 0; i < nvalues * nvalues + 2 * imax; ++i) { intvec_t x, y; if (i < nvalues * nvalues) { x = values[i % nvalues]; y = values[i / nvalues]; } else if (i < nvalues * nvalues + imax) { x = random(I(-100), I(+100)); y = random(I(-100), I(+100)); } else { x = random(int_min / 2, int_max / 2); y = random(int_min / 2, int_max / 2); } boolvec_t b = convert_bool(random(I(0), I(1))); check_bool("convert_bool(int)", local_convert_bool, vecmathlib::convert_bool, x); check_int("convert_int(bool)", local_convert_int, vecmathlib::convert_int, b); check_int("+", local_pos, local_pos, x); check_int("-", local_neg, local_neg, x); check_int("~", local_not, local_not, x); check_int("+", local_add, local_add, x, y); check_int("-", local_sub, local_sub, x, y); check_int("&", local_and, local_and, x, y); check_int("|", local_or, local_or, x, y); check_int("^", local_xor, local_xor, x, y); const int_t bits = 8 * sizeof(int_t); check_int("lsr", local_lsr, vecmathlib::lsr, x, y[0] & (bits - 1)); check_int(">>", local_sr, local_srs, x, y[0] & (bits - 1)); check_int("<<", local_sl, local_sls, x, y[0] & (bits - 1)); check_int("lsr", local_lsr, vecmathlib::lsr, x, y & IV(bits - 1)); check_int(">>", local_sr, local_sr, x, y & IV(bits - 1)); check_int("<<", local_sl, local_sl, x, y & IV(bits - 1)); check_bool("isignbit", local_isignbit, vecmathlib::isignbit, x); check_bool("==", local_eq, local_veq, x, y); check_bool("!=", local_ne, local_vne, x, y); check_bool("<", local_lt, local_vlt, x, y); check_bool("<=", local_le, local_vle, x, y); check_bool(">", local_gt, local_vgt, x, y); check_bool(">=", local_ge, local_vge, x, y); } } static void test_real() { cout << " testing real operations...\n" << flush; realvec_t r0 = realvec_t(0.0); realvec_t r1 = realvec_t(1.0); for (int i = 0; i < realvec_t::size; ++i) { check_real("0.0", R(0.0), r0[i]); check_real("1.0", R(1.0), r1[i]); } r0 = realvec_t(1.0); r1 = realvec_t(0.0); for (int n = 0; n < realvec_t::size; ++n) { r0.set_elt(n, R(0.0)); r1.set_elt(n, R(1.0)); for (int i = 0; i < realvec_t::size; ++i) { check_bool("set_elt", i <= n ? R(0.0) : R(1.0), r0[i], R(0.0)); check_bool("set_elt", i <= n ? R(1.0) : R(0.0), r1[i], R(1.0)); } } // barrier realvec_t rcancel = r1; rcancel += RV(R(FP::max() / 2)); rcancel.barrier(); rcancel -= RV(R(FP::max() / 2)); check_real("barrier", R(0.0), rcancel[0]); // rounding (break ties to even, or break ties away from zero?) realvec_t rbase = RV(R(1.0)); rbase += RV(FP::epsilon() / 2); check_real("flt_rounds", R(1.0), rbase[0]); rbase = RV(R(1.0) + FP::epsilon()); rbase += RV(FP::epsilon() / 2); check_real("flt_rounds", R(1.0) + 2 * FP::epsilon(), rbase[0]); } static int_t local_bitifthen(int_t x, int_t y, int_t z) { return (x & y) | (~x & z); } static int_t local_clz(int_t x) { int bits = CHAR_BIT * sizeof(x); int res = 0; for (; res < bits; ++res) { if (x & (I(1) << (bits - res - 1))) break; } return res; } static int_t local_max(int_t x, int_t y) { return std::max(x, y); } static int_t local_min(int_t x, int_t y) { return std::min(x, y); } static int_t local_popcount(int_t x) { int bits = CHAR_BIT * sizeof(x); int res = 0; for (int d = 0; d < bits; ++d) { if (x & (I(1) << d)) ++res; } return res; } static int_t local_rotate(int_t x, int_t n) { int_t mask = CHAR_BIT * sizeof(int_t) - 1; int_t left = x << (n & mask); int_t right = I(U(x) >> U(-n & mask)); return left | right; } static void test_abs() { cout << " testing abs bitifthen clz isignbit max min popcount rotate...\n" << flush; for (int i = 0; i < imax; ++i) { const intvec_t x = random(I(-1000000), I(+1000000)); const intvec_t y = random(I(-1000000), I(+1000000)); const intvec_t z = random(I(-1000000), I(+1000000)); check_int("abs", std::abs, vecmathlib::abs, x); check_int("bitifthen", local_bitifthen, vecmathlib::bitifthen, x, y, z); check_int("clz", local_clz, vecmathlib::clz, x); check_int("max", local_max, vecmathlib::max, x, y); check_int("min", local_min, vecmathlib::min, x, y); check_int("popcount", local_popcount, vecmathlib::popcount, x); check_int("rotate", local_rotate, vecmathlib::rotate, x, y[0]); check_int("rotate", local_rotate, vecmathlib::rotate, x, y); } } // Change signature: "int" -> "int_t" static real_t local_frexp0(real_t x) { int r; return vml_std::frexp(x, &r); } static int_t local_frexp1(real_t x) { if (vml_std::isinf(x)) return std::numeric_limits::max(); if (vml_std::isnan(x)) return std::numeric_limits::min(); int r; vml_std::frexp(x, &r); return r; } static realvec_t local_vfrexp0(realvec_t x) { intvec_t r; return vecmathlib::frexp(x, &r); } static intvec_t local_vfrexp1(realvec_t x) { intvec_t r; vecmathlib::frexp(x, &r); return r; } static int_t local_ilogb(real_t x) { if (x == R(0.0)) return std::numeric_limits::min(); if (vml_std::isinf(x)) return std::numeric_limits::max(); if (vml_std::isnan(x)) return std::numeric_limits::min(); return vml_std::ilogb(x); } static real_t local_ldexp(real_t x, int_t n) { return ldexp(x, n); } static real_t local_mad(real_t x, real_t y, real_t z) { return x * y + z; } static void test_fabs() { cout << " testing + - + - * == != < <= > >= copysign fabs fdim fma fmax " "fmin frexp ilogb isfinite isinf isnan isnormal ldexp mad " "nextafter signbit...\n" << flush; const real_t eps = FP::epsilon(); const real_t int_min = R(std::numeric_limits::min()); const real_t int_max = R(std::numeric_limits::max()); const real_t uint_min = R(std::numeric_limits::min()); const real_t uint_max = R(std::numeric_limits::max()); const real_t values[] = { R(+0.0), R(+0.1), R(+0.9), R(+1.0), R(+1.1), R(-0.0), R(-0.1), R(-0.9), R(-1.0), R(-1.1), R(+0.0) + eps, R(+0.1) + eps, R(+0.9) + eps, R(+1.0) + eps, R(+1.1) + eps, R(-0.0) + eps, R(-0.1) + eps, R(-0.9) + eps, R(-1.0) + eps, R(-1.1) + eps, R(+0.0) - eps, R(+0.1) - eps, R(+0.9) - eps, R(+1.0) - eps, R(+1.1) - eps, R(-0.0) - eps, R(-0.1) - eps, R(-0.9) - eps, R(-1.0) - eps, R(-1.1) - eps, #ifdef VML_HAVE_DENORMALS +FP::min(), +FP::min() * (R(1.0) + eps), +FP::min() * R(2.0), -FP::min(), -FP::min() * (R(1.0) + eps), -FP::min() * R(2.0), #endif +FP::max(), +FP::max() * (R(1.0) - eps), +FP::max() * (R(1.0) - R(2.0) * eps), -FP::max(), -FP::max() * (R(1.0) - eps), -FP::max() * (R(1.0) - R(2.0) * eps), +R(0.5) * FP::max(), +R(0.5) * FP::max() * (R(1.0) + eps), -R(0.5) * FP::max(), -R(0.5) * FP::max() * (R(1.0) + eps), #ifdef VML_HAVE_INF +R(1.0 / 0.0), // +FP::infinity() -R(1.0 / 0.0), // -FP::infinity() #endif #ifdef VML_HAVE_NAN R(0.0 / 0.0), // FP::quiet_NaN() #endif +int_min, +int_max, +uint_min, +uint_max, -int_min, -int_max, -uint_min, -uint_max, +int_min + R(0.1), +int_max + R(0.1), +uint_min + R(0.1), +uint_max + R(0.1), -int_min + R(0.1), -int_max + R(0.1), -uint_min + R(0.1), -uint_max + R(0.1), +int_min - R(0.1), +int_max - R(0.1), +uint_min - R(0.1), +uint_max - R(0.1), -int_min - R(0.1), -int_max - R(0.1), -uint_min - R(0.1), -uint_max - R(0.1), +int_min + R(1.0), +int_max + R(1.0), +uint_min + R(1.0), +uint_max + R(1.0), -int_min + R(1.0), -int_max + R(1.0), -uint_min + R(1.0), -uint_max + R(1.0), +int_min - R(1.0), +int_max - R(1.0), +uint_min - R(1.0), +uint_max - R(1.0), -int_min - R(1.0), -int_max - R(1.0), -uint_min - R(1.0), -uint_max - R(1.0), -R(443.9999425), }; const int nvalues = sizeof values / sizeof *values; for (int i = 0; i < 8 * nvalues + imax; ++i) { const realvec_t x = i < 8 * nvalues && i & 1 ? RV(values[i / 8]) : random(R(-10.0), R(+10.0)); const realvec_t y = i < 8 * nvalues && i & 2 ? RV(values[i / 8]) : random(R(-10.0), R(+10.0)); const realvec_t z = i < 8 * nvalues && i & 4 ? RV(values[i / 8]) : random(R(-10.0), R(+10.0)); const intvec_t n = random(int_t(-10), int_t(+10)); check_real("+", local_pos, local_pos, x, R(0.0)); check_real("-", local_neg, local_neg, x, R(0.0)); check_real("+", local_add, local_add, x, y, R(0.0)); check_real("-", local_sub, local_sub, x, y, R(0.0)); check_real("*", local_mul, local_mul, x, y, R(0.0)); { real_t rstd = x[0]; for (int i = 1; i < realvec_t::size; ++i) { rstd += x[i]; } real_t rvml = sum(x); check_real("sum", rstd, rvml, x, accuracy()); } { real_t rstd = x[0]; for (int i = 1; i < realvec_t::size; ++i) { rstd *= x[i]; } real_t rvml = prod(x); check_real("prod", rstd, rvml, x, accuracy()); } { real_t rstd = x[0]; for (int i = 1; i < realvec_t::size; ++i) { rstd = vml_std::fmax(rstd, x[i]); } real_t rvml = vecmathlib::maxval(x); check_real("maxval", rstd, rvml, x, R(0.0)); } { real_t rstd = x[0]; for (int i = 1; i < realvec_t::size; ++i) { rstd = vml_std::fmin(rstd, x[i]); } real_t rvml = vecmathlib::minval(x); check_real("minval", rstd, rvml, x, R(0.0)); } check_bool("==", local_eq, local_veq, x, y); check_bool("!=", local_ne, local_vne, x, y); check_bool("<", local_lt, local_vlt, x, y); check_bool("<=", local_le, local_vle, x, y); check_bool(">", local_gt, local_vgt, x, y); check_bool(">=", local_ge, local_vge, x, y); check_real("copysign", vml_std::copysign, vecmathlib::copysign, x, y, 0.0); check_real("fabs", vml_std::fabs, vecmathlib::fabs, x, 0.0); check_real("fdim", vml_std::fdim, vecmathlib::fdim, x, y, accuracy()); check_real("fma", vml_std::fma, vecmathlib::fma, x, y, z, R(10.0) * accuracy()); check_real("fmax", vml_std::fmax, vecmathlib::fmax, x, y, 0.0); check_real("fmin", vml_std::fmin, vecmathlib::fmin, x, y, 0.0); check_real("frexp0", local_frexp0, local_vfrexp0, x, 0.0); check_int("frexp1", local_frexp1, local_vfrexp1, x); check_int("ilogb", local_ilogb, (intvec_t (*)(realvec_t))vecmathlib::ilogb, x); #if defined VML_HAVE_INF || defined VML_HAVE_NAN check_bool("isfinite", vml_std::isfinite, vecmathlib::isfinite, x); #endif #ifdef VML_HAVE_INF check_bool("isinf", vml_std::isinf, vecmathlib::isinf, x); #endif #ifdef VML_HAVE_NAN check_bool("isnan", vml_std::isnan, vecmathlib::isnan, x); #endif #ifdef VML_HAVE_DENORMALS check_bool("isnormal", vml_std::isnormal, vecmathlib::isnormal, x); #endif check_real("ldexp", local_ldexp, vecmathlib::ldexp, x, n[0], 0.0); check_real("ldexp", local_ldexp, vecmathlib::ldexp, x, n, 0.0); check_real("mad", local_mad, vecmathlib::mad, x, y, z, R(10.0) * accuracy()); check_real("nextafter", vml_std::nextafter, vecmathlib::nextafter, x, y, 0.0); check_bool("signbit", vml_std::signbit, vecmathlib::signbit, x); } } static void test_convert() { cout << " testing ceil convert_float convert_int floor rint round " "trunc...\n" << flush; const real_t eps = FP::epsilon(); const real_t int_min = R(std::numeric_limits::min()); const real_t int_max = R(std::numeric_limits::max()); const real_t uint_min = R(std::numeric_limits::min()); const real_t uint_max = R(std::numeric_limits::max()); const real_t mantissa_max = (U(1) << (FP::mantissa_bits + 1)) - U(1); const real_t real_max = (((U(1) << (FP::mantissa_bits + 1)) - U(1)) << (FP::exponent_bits - 1)) + (U(1) << (FP::exponent_bits - 1)) - U(1); const real_t values[] = { R(+0.0), R(+0.1), R(+0.9), R(+1.0), R(+1.1), R(-0.0), R(-0.1), R(-0.9), R(-1.0), R(-1.1), R(+0.0) + eps, R(+0.1) + eps, R(+0.9) + eps, R(+1.0) + eps, R(+1.1) + eps, R(-0.0) + eps, R(-0.1) + eps, R(-0.9) + eps, R(-1.0) + eps, R(-1.1) + eps, R(+0.0) - eps, R(+0.1) - eps, R(+0.9) - eps, R(+1.0) - eps, R(+1.1) - eps, R(-0.0) - eps, R(-0.1) - eps, R(-0.9) - eps, R(-1.0) - eps, R(-1.1) - eps, #ifdef VML_HAVE_DENORMALS +FP::min(), +FP::min() * (R(1.0) + eps), +FP::min() * R(2.0), -FP::min(), -FP::min() * (R(1.0) + eps), -FP::min() * R(2.0), #endif +FP::max(), +FP::max() * (R(1.0) - eps), +FP::max() * (R(1.0) - R(2.0) * eps), -FP::max(), -FP::max() * (R(1.0) - eps), -FP::max() * (R(1.0) - R(2.0) * eps), +R(0.5) * FP::max(), +R(0.5) * FP::max() * (R(1.0) + eps), -R(0.5) * FP::max(), -R(0.5) * FP::max() * (R(1.0) + eps), #ifdef VML_HAVE_INF +R(1.0 / 0.0), // +FP::infinity() -R(1.0 / 0.0), // -FP::infinity() #endif #ifdef VML_HAVE_NAN R(0.0 / 0.0), // FP::quiet_NaN() #endif +int_min, +int_max, +uint_min, +uint_max, -int_min, -int_max, -uint_min, -uint_max, +int_min + R(0.1), +int_max + R(0.1), +uint_min + R(0.1), +uint_max + R(0.1), -int_min + R(0.1), -int_max + R(0.1), -uint_min + R(0.1), -uint_max + R(0.1), +int_min - R(0.1), +int_max - R(0.1), +uint_min - R(0.1), +uint_max - R(0.1), -int_min - R(0.1), -int_max - R(0.1), -uint_min - R(0.1), -uint_max - R(0.1), +int_min + R(1.0), +int_max + R(1.0), +uint_min + R(1.0), +uint_max + R(1.0), -int_min + R(1.0), -int_max + R(1.0), -uint_min + R(1.0), -uint_max + R(1.0), +int_min - R(1.0), +int_max - R(1.0), +uint_min - R(1.0), +uint_max - R(1.0), -int_min - R(1.0), -int_max - R(1.0), -uint_min - R(1.0), -uint_max - R(1.0), +mantissa_max, +mantissa_max - R(1.0), +mantissa_max + R(1.0), -mantissa_max, -mantissa_max - R(1.0), -mantissa_max + R(1.0), +real_max, +real_max - R(1.0), +real_max + R(1.0), -real_max, -real_max - R(1.0), -real_max + R(1.0), -R(443.9999425), }; const int nvalues = sizeof values / sizeof *values; for (int i = 0; i < nvalues + imax; ++i) { const realvec_t x = i < nvalues ? RV(values[i]) : random(R(-1.0e+10), R(+1.0e+10)); const intvec_t n1 = random(int_t(-100), int_t(+100)); // const intvec_t n2 = random(int_t(-1000000000), int_t(+1000000000)); const intvec_t n2 = random(std::numeric_limits::min() / 2, // avoid overflow std::numeric_limits::max() / 2); const realvec_t fn1 = vecmathlib::convert_float(n1); const realvec_t fn2 = vecmathlib::convert_float(n2); const realvec_t fn1h = vecmathlib::convert_float(n1) * RV(0.25); const realvec_t fn2h = vecmathlib::convert_float(n2) * RV(0.25); check_real("convert_float", FP::convert_float, vecmathlib::convert_float, n1, R(0.0)); check_real("convert_float", FP::convert_float, vecmathlib::convert_float, n2, R(0.0)); // Note: RV(int_max) > int_max due to rounding if (all(x >= RV(int_min) && x < RV(int_max))) { check_int("convert_int", FP::convert_int, vecmathlib::convert_int, x); } // TODO: These should all have accuracy R(0.0) instead! check_real("ceil", vml_std::ceil, vecmathlib::ceil, x, accuracy()); check_real("ceil", vml_std::ceil, vecmathlib::ceil, fn1, accuracy()); check_real("ceil", vml_std::ceil, vecmathlib::ceil, fn2, accuracy()); check_real("ceil", vml_std::ceil, vecmathlib::ceil, fn1h, accuracy()); check_real("ceil", vml_std::ceil, vecmathlib::ceil, fn2h, accuracy()); check_real("floor", vml_std::floor, vecmathlib::floor, x, accuracy()); check_real("floor", vml_std::floor, vecmathlib::floor, fn1, accuracy()); check_real("floor", vml_std::floor, vecmathlib::floor, fn2, accuracy()); check_real("floor", vml_std::floor, vecmathlib::floor, fn1h, accuracy()); check_real("floor", vml_std::floor, vecmathlib::floor, fn2h, accuracy()); // check_int("lrint", vml_std::lrint, vecmathlib::rint, x, // accuracy()); // check_int("lrint", vml_std::lrint, vecmathlib::rint, fn1, // accuracy()); // check_int("lrint", vml_std::lrint, vecmathlib::rint, fn2, // accuracy()); // check_int("lrint", vml_std::lrint, vecmathlib::rint, fn1h, // accuracy()); // check_int("lrint", vml_std::lrint, vecmathlib::rint, fn2h, // accuracy()); check_real("rint", vml_std::rint, vecmathlib::rint, x, accuracy()); check_real("rint", vml_std::rint, vecmathlib::rint, fn1, accuracy()); check_real("rint", vml_std::rint, vecmathlib::rint, fn2, accuracy()); check_real("rint", vml_std::rint, vecmathlib::rint, fn1h, accuracy()); check_real("rint", vml_std::rint, vecmathlib::rint, fn2h, accuracy()); check_real("round", vml_std::round, vecmathlib::round, x, accuracy()); check_real("round", vml_std::round, vecmathlib::round, fn1, accuracy()); check_real("round", vml_std::round, vecmathlib::round, fn2, accuracy()); check_real("round", vml_std::round, vecmathlib::round, fn1h, accuracy()); check_real("round", vml_std::round, vecmathlib::round, fn2h, accuracy()); check_real("trunc", vml_std::trunc, vecmathlib::trunc, x, accuracy()); check_real("trunc", vml_std::trunc, vecmathlib::trunc, fn1, accuracy()); check_real("trunc", vml_std::trunc, vecmathlib::trunc, fn2, accuracy()); check_real("trunc", vml_std::trunc, vecmathlib::trunc, fn1h, accuracy()); check_real("trunc", vml_std::trunc, vecmathlib::trunc, fn2h, accuracy()); } } static void test_asin() { cout << " testing asin acos atan atan2...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(-1.0), R(+1.0)); check_real("asin", vml_std::asin, vecmathlib::asin, x, accuracy(4)); check_real("acos", vml_std::acos, vecmathlib::acos, x, accuracy(4)); } for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(-100.0), R(+100.0)); const realvec_t y = random(R(-100.0), R(+100.0)); check_real("atan", vml_std::atan, vecmathlib::atan, x, accuracy(5)); check_real("atan2", vml_std::atan2, vecmathlib::atan2, x, y, accuracy(6)); } } static void test_asinh() { cout << " testing asinh acosh atanh...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(-1000.0), R(+1000.0)); check_real("asinh", vml_std::asinh, vecmathlib::asinh, x, accuracy(4)); } for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(1.0), R(1000.0)); check_real("acosh", vml_std::acosh, vecmathlib::acosh, x, accuracy(4)); } for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(-1.0), R(+1.0)); check_real("atanh", vml_std::atanh, vecmathlib::atanh, x, accuracy(5)); } } static real_t local_exp10(real_t x) { return pow(R(10.0), x); } static void test_exp() { cout << " testing exp exp10 exp2 expm1...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(-100.0), R(+100.0)); check_real("exp", vml_std::exp, vecmathlib::exp, x, accuracy(3)); check_real("exp10", local_exp10, vecmathlib::exp10, x, accuracy(3)); check_real("exp2", vml_std::exp2, vecmathlib::exp2, x, accuracy(3)); check_real("expm1", vml_std::expm1, vecmathlib::expm1, x, accuracy(3)); } } static void test_log() { cout << " testing log log10 log1p log2...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(1.0e-10), R(1.0e+10)); check_real("log", vml_std::log, vecmathlib::log, x, accuracy(3)); check_real("log10", vml_std::log10, vecmathlib::log10, x, accuracy(3)); check_real("log1p", vml_std::log1p, vecmathlib::log1p, x, accuracy(2)); check_real("log2", vml_std::log2, vecmathlib::log2, x, accuracy(3)); } } static void test_pow() { cout << " testing pow...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(0.001), R(1000.0)); const realvec_t y = random(R(-10.0), R(+10.0)); const realvec_t ya = fabs(y); const intvec_t n = random(I(-10), I(+10)); const realvec_t fn = vecmathlib::convert_float(n); check_real("pow(0,y)", vml_std::pow, vecmathlib::pow, RV(0.0), ya, accuracy(16)); check_real("pow(x,0)", vml_std::pow, vecmathlib::pow, x, RV(0.0), accuracy(16)); // just to check check_real("log(x)", vml_std::log, vecmathlib::log, x, accuracy(3)); check_real("pow(x,y)", vml_std::pow, vecmathlib::pow, x, y, accuracy(16)); check_real("pow(-x,n)", vml_std::pow, vecmathlib::pow, -x, fn, accuracy(16)); } } static real_t local_rcp(real_t x) { return R(1.0) / x; } static void test_rcp() { cout << " testing / fmod rcp remainder...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(-100.0), R(+100.0)); const realvec_t y = random(R(-100.0), R(+100.0)); const intvec_t n = random(I(-100), I(+100)); const intvec_t m = random(I(-100), I(+100)); const realvec_t fn = vecmathlib::convert_float(n); const realvec_t fm = vecmathlib::convert_float( m + vecmathlib::convert_int(m == intvec_t(I(0)))); check_real("/", local_div, local_div, x, y, accuracy()); check_real("rcp", local_rcp, vecmathlib::rcp, x, accuracy()); check_real("fmod(x,y)", vml_std::fmod, vecmathlib::fmod, x, y, 2.0 * accuracy(), y); check_real("fmod(x,m)", vml_std::fmod, vecmathlib::fmod, x, fm, 2.0 * accuracy(), fm); check_real("fmod(n,y)", vml_std::fmod, vecmathlib::fmod, fn, y, 2.0 * accuracy(), y); check_real("remainder(x,y)", vml_std::remainder, vecmathlib::remainder, x, y, R(2.0) * accuracy(), y); check_real("remainder(x,m)", vml_std::remainder, vecmathlib::remainder, x, fm, R(2.0) * accuracy(), fm); check_real("remainder(n,y)", vml_std::remainder, vecmathlib::remainder, fn, y, R(2.0) * accuracy(), y); } } static void test_sin() { cout << " testing cos sin tan...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(-10.0), R(+10.0)); check_real("sin", vml_std::sin, vecmathlib::sin, x, accuracy(4)); check_real("cos", vml_std::cos, vecmathlib::cos, x, accuracy(4)); } for (int i = 0; i < imax; ++i) { const realvec_t x0 = random(R(-1.55), R(+1.55)); const intvec_t n = random(I(-10), I(+10)); const realvec_t x = x0 + vecmathlib::convert_float(n) * RV(M_PI); // tan loses accuracy near pi/2 // (by definition, not by implementation?) check_real("tan", vml_std::tan, vecmathlib::tan, x, R(20.0) * accuracy(5)); } } static void test_sinh() { cout << " testing cosh sinh tanh...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(-10.0), R(+10.0)); check_real("sinh", vml_std::sinh, vecmathlib::sinh, x, accuracy(4)); check_real("cosh", vml_std::cosh, vecmathlib::cosh, x, accuracy(4)); check_real("tanh", vml_std::tanh, vecmathlib::tanh, x, accuracy(5)); } } static real_t local_rsqrt(real_t x) { return R(1.0) / sqrt(x); } static void test_sqrt() { cout << " testing cbrt hypot rsqrt sqrt...\n" << flush; for (int i = 0; i < imax; ++i) { const realvec_t x = random(R(1.0e-3), R(1.0e+3)); const realvec_t y = random(-R(1.0e+3), R(1.0e+3)); const realvec_t z = random(-R(1.0e+3), R(1.0e+3)); check_real("cbrt", vml_std::cbrt, vecmathlib::cbrt, x, accuracy()); check_real("hypot", vml_std::hypot, vecmathlib::hypot, y, z, accuracy()); check_real("rsqrt", local_rsqrt, vecmathlib::rsqrt, x, accuracy()); check_real("sqrt", vml_std::sqrt, vecmathlib::sqrt, x, accuracy()); } } static void test() { cout << "\n" << "Testing math functions for type " << realvec_t::name() << ":\n"; test_bool(); test_int(); test_real(); test_mem(); // Test "basic" functions first test_abs(); test_fabs(); test_convert(); test_rcp(); test_sqrt(); test_exp(); test_log(); test_pow(); test_sin(); test_sinh(); test_asin(); test_asinh(); } }; int main(int argc, char **argv) { using namespace vecmathlib; cout << "Testing math functions:\n" << "[" VECMATHLIB_CONFIGURATION "]\n" << flush; vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef VECMATHLIB_HAVE_VEC_FLOAT_1 vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef VECMATHLIB_HAVE_VEC_FLOAT_2 vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef VECMATHLIB_HAVE_VEC_FLOAT_4 vecmathlib_test >::test(); #endif #ifdef VECMATHLIB_HAVE_VEC_FLOAT_8 vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); vecmathlib_test >::test(); #endif #ifdef VECMATHLIB_HAVE_VEC_FLOAT_16 vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1 vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_2 vecmathlib_test >::test(); #endif #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_4 vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); vecmathlib_test >::test(); #endif #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_8 vecmathlib_test >::test(); #ifdef __clang__ vecmathlib_test >::test(); #endif vecmathlib_test >::test(); vecmathlib_test >::test(); #endif cout << "\n"; if (num_errors == 0) { cout << "SUCCESS"; } else { cout << "FAILURE"; } cout << ": " << num_errors << " errors found\n" << flush; return num_errors == 0 ? 0 : 1; }