// -*-C++-*- #define VML_NODEBUG #include "vecmathlib.h" #include #include #include #include #include #include #include using namespace std; using namespace vecmathlib; #ifndef __has_builtin # define __has_builtin(x) 0 // Compatibility with non-clang compilers #endif typedef unsigned long long ticks; inline ticks getticks() { #if __has_builtin(__builtin_readcyclecounter) return __builtin_readcyclecounter(); #elif defined __x86_64__ ticks a, d; asm volatile("rdtsc" : "=a" (a), "=d" (d)); return a | (d << 32); #elif defined __powerpc__ unsigned int tbl, tbu, tbu1; do { asm volatile("mftbu %0": "=r"(tbu)); asm volatile("mftb %0": "=r"(tbl)); asm volatile("mftbu %0": "=r"(tbu1)); } while (tbu != tbu1); return ((unsigned long long)tbu << 32) | tbl; #else timeval tv; gettimeofday(&tv, NULL); return 1000000ULL * tv.tv_sec + tv.tv_usec; // timespec ts; // clock_gettime(CLOCK_REALTIME, &ts); // return 1000000000ULL * ts.tv_sec + ts.tv_nsec; #endif } inline double elapsed(ticks t1, ticks t0) { return t1-t0; } double get_sys_time() { timeval tv; gettimeofday(&tv, NULL); return tv.tv_sec + 1.0e-6 * tv.tv_usec; // timespec ts; // clock_gettime(CLOCK_REALTIME, &ts); // return ts.tv_sec + 1.0e-9 * ts.tv_nsec; } double measure_tick() { ticks const rstart = getticks(); double const wstart = get_sys_time(); while (get_sys_time() - wstart < 0.1) { // do nothing, just wait } ticks const rend = getticks(); double const wend = get_sys_time(); assert(wend-wstart >= 0.09); return (wend - wstart) / elapsed(rend, rstart); } double global_result = 0.0; template void save_result(realvec_t result) { for (int i=0; i inline T nop(T x) { return x; } #define DECLARE_FUNCTOR(FUNC, XMIN, XMAX) \ template \ struct functor_##FUNC { \ static typename T::real_t get_xmin() { return XMIN; } \ static typename T::real_t get_xmax() { return XMAX; } \ static const char* name() { return #FUNC; } \ T operator()(T x) { return FUNC(x); } \ } DECLARE_FUNCTOR(nop, 0.0, 1.0); DECLARE_FUNCTOR(acos, -0.5, +0.5); 
// ---------------------------------------------------------------------------
// Reviewer notes (comments only; no code in this file has been altered).
//
// Purpose: benchmark driver for vecmathlib math functions. main() prints a
// banner, calls bench(), and then tests global_result with std::isfinite;
// per the comment in main(), that accumulator exists to prevent optimisation.
//
// Timing helpers:
//   getticks()      returns a raw counter: __builtin_readcyclecounter() where
//                   the builtin exists, rdtsc on x86-64, mftbu/mftb on PowerPC
//                   (the upper word is read twice and the read is retried
//                   until both copies agree), otherwise gettimeofday()
//                   expressed in microseconds. The unit of a "tick" therefore
//                   depends on which preprocessor branch was compiled.
//   elapsed(t1,t0)  returns t1-t0 converted to double.
//   get_sys_time()  returns gettimeofday() as seconds in a double.
//   measure_tick()  spins on get_sys_time() until 0.1 s have passed, asserts
//                   that at least 0.09 s elapsed, and returns elapsed seconds
//                   divided by elapsed ticks.
//
// DECLARE_FUNCTOR(FUNC, XMIN, XMAX) defines struct functor_FUNC with
// get_xmin(), get_xmax(), name() (the stringified FUNC) and an operator()
// that forwards to FUNC(x). XMIN/XMAX are the values handed back by
// get_xmin()/get_xmax(), which run_bench() reads into xmin/xmax.
//
// bench_func() currently has only the calls guarded by
// VECMATHLIB_HAVE_VEC_DOUBLE_4 enabled; every other bench_type_func call in
// it is commented out.
//
// NOTE(review): this copy of the file looks damaged. The original line
// breaks are gone and everything that stood between a '<' and the next '>'
// appears to have been stripped: the #include directives have no targets,
// "template" has no parameter list, the loop in save_result() stops at
// "i=0; i", most of run_bench() is missing, and the bench_func()/
// bench_type_func() calls have lost their template arguments. Because the
// line breaks are gone, "//" comments and "#" directives now swallow the
// code that follows them on the same line. Restore the file from version
// control before attempting to build or change it.
//
// NOTE(review): DECLARE_FUNCTOR(acosh, 0.0, 1.0) selects the range
// [0.0, 1.0], while real-valued acosh is only defined for x >= 1. This
// presumably benchmarks a NaN path and may trip the "not finite" warning in
// main() - confirm whether (1.0, 2.0), as used for log, was intended.
//
// NOTE(review): bench_type_func() uses setw(-5), a negative field width.
// If left-aligned padding to 5 columns was intended, "left << setw(5)" is
// the usual spelling - confirm before changing, since it alters the output.
// ---------------------------------------------------------------------------
DECLARE_FUNCTOR(acosh, 0.0, 1.0); DECLARE_FUNCTOR(asin, -0.5, +0.5); DECLARE_FUNCTOR(asinh, -1.0, +1.0); DECLARE_FUNCTOR(atan, -1.0, +1.0); // DECLARE_FUNCTOR(atan2, 0.0); DECLARE_FUNCTOR(atanh, -0.5, +0.5); DECLARE_FUNCTOR(cbrt, -1.0, 1.0); DECLARE_FUNCTOR(ceil, -1.0, +1.0); // DECLARE_FUNCTOR(copysign, 0.0); DECLARE_FUNCTOR(cos, 0.0, 1.0); DECLARE_FUNCTOR(cosh, 0.0, 1.0); DECLARE_FUNCTOR(exp, 0.0, 1.0); DECLARE_FUNCTOR(exp10, 0.0, 1.0); DECLARE_FUNCTOR(exp2, 0.0, 1.0); DECLARE_FUNCTOR(expm1, 0.0, 1.0); DECLARE_FUNCTOR(fabs, -1.0, 1.0); DECLARE_FUNCTOR(floor, -1.0, +1.0); // DECLARE_FUNCTOR(fdim, 0.0); // DECLARE_FUNCTOR(fma, 0.0); // DECLARE_FUNCTOR(fmax, 0.0); // DECLARE_FUNCTOR(fmin, 0.0); // DECLARE_FUNCTOR(fmod, 0.0); // DECLARE_FUNCTOR(frexp, 0.0); // DECLARE_FUNCTOR(hypot, 0.0); // DECLARE_FUNCTOR(ilogb, 0.0); // DECLARE_FUNCTOR(isfinite, 0.0); // DECLARE_FUNCTOR(isinf, 0.0); // DECLARE_FUNCTOR(isnan, 0.0); // DECLARE_FUNCTOR(isnormal, 0.0); // DECLARE_FUNCTOR(ldexp, 0.0); // DECLARE_FUNCTOR(ldexp, 0.0); DECLARE_FUNCTOR(log, 1.0, 2.0); DECLARE_FUNCTOR(log10, 1.0, 2.0); DECLARE_FUNCTOR(log1p, 0.0, 1.0); DECLARE_FUNCTOR(log2, 1.0, 2.0); // DECLARE_FUNCTOR(nextafter, 0.0); // DECLARE_FUNCTOR(pow, 0.0); DECLARE_FUNCTOR(rcp, 1.0, 2.0); // DECLARE_FUNCTOR(remainder, 0.0); DECLARE_FUNCTOR(rint, -1.0, +1.0); DECLARE_FUNCTOR(round, -1.0, +1.0); DECLARE_FUNCTOR(rsqrt, 0.0, 1.0); // DECLARE_FUNCTOR(signbit, 0.0); DECLARE_FUNCTOR(sin, 0.0, 1.0); DECLARE_FUNCTOR(sinh, -1.0, +1.0); DECLARE_FUNCTOR(sqrt, 0.0, 1.0); DECLARE_FUNCTOR(tan, 0.0, 1.0); DECLARE_FUNCTOR(tanh, -1.0, +1.0); DECLARE_FUNCTOR(trunc, -1.0, +1.0); template class func_t> double run_bench() { const int numiters = 10000000; typedef typename realvec_t::real_t real_t; const real_t xmin = func_t::get_xmin(); const real_t xmax = func_t::get_xmax(); realvec_t x0, dx; for (int i=0; i func; t0 = getticks(); x = y = x0; for (int n=0; n class func_t> void bench_type_func() { cout << " " << setw(-5) << 
func_t::name() << " " << setw(18) << realvec_t::name() << ": " << flush; double const cycles = run_bench(); cout << cycles << " cycles\n" << flush; } template class func_t> void bench_func() { cout << "\n" << "Benchmarking " << func_t().name() << ":\n"; // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // bench_type_func, func_t>(); // #ifdef VECMATHLIB_HAVE_VEC_FLOAT_1 // bench_type_func, func_t>(); // #endif // #ifdef VECMATHLIB_HAVE_VEC_FLOAT_2 // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // bench_type_func, func_t>(); // #endif // #ifdef VECMATHLIB_HAVE_VEC_FLOAT_4 // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // bench_type_func, func_t>(); // #endif // #ifdef VECMATHLIB_HAVE_VEC_FLOAT_8 // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // bench_type_func, func_t>(); // #endif // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // bench_type_func, func_t>(); // #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1 // bench_type_func, func_t>(); // #endif // #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_2 // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // // bench_type_func, func_t>(); // bench_type_func, func_t>(); // #endif #ifdef VECMATHLIB_HAVE_VEC_DOUBLE_4 bench_type_func, func_t>(); // bench_type_func, func_t>(); // bench_type_func, func_t>(); bench_type_func, func_t>(); #endif } void bench() { bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); // bench_func(); bench_func(); bench_func(); bench_func(); // bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // bench_func(); // 
bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); // bench_func(); // bench_func(); bench_func(); // bench_func(); bench_func(); bench_func(); bench_func(); // bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); bench_func(); } int main(int argc, char** argv) { using namespace vecmathlib; cout << "Benchmarking math functions:\n"; bench(); // Checking global accumulator to prevent optimisation if (! std::isfinite(global_result)) { cout << "\n" << "WARNING: Global accumulator is not finite\n"; } return 0; }