// -*-C++-*- #define VML_NODEBUG #include "vecmathlib.h" #include #include #include #include #include #include #include using namespace std; using namespace vecmathlib; #ifndef __has_builtin #define __has_builtin(x) 0 // Compatibility with non-clang compilers #endif typedef unsigned long long ticks; inline ticks getticks() { #if __has_builtin(__builtin_readcyclecounter) return __builtin_readcyclecounter(); #elif defined __x86_64__ ticks a, d; asm volatile("rdtsc" : "=a"(a), "=d"(d)); return a | (d << 32); #elif defined __powerpc__ unsigned int tbl, tbu, tbu1; do { asm volatile("mftbu %0" : "=r"(tbu)); asm volatile("mftb %0" : "=r"(tbl)); asm volatile("mftbu %0" : "=r"(tbu1)); } while (tbu != tbu1); return ((unsigned long long)tbu << 32) | tbl; #else timeval tv; gettimeofday(&tv, NULL); return 1000000ULL * tv.tv_sec + tv.tv_usec; // timespec ts; // clock_gettime(CLOCK_REALTIME, &ts); // return 1000000000ULL * ts.tv_sec + ts.tv_nsec; #endif } inline double elapsed(ticks t1, ticks t0) { return t1 - t0; } double get_sys_time() { timeval tv; gettimeofday(&tv, NULL); return tv.tv_sec + 1.0e-6 * tv.tv_usec; // timespec ts; // clock_gettime(CLOCK_REALTIME, &ts); // return ts.tv_sec + 1.0e-9 * ts.tv_nsec; } double measure_tick() { ticks const rstart = getticks(); double const wstart = get_sys_time(); while (get_sys_time() - wstart < 0.1) { // do nothing, just wait } ticks const rend = getticks(); double const wend = get_sys_time(); assert(wend - wstart >= 0.09); return (wend - wstart) / elapsed(rend, rstart); } double global_result = 0.0; template void save_result(realvec_t result) { for (int i = 0; i < realvec_t::size; ++i) { global_result += result[i]; } // Check global accumulator to prevent optimisation if (!vml_std::isfinite(global_result)) { cout << "\n" << "WARNING: Global accumulator is not finite\n"; } } template inline T nop(T x) { return x; } template inline T fneg(T x) { return -x; } template inline T fadd(T x, T y) { return x + y; } template inline T fsub(T x, T y) { return x - y; } template inline T fmul(T x, T y) { return x * y; } template inline T fdiv(T x, T y) { return x / y; } template inline T frexp0(T x) { typename T::intvec_t ir; return frexp(x, &ir); } template inline typename T::intvec_t frexp1(T x) { typename T::intvec_t ir; frexp(x, &ir); return ir; } template inline T ldexps(T x, T y) { typename T::intvec_t iy = convert_int(y); return ldexp(x, iy[0]); } template inline T ldexpv(T x, T y) { typename T::intvec_t iy = convert_int(y); return ldexp(x, iy); } #define DECLARE_FUNCTOR(FUNC, XMIN, XMAX) \ template struct functor_##FUNC { \ static typename T::real_t get_xmin() { return XMIN; } \ static typename T::real_t get_xmax() { return XMAX; } \ static const char *name() { return #FUNC; } \ T operator()(T x) { return FUNC(x); } \ } #define DECLARE_BFUNCTOR(FUNC, XMIN, XMAX) \ template struct functor_##FUNC { \ static typename T::real_t get_xmin() { return XMIN; } \ static typename T::real_t get_xmax() { return XMAX; } \ static const char *name() { return #FUNC; } \ T operator()(T x) { \ typename T::boolvec_t res = FUNC(x); \ return convert_float(convert_int(res)); \ } \ } #define DECLARE_IFUNCTOR(FUNC, XMIN, XMAX) \ template struct functor_##FUNC { \ static typename T::real_t get_xmin() { return XMIN; } \ static typename T::real_t get_xmax() { return XMAX; } \ static const char *name() { return #FUNC; } \ T operator()(T x) { \ typename T::intvec_t res = FUNC(x); \ return convert_float(res); \ } \ } #define DECLARE_FUNCTOR2(FUNC, XMIN, XMAX, YOFFSET) \ template struct functor_##FUNC { \ static typename T::real_t get_xmin() { return XMIN; } \ static typename T::real_t get_xmax() { return XMAX; } \ static const char *name() { return #FUNC; } \ T operator()(T x) { \ const typename T::real_t yoffset = YOFFSET; \ return FUNC(x, x + T(yoffset)); \ } \ } #define DECLARE_FUNCTOR3(FUNC, XMIN, XMAX, YOFFSET, ZOFFSET) \ template struct functor_##FUNC { \ static typename T::real_t get_xmin() { return XMIN; } \ static typename T::real_t get_xmax() { return XMAX; } \ static const char *name() { return #FUNC; } \ T operator()(T x) { \ const typename T::real_t yoffset = YOFFSET; \ const typename T::real_t zoffset = ZOFFSET; \ return FUNC(x, x + T(yoffset), x + T(zoffset)); \ } \ } DECLARE_FUNCTOR(nop, 0.0, 1.0); DECLARE_FUNCTOR(fneg, 0.0, 1.0); DECLARE_FUNCTOR2(fadd, 0.0, 1.0, 2.0); DECLARE_FUNCTOR2(fsub, 0.0, 1.0, 2.0); DECLARE_FUNCTOR2(fmul, 0.0, 1.0, 2.0); DECLARE_FUNCTOR2(fdiv, 0.0, 1.0, 2.0); DECLARE_FUNCTOR(acos, -0.5, +0.5); DECLARE_FUNCTOR(acosh, 1.0, 2.0); DECLARE_FUNCTOR(asin, -0.5, +0.5); DECLARE_FUNCTOR(asinh, -1.0, +1.0); DECLARE_FUNCTOR(atan, -1.0, +1.0); DECLARE_FUNCTOR2(atan2, 0.0, 1.0, 2.0); DECLARE_FUNCTOR(atanh, -0.5, +0.5); DECLARE_FUNCTOR(cbrt, -1.0, 1.0); DECLARE_FUNCTOR(ceil, -1.0, +1.0); DECLARE_FUNCTOR2(copysign, -1.0, +1.0, 2.0); DECLARE_FUNCTOR(cos, 0.0, 1.0); DECLARE_FUNCTOR(cosh, 0.0, 1.0); DECLARE_FUNCTOR(exp, 0.0, 1.0); DECLARE_FUNCTOR(exp10, 0.0, 1.0); DECLARE_FUNCTOR(exp2, 0.0, 1.0); DECLARE_FUNCTOR(expm1, 0.0, 1.0); DECLARE_FUNCTOR(fabs, -1.0, 1.0); DECLARE_FUNCTOR(floor, -1.0, +1.0); DECLARE_FUNCTOR2(fdim, 0.0, 1.0, 2.0); DECLARE_FUNCTOR3(fma, 0.0, 1.0, 2.0, 3.0); DECLARE_FUNCTOR2(fmax, 0.0, 1.0, 2.0); DECLARE_FUNCTOR2(fmin, 0.0, 1.0, 2.0); DECLARE_FUNCTOR2(fmod, 0.0, 1.0, 2.0); DECLARE_FUNCTOR(frexp0, 1.0, 100.0); DECLARE_IFUNCTOR(frexp1, 1.0, 100.0); DECLARE_FUNCTOR2(hypot, 0.0, 1.0, 2.0); DECLARE_IFUNCTOR(ilogb, 1.0, 100.0); DECLARE_BFUNCTOR(isfinite, 0.0, 1.0); DECLARE_BFUNCTOR(isinf, 0.0, 1.0); DECLARE_BFUNCTOR(isnan, 0.0, 1.0); DECLARE_BFUNCTOR(isnormal, 0.0, 1.0); DECLARE_FUNCTOR2(ldexps, 1.0, 20.0, -10.0); DECLARE_FUNCTOR2(ldexpv, 1.0, 20.0, -10.0); DECLARE_FUNCTOR(log, 1.0, 2.0); DECLARE_FUNCTOR(log10, 1.0, 2.0); DECLARE_FUNCTOR(log1p, 0.0, 1.0); DECLARE_FUNCTOR(log2, 1.0, 2.0); DECLARE_FUNCTOR2(nextafter, -1.0, +1.0, 0.0); DECLARE_FUNCTOR2(pow, 0.0, 1.0, 2.0); DECLARE_FUNCTOR(rcp, 1.0, 2.0); DECLARE_FUNCTOR2(remainder, 0.0, 1.0, 2.0); DECLARE_FUNCTOR(rint, -1.0, +1.0); DECLARE_FUNCTOR(round, -1.0, +1.0); DECLARE_FUNCTOR(rsqrt, 1.0, 2.0); DECLARE_BFUNCTOR(signbit, -1.0, +1.0); DECLARE_FUNCTOR(sin, 0.0, 1.0); DECLARE_FUNCTOR(sinh, -1.0, +1.0); DECLARE_FUNCTOR(sqrt, 0.0, 1.0); DECLARE_FUNCTOR(tan, 0.0, 1.0); DECLARE_FUNCTOR(tanh, -1.0, +1.0); DECLARE_FUNCTOR(trunc, -1.0, +1.0); template class func_t> double run_bench() { const int numiters = 1000000; typedef typename realvec_t::real_t real_t; const real_t xmin = func_t::get_xmin(); const real_t xmax = func_t::get_xmax(); realvec_t x0, dx; for (int i = 0; i < realvec_t::size; ++i) { x0.set_elt(i, xmin + (xmax - xmin) / numiters * i / realvec_t::size); dx.set_elt(i, (xmax - xmin) / numiters); } realvec_t x, y; ticks t0, t1; double const cycles_per_tick = 1.0; // measure_tick(); func_t func; t0 = getticks(); x = y = x0; for (int n = 0; n < numiters; ++n) { y += func(x); x += dx; } t1 = getticks(); save_result(y); return cycles_per_tick * elapsed(t1, t0) * realvec_t::size / numiters; } template class func_t> void bench_type_func() { cout << " " << setw(-5) << func_t::name() << " " << setw(18) << realvec_t::name() << ": " << flush; double const cycles = run_bench(); cout << cycles << " cycles\n" << flush; } template