// -*-C++-*- #define restrict __restrict__ #include "vecmathlib.h" #include #include #include #include #include using namespace std; using namespace vecmathlib; #ifndef __has_builtin # define __has_builtin(x) 0 // Compatibility with non-clang compilers #endif typedef unsigned long long ticks; inline ticks getticks() { #if __has_builtin(__builtin_readcyclecounter) return __builtin_readcyclecounter(); #else # if defined __x86_64__ ticks a, d; asm volatile("rdtsc" : "=a" (a), "=d" (d)); return a | (d << 32); # elif defined __powerpc__ unsigned int tbl, tbu, tbu1; do { asm volatile("mftbu %0": "=r"(tbu)); asm volatile("mftb %0": "=r"(tbl)); asm volatile("mftbu %0": "=r"(tbu1)); } while (tbu != tbu1); return ((unsigned long long)tbu << 32) | tbl; # endif #endif } inline double elapsed(ticks t1, ticks t0) { return t1-t0; } double get_sys_time() { timeval tp; gettimeofday(&tp, NULL); return tp.tv_sec + 1.0e-6 * tp.tv_usec; } double measure_tick() { ticks const rstart = getticks(); double const wstart = get_sys_time(); while (get_sys_time() - wstart < 0.1) { // do nothing, just wait } ticks const rend = getticks(); double const wend = get_sys_time(); assert(wend-wstart >= 0.09); return (wend - wstart) / elapsed(rend, rstart); } template void init(typename realvec_t::real_t *restrict xptr, ptrdiff_t m, ptrdiff_t ldm, ptrdiff_t n) { for (ptrdiff_t j=0; j void smooth_scalar(typename realvec_t::real_t const *restrict xptr, typename realvec_t::real_t *restrict yptr, ptrdiff_t m, ptrdiff_t ldm, ptrdiff_t n) { typedef typename realvec_t::real_t real_t; for (ptrdiff_t j=1; j void smooth_unaligned(typename realvec_t::real_t const *restrict xptr, typename realvec_t::real_t *restrict yptr, ptrdiff_t m, ptrdiff_t ldm, ptrdiff_t n) { typedef typename realvec_t::real_t real_t; typedef typename realvec_t::mask_t mask_t; for (ptrdiff_t j=1; j void smooth_aligned(typename realvec_t::real_t const *restrict xptr, typename realvec_t::real_t *restrict yptr, ptrdiff_t m, ptrdiff_t ldm, ptrdiff_t n) { typedef typename realvec_t::real_t real_t; typedef typename realvec_t::mask_t mask_t; assert(ldm % realvec_t::size == 0); for (ptrdiff_t j=1; j realvec_t; #elif defined VECMATHLIB_HAVE_VEC_DOUBLE_2 typedef realvec realvec_t; #else typedef realpseudovec realvec_t; #endif const ptrdiff_t ldm = align_up(m, realvec_t::size); typedef realvec_t::real_t real_t; vector x(ldm*n), y(ldm*n, 0.0); init(&x[0], m, ldm, n); ticks t0, t1; double const cycles_per_tick = 1.0; // measure_tick(); double cycles; t0 = getticks(); for (int iter=0; iter(&x[0], &y[0], m, ldm, n); } t1 = getticks(); cycles = cycles_per_tick * elapsed(t1,t0) / (1.0 * (n-1) * (m-1) * niters); cout << "smooth_scalar: " << cycles << " cycles/point\n"; t0 = getticks(); for (int iter=0; iter(&x[0], &y[0], m, ldm, n); } t1 = getticks(); cycles = cycles_per_tick * elapsed(t1,t0) / (1.0 * (n-1) * (m-1) * niters); cout << "smooth_unaligned: " << cycles << " cycles/point\n"; t0 = getticks(); for (int iter=0; iter(&x[0], &y[0], m, ldm, n); } t1 = getticks(); cycles = cycles_per_tick * elapsed(t1,t0) / (1.0 * (n-1) * (m-1) * niters); cout << "smooth_aligned: " << cycles << " cycles/point\n"; return 0; }