// NOTE(review): This file has been flattened onto two physical lines and is
// not compilable as it stands.
//
//  * The first code line below begins with `//`, so a compiler treats that
//    entire line (every definition up to the start of main) as a comment.
//    On the second line, everything after `// measure_tick();` is likewise
//    swallowed by that line comment.
//  * Text between `<` and `>` appears to have been stripped: the `#include`
//    directives carry no header names, each `template` has no parameter
//    list, the `for` headers stop at `j` / `iter`, and `vector x(...)` has no
//    element type.  The bodies of init, smootho, smoothu and smootha, and the
//    opening of main (its signature and the definitions of m, n and niters),
//    are therefore missing.
//    TODO: restore this file from a pristine copy; do not reconstruct the
//    missing loop bodies or problem sizes by hand.
//
// What can still be read from the surviving text:
//
//  * getticks()     - executes the x86 `rdtsc` instruction via inline asm and
//                     combines the two 32-bit halves into one 64-bit count.
//  * elapsed()      - returns the tick difference t1 - t0 as a double.
//  * get_sys_time() - wall-clock time in seconds (tv_sec + 1e-6 * tv_usec)
//                     obtained from gettimeofday().
//  * measure_tick() - busy-waits until at least 0.1 s of wall-clock time has
//                     passed and returns wall-clock seconds per tick.
//  * main           - rounds the row length m up to a multiple of
//                     realvec_t::size to obtain ldm, allocates x and y with
//                     ldm*n elements each, and times the smootho, smoothu and
//                     smootha variants in turn.
//
// `cycles_per_tick` is fixed at 1.0 (the measure_tick() call is commented
// out), so each printed "cycles/point" value is the raw tick delta divided by
// (n-1) * (m-1) * niters.
// -*-C++-*- #define restrict __restrict__ #include "vecmathlib.h" #include #include #include #include #include using namespace std; using namespace vecmathlib; typedef unsigned long long ticks; inline ticks getticks() { ticks a, d; asm volatile("rdtsc" : "=a" (a), "=d" (d)); return a | (d << 32); } inline double elapsed(ticks t1, ticks t0) { return t1-t0; } double get_sys_time() { timeval tp; gettimeofday(&tp, NULL); return tp.tv_sec + 1.0e-6 * tp.tv_usec; } double measure_tick() { ticks const rstart = getticks(); double const wstart = get_sys_time(); while (get_sys_time() - wstart < 0.1) { // do nothing, just wait } ticks const rend = getticks(); double const wend = get_sys_time(); assert(wend-wstart >= 0.09); return (wend - wstart) / elapsed(rend, rstart); } template void init(typename realvec_t::real_t *restrict xptr, ptrdiff_t m, ptrdiff_t ldm, ptrdiff_t n) { typedef typename realvec_t::real_t real_t; for (ptrdiff_t j=0; j void smootho(typename realvec_t::real_t const *restrict xptr, typename realvec_t::real_t *restrict yptr, ptrdiff_t m, ptrdiff_t ldm, ptrdiff_t n) { typedef typename realvec_t::real_t real_t; for (ptrdiff_t j=1; j void smoothu(typename realvec_t::real_t const *restrict xptr, typename realvec_t::real_t *restrict yptr, ptrdiff_t m, ptrdiff_t ldm, ptrdiff_t n) { typedef typename realvec_t::real_t real_t; typedef typename realvec_t::mask_t mask_t; for (ptrdiff_t j=1; j void smootha(typename realvec_t::real_t const *restrict xptr, typename realvec_t::real_t *restrict yptr, ptrdiff_t m, ptrdiff_t ldm, ptrdiff_t n) { typedef typename realvec_t::real_t real_t; typedef typename realvec_t::mask_t mask_t; assert(ldm % realvec_t::size == 0); for (ptrdiff_t j=1; j realvec_t; #elif defined VECMATHLIB_HAVE_VEC_DOUBLE_2 typedef realvec realvec_t; #else typedef realpseudovec realvec_t; #endif const ptrdiff_t ldm = align_up(m, realvec_t::size); typedef realvec_t::real_t real_t; vector x(ldm*n), y(ldm*n, 0.0); init(&x[0], m, ldm, n); ticks t0, t1; double const 
cycles_per_tick = 1.0; // measure_tick(); double cycles; t0 = getticks(); for (int iter=0; iter(&x[0], &y[0], m, ldm, n); } t1 = getticks(); cycles = cycles_per_tick * elapsed(t1,t0) / (1.0 * (n-1) * (m-1) * niters); cout << "smootho: " << cycles << " cycles/point\n"; t0 = getticks(); for (int iter=0; iter(&x[0], &y[0], m, ldm, n); } t1 = getticks(); cycles = cycles_per_tick * elapsed(t1,t0) / (1.0 * (n-1) * (m-1) * niters); cout << "smoothu: " << cycles << " cycles/point\n"; t0 = getticks(); for (int iter=0; iter(&x[0], &y[0], m, ldm, n); } t1 = getticks(); cycles = cycles_per_tick * elapsed(t1,t0) / (1.0 * (n-1) * (m-1) * niters); cout << "smootha: " << cycles << " cycles/point\n"; return 0; }