summary | refs | log | tree | commit | diff | stats
path: root/bench.cc
diff options
context:
space:
mode:
Diffstat (limited to 'bench.cc')
-rw-r--r-- bench.cc 274
1 files changed, 115 insertions, 159 deletions
diff --git a/bench.cc b/bench.cc
index e795985..ac3eb46 100644
--- a/bench.cc
+++ b/bench.cc
@@ -16,47 +16,38 @@
using namespace std;
using namespace vecmathlib;
-
-
#ifndef __has_builtin
-# define __has_builtin(x) 0 // Compatibility with non-clang compilers
+#define __has_builtin(x) 0 // Compatibility with non-clang compilers
#endif
-
-
typedef unsigned long long ticks;
-inline ticks getticks()
-{
+inline ticks getticks() {
#if __has_builtin(__builtin_readcyclecounter)
return __builtin_readcyclecounter();
#elif defined __x86_64__
ticks a, d;
- asm volatile("rdtsc" : "=a" (a), "=d" (d));
+ asm volatile("rdtsc" : "=a"(a), "=d"(d));
return a | (d << 32);
#elif defined __powerpc__
unsigned int tbl, tbu, tbu1;
do {
- asm volatile("mftbu %0": "=r"(tbu));
- asm volatile("mftb %0": "=r"(tbl));
- asm volatile("mftbu %0": "=r"(tbu1));
+ asm volatile("mftbu %0" : "=r"(tbu));
+ asm volatile("mftb %0" : "=r"(tbl));
+ asm volatile("mftbu %0" : "=r"(tbu1));
} while (tbu != tbu1);
return ((unsigned long long)tbu << 32) | tbl;
#else
timeval tv;
gettimeofday(&tv, NULL);
return 1000000ULL * tv.tv_sec + tv.tv_usec;
- // timespec ts;
- // clock_gettime(CLOCK_REALTIME, &ts);
- // return 1000000000ULL * ts.tv_sec + ts.tv_nsec;
+// timespec ts;
+// clock_gettime(CLOCK_REALTIME, &ts);
+// return 1000000000ULL * ts.tv_sec + ts.tv_nsec;
#endif
}
-inline double elapsed(ticks t1, ticks t0)
-{
- return t1-t0;
-}
+inline double elapsed(ticks t1, ticks t0) { return t1 - t0; }
-double get_sys_time()
-{
+double get_sys_time() {
timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + 1.0e-6 * tv.tv_usec;
@@ -65,8 +56,7 @@ double get_sys_time()
// return ts.tv_sec + 1.0e-9 * ts.tv_nsec;
}
-double measure_tick()
-{
+double measure_tick() {
ticks const rstart = getticks();
double const wstart = get_sys_time();
while (get_sys_time() - wstart < 0.1) {
@@ -74,124 +64,103 @@ double measure_tick()
}
ticks const rend = getticks();
double const wend = get_sys_time();
- assert(wend-wstart >= 0.09);
+ assert(wend - wstart >= 0.09);
return (wend - wstart) / elapsed(rend, rstart);
}
-
-
double global_result = 0.0;
-template<typename realvec_t>
-void save_result(realvec_t result)
-{
- for (int i=0; i<realvec_t::size; ++i) {
+template <typename realvec_t> void save_result(realvec_t result) {
+ for (int i = 0; i < realvec_t::size; ++i) {
global_result += result[i];
}
// Check global accumulator to prevent optimisation
- if (! vml_std::isfinite(global_result)) {
+ if (!vml_std::isfinite(global_result)) {
cout << "\n"
<< "WARNING: Global accumulator is not finite\n";
}
}
+template <typename T> inline T nop(T x) { return x; }
+template <typename T> inline T fneg(T x) { return -x; }
-template<typename T> inline T nop(T x) { return x; }
-
-template<typename T> inline T fneg(T x) { return -x; }
+template <typename T> inline T fadd(T x, T y) { return x + y; }
+template <typename T> inline T fsub(T x, T y) { return x - y; }
+template <typename T> inline T fmul(T x, T y) { return x * y; }
+template <typename T> inline T fdiv(T x, T y) { return x / y; }
-template<typename T> inline T fadd(T x, T y) { return x+y; }
-template<typename T> inline T fsub(T x, T y) { return x-y; }
-template<typename T> inline T fmul(T x, T y) { return x*y; }
-template<typename T> inline T fdiv(T x, T y) { return x/y; }
-
-template<typename T> inline T frexp0(T x)
-{
+template <typename T> inline T frexp0(T x) {
typename T::intvec_t ir;
return frexp(x, &ir);
}
-template<typename T> inline typename T::intvec_t frexp1(T x)
-{
+template <typename T> inline typename T::intvec_t frexp1(T x) {
typename T::intvec_t ir;
frexp(x, &ir);
return ir;
}
-template<typename T> inline T ldexps(T x, T y)
-{
+template <typename T> inline T ldexps(T x, T y) {
typename T::intvec_t iy = convert_int(y);
return ldexp(x, iy[0]);
}
-template<typename T> inline T ldexpv(T x, T y)
-{
+template <typename T> inline T ldexpv(T x, T y) {
typename T::intvec_t iy = convert_int(y);
return ldexp(x, iy);
}
-
-
-#define DECLARE_FUNCTOR(FUNC, XMIN, XMAX) \
- template<typename T> \
- struct functor_##FUNC { \
- static typename T::real_t get_xmin() { return XMIN; } \
- static typename T::real_t get_xmax() { return XMAX; } \
- static const char* name() { return #FUNC; } \
- T operator()(T x) { \
- return FUNC(x); \
- } \
+#define DECLARE_FUNCTOR(FUNC, XMIN, XMAX) \
+ template <typename T> struct functor_##FUNC { \
+ static typename T::real_t get_xmin() { return XMIN; } \
+ static typename T::real_t get_xmax() { return XMAX; } \
+ static const char *name() { return #FUNC; } \
+ T operator()(T x) { return FUNC(x); } \
}
-#define DECLARE_BFUNCTOR(FUNC, XMIN, XMAX) \
- template<typename T> \
- struct functor_##FUNC { \
- static typename T::real_t get_xmin() { return XMIN; } \
- static typename T::real_t get_xmax() { return XMAX; } \
- static const char* name() { return #FUNC; } \
- T operator()(T x) { \
- typename T::boolvec_t res = FUNC(x); \
- return convert_float(convert_int(res)); \
- } \
+#define DECLARE_BFUNCTOR(FUNC, XMIN, XMAX) \
+ template <typename T> struct functor_##FUNC { \
+ static typename T::real_t get_xmin() { return XMIN; } \
+ static typename T::real_t get_xmax() { return XMAX; } \
+ static const char *name() { return #FUNC; } \
+ T operator()(T x) { \
+ typename T::boolvec_t res = FUNC(x); \
+ return convert_float(convert_int(res)); \
+ } \
}
-#define DECLARE_IFUNCTOR(FUNC, XMIN, XMAX) \
- template<typename T> \
- struct functor_##FUNC { \
- static typename T::real_t get_xmin() { return XMIN; } \
- static typename T::real_t get_xmax() { return XMAX; } \
- static const char* name() { return #FUNC; } \
- T operator()(T x) { \
- typename T::intvec_t res = FUNC(x); \
- return convert_float(res); \
- } \
+#define DECLARE_IFUNCTOR(FUNC, XMIN, XMAX) \
+ template <typename T> struct functor_##FUNC { \
+ static typename T::real_t get_xmin() { return XMIN; } \
+ static typename T::real_t get_xmax() { return XMAX; } \
+ static const char *name() { return #FUNC; } \
+ T operator()(T x) { \
+ typename T::intvec_t res = FUNC(x); \
+ return convert_float(res); \
+ } \
}
-#define DECLARE_FUNCTOR2(FUNC, XMIN, XMAX, YOFFSET) \
- template<typename T> \
- struct functor_##FUNC { \
- static typename T::real_t get_xmin() { return XMIN; } \
- static typename T::real_t get_xmax() { return XMAX; } \
- static const char* name() { return #FUNC; } \
- T operator()(T x) { \
- const typename T::real_t yoffset = YOFFSET; \
- return FUNC(x, x + T(yoffset)); \
- } \
+#define DECLARE_FUNCTOR2(FUNC, XMIN, XMAX, YOFFSET) \
+ template <typename T> struct functor_##FUNC { \
+ static typename T::real_t get_xmin() { return XMIN; } \
+ static typename T::real_t get_xmax() { return XMAX; } \
+ static const char *name() { return #FUNC; } \
+ T operator()(T x) { \
+ const typename T::real_t yoffset = YOFFSET; \
+ return FUNC(x, x + T(yoffset)); \
+ } \
}
-#define DECLARE_FUNCTOR3(FUNC, XMIN, XMAX, YOFFSET, ZOFFSET) \
- template<typename T> \
- struct functor_##FUNC { \
- static typename T::real_t get_xmin() { return XMIN; } \
- static typename T::real_t get_xmax() { return XMAX; } \
- static const char* name() { return #FUNC; } \
- T operator()(T x) { \
- const typename T::real_t yoffset = YOFFSET; \
- const typename T::real_t zoffset = ZOFFSET; \
- return FUNC(x, x + T(yoffset), x + T(zoffset)); \
- } \
+#define DECLARE_FUNCTOR3(FUNC, XMIN, XMAX, YOFFSET, ZOFFSET) \
+ template <typename T> struct functor_##FUNC { \
+ static typename T::real_t get_xmin() { return XMIN; } \
+ static typename T::real_t get_xmax() { return XMAX; } \
+ static const char *name() { return #FUNC; } \
+ T operator()(T x) { \
+ const typename T::real_t yoffset = YOFFSET; \
+ const typename T::real_t zoffset = ZOFFSET; \
+ return FUNC(x, x + T(yoffset), x + T(zoffset)); \
+ } \
}
-
-
DECLARE_FUNCTOR(nop, 0.0, 1.0);
DECLARE_FUNCTOR(fneg, 0.0, 1.0);
@@ -252,137 +221,127 @@ DECLARE_FUNCTOR(tan, 0.0, 1.0);
DECLARE_FUNCTOR(tanh, -1.0, +1.0);
DECLARE_FUNCTOR(trunc, -1.0, +1.0);
-
-
-template<typename realvec_t, template<typename> class func_t>
-double run_bench()
-{
+template <typename realvec_t, template <typename> class func_t>
+double run_bench() {
const int numiters = 1000000;
-
+
typedef typename realvec_t::real_t real_t;
const real_t xmin = func_t<realvec_t>::get_xmin();
const real_t xmax = func_t<realvec_t>::get_xmax();
realvec_t x0, dx;
- for (int i=0; i<realvec_t::size; ++i) {
+ for (int i = 0; i < realvec_t::size; ++i) {
x0.set_elt(i, xmin + (xmax - xmin) / numiters * i / realvec_t::size);
dx.set_elt(i, (xmax - xmin) / numiters);
}
realvec_t x, y;
ticks t0, t1;
double const cycles_per_tick = 1.0; // measure_tick();
-
+
func_t<realvec_t> func;
t0 = getticks();
x = y = x0;
- for (int n=0; n<numiters; ++n) {
+ for (int n = 0; n < numiters; ++n) {
y += func(x);
x += dx;
}
t1 = getticks();
save_result(y);
-
- return cycles_per_tick * elapsed(t1,t0) * realvec_t::size / numiters;
+
+ return cycles_per_tick * elapsed(t1, t0) * realvec_t::size / numiters;
}
-template<typename realvec_t, template<typename> class func_t>
-void bench_type_func()
-{
- cout << " "
- << setw(-5) << func_t<realvec_t>::name() << " "
- << setw(18) << realvec_t::name() << ": " << flush;
+template <typename realvec_t, template <typename> class func_t>
+void bench_type_func() {
+ cout << " " << setw(-5) << func_t<realvec_t>::name() << " " << setw(18)
+ << realvec_t::name() << ": " << flush;
double const cycles = run_bench<realvec_t, func_t>();
cout << cycles << " cycles\n" << flush;
}
-template<template<typename> class func_t>
-void bench_func()
-{
+template <template <typename> class func_t> void bench_func() {
cout << "\n"
<< "Benchmarking " << func_t<float32_vec>().name() << ":\n";
-
+
// Note: We benchmark neither testvec (since this is known to be
// slow), nor builtinvec (since this has about the same performance
// as pseudovec, and is also not very efficient).
-
- bench_type_func<realpseudovec<float,1>, func_t>();
+
+ bench_type_func<realpseudovec<float, 1>, func_t>();
#ifdef __clang__
- bench_type_func<realbuiltinvec<float,1>, func_t>();
+ bench_type_func<realbuiltinvec<float, 1>, func_t>();
#endif
- bench_type_func<realtestvec<float,1>, func_t>();
+ bench_type_func<realtestvec<float, 1>, func_t>();
#ifdef VECMATHLIB_HAVE_VEC_FLOAT_1
- bench_type_func<realvec<float,1>, func_t>();
+ bench_type_func<realvec<float, 1>, func_t>();
#endif
#ifdef VECMATHLIB_HAVE_VEC_FLOAT_2
- bench_type_func<realpseudovec<float,2>, func_t>();
+ bench_type_func<realpseudovec<float, 2>, func_t>();
#ifdef __clang__
- bench_type_func<realbuiltinvec<float,2>, func_t>();
+ bench_type_func<realbuiltinvec<float, 2>, func_t>();
#endif
// bench_type_func<realtestvec<float,2>, func_t>();
- bench_type_func<realvec<float,2>, func_t>();
+ bench_type_func<realvec<float, 2>, func_t>();
#endif
#ifdef VECMATHLIB_HAVE_VEC_FLOAT_4
- bench_type_func<realpseudovec<float,4>, func_t>();
+ bench_type_func<realpseudovec<float, 4>, func_t>();
#ifdef __clang__
- bench_type_func<realbuiltinvec<float,4>, func_t>();
+ bench_type_func<realbuiltinvec<float, 4>, func_t>();
#endif
// bench_type_func<realtestvec<float,4>, func_t>();
- bench_type_func<realvec<float,4>, func_t>();
+ bench_type_func<realvec<float, 4>, func_t>();
#endif
#ifdef VECMATHLIB_HAVE_VEC_FLOAT_8
- bench_type_func<realpseudovec<float,8>, func_t>();
+ bench_type_func<realpseudovec<float, 8>, func_t>();
#ifdef __clang__
- bench_type_func<realbuiltinvec<float,8>, func_t>();
+ bench_type_func<realbuiltinvec<float, 8>, func_t>();
#endif
// bench_type_func<realtestvec<float,8>, func_t>();
- bench_type_func<realvec<float,8>, func_t>();
+ bench_type_func<realvec<float, 8>, func_t>();
#endif
-
- bench_type_func<realpseudovec<double,1>, func_t>();
+
+ bench_type_func<realpseudovec<double, 1>, func_t>();
#ifdef __clang__
- bench_type_func<realbuiltinvec<double,1>, func_t>();
+ bench_type_func<realbuiltinvec<double, 1>, func_t>();
#endif
- bench_type_func<realtestvec<double,1>, func_t>();
+ bench_type_func<realtestvec<double, 1>, func_t>();
#ifdef VECMATHLIB_HAVE_VEC_DOUBLE_1
- bench_type_func<realvec<double,1>, func_t>();
+ bench_type_func<realvec<double, 1>, func_t>();
#endif
#ifdef VECMATHLIB_HAVE_VEC_DOUBLE_2
- bench_type_func<realpseudovec<double,2>, func_t>();
+ bench_type_func<realpseudovec<double, 2>, func_t>();
#ifdef __clang__
- bench_type_func<realbuiltinvec<double,2>, func_t>();
+ bench_type_func<realbuiltinvec<double, 2>, func_t>();
#endif
// bench_type_func<realtestvec<double,2>, func_t>();
- bench_type_func<realvec<double,2>, func_t>();
+ bench_type_func<realvec<double, 2>, func_t>();
#endif
#ifdef VECMATHLIB_HAVE_VEC_DOUBLE_4
- bench_type_func<realpseudovec<double,4>, func_t>();
+ bench_type_func<realpseudovec<double, 4>, func_t>();
#ifdef __clang__
- bench_type_func<realbuiltinvec<double,4>, func_t>();
+ bench_type_func<realbuiltinvec<double, 4>, func_t>();
#endif
// bench_type_func<realtestvec<double,4>, func_t>();
- bench_type_func<realvec<double,4>, func_t>();
+ bench_type_func<realvec<double, 4>, func_t>();
#endif
#ifdef VECMATHLIB_HAVE_VEC_DOUBLE_8
- bench_type_func<realpseudovec<double,8>, func_t>();
+ bench_type_func<realpseudovec<double, 8>, func_t>();
#ifdef __clang__
- bench_type_func<realbuiltinvec<double,8>, func_t>();
+ bench_type_func<realbuiltinvec<double, 8>, func_t>();
#endif
// bench_type_func<realtestvec<double,8>, func_t>();
- bench_type_func<realvec<double,8>, func_t>();
+ bench_type_func<realvec<double, 8>, func_t>();
#endif
}
-
-
-void bench()
-{
+void bench() {
bench_func<functor_nop>();
-
+
bench_func<functor_fneg>();
bench_func<functor_fadd>();
bench_func<functor_fsub>();
bench_func<functor_fmul>();
bench_func<functor_fdiv>();
-
+
bench_func<functor_acos>();
bench_func<functor_acosh>();
bench_func<functor_asin>();
@@ -436,10 +395,7 @@ void bench()
bench_func<functor_trunc>();
}
-
-
-int main(int argc, char** argv)
-{
+int main(int argc, char **argv) {
cout << "Benchmarking math functions:\n";
bench();
return 0;
OpenPOWER on IntegriCloud