From 89853488275b118f6f2ba7724eec30f4d37d67b9 Mon Sep 17 00:00:00 2001
From: Erik Schnetter
Date: Fri, 30 Oct 2015 10:53:52 -0400
Subject: Improve optimization barriers

---
Review notes (this area is ignored when the patch is applied):

* The line structure and the angle-bracketed template parameter/argument
  lists of this patch were lost in extraction and have been restored.
  The lists on the context lines (hunk headers and typedefs) are
  reconstructed -- verify them against the target tree before applying.
* The scalar barrier() added to floatprops.h replaces the per-compiler
  loops that were inlined in realpseudovec::barrier().
* NOTE(review): the __clang__ branch changes from the generic "+X"
  constraint to "+x", which the GCC branch of the same function only uses
  under __SSE2__.  This presumably breaks clang builds for PPC/ARM --
  confirm, and consider sharing the per-architecture dispatch with clang.

 floatprops.h  | 24 ++++++++++++++++++++++++
 vec_builtin.h |  5 ++++-
 vec_pseudo.h  | 29 +----------------------------
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/floatprops.h b/floatprops.h
index c7a3b7f..e284c5f 100644
--- a/floatprops.h
+++ b/floatprops.h
@@ -283,6 +283,30 @@ V &set_elt(V &v, const int n, const E e) {
   return v;
 }
 
+template <typename real_t> real_t barrier(real_t x) {
+#if defined __GNUC__ && !defined __clang__ && !defined __ICC
+// GCC crashes when +X is used as constraint
+#if defined __SSE2__
+  __asm__("" : "+x"(x));
+#elif defined __PPC64__ // maybe also __PPC__
+  __asm__("" : "+f"(x));
+#elif defined __arm__
+  __asm__("" : "+w"(x));
+#else
+#error "Floating point barrier undefined on this architecture"
+#endif
+#elif defined __clang__
+  __asm__("" : "+x"(x));
+#elif defined __ICC
+  __asm__("" : "+x"(x));
+#elif defined __IBMCPP__
+  __asm__("" : "+f"(x));
+#else
+#error "Floating point barrier undefined on this architecture"
+#endif
+  return x;
+}
+
 } // namespace vecmathlib
 
 #endif // #ifndef FLOATPROPS_H
diff --git a/vec_builtin.h b/vec_builtin.h
index 2f1ff90..e810ddd 100644
--- a/vec_builtin.h
+++ b/vec_builtin.h
@@ -296,7 +296,10 @@ template <typename T, int N> struct realbuiltinvec : floatprops<T> {
     return name_.c_str();
   }
 #endif
-  void barrier() { volatile vector_t x __attribute__((__unused__)) = v; }
+  void barrier() {
+    volatile vector_t x = v;
+    v = x;
+  }
 
   typedef boolbuiltinvec<real_t, size> boolvec_t;
   typedef intbuiltinvec<real_t, size> intvec_t;
diff --git a/vec_pseudo.h b/vec_pseudo.h
index c4cbbc1..fa2895c 100644
--- a/vec_pseudo.h
+++ b/vec_pseudo.h
@@ -472,35 +472,8 @@ template <typename T, int N> struct realpseudovec : floatprops<T> {
   }
 #endif
   void barrier() {
-#if defined __GNUC__ && !defined __clang__ && !defined __ICC
-// GCC crashes when +X is used as constraint
-#if defined __SSE2__
     for (int d = 0; d < size; ++d)
-      __asm__("" : "+x"(v[d]));
-#elif defined __PPC64__ // maybe also __PPC__
-    for (int d = 0; d < size; ++d)
-      __asm__("" : "+f"(v[d]));
-#elif defined __arm__
-    for (int d = 0; d < size; ++d)
-      __asm__("" : "+w"(v[d]));
-#else
-#error "Floating point barrier undefined on this architecture"
-#endif
-#elif defined __clang__
-    for (int d = 0; d < size; ++d)
-      __asm__("" : "+X"(v[d]));
-#elif defined __ICC
-    for (int d = 0; d < size; ++d) {
-      real_t tmp = v[d];
-      __asm__("" : "+X"(tmp));
-      v[d] = tmp;
-    }
-#elif defined __IBMCPP__
-    for (int d = 0; d < size; ++d)
-      __asm__("" : "+f"(v[d]));
-#else
-#error "Floating point barrier undefined on this architecture"
-#endif
+      v[d] = vecmathlib::barrier(v[d]);
   }
 
   typedef boolpseudovec<real_t, size> boolvec_t;
-- 
cgit v1.1