diff options
author | Erik Schnetter <schnetter@gmail.com> | 2015-10-30 10:53:52 -0400 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2015-10-30 10:53:52 -0400 |
commit | 89853488275b118f6f2ba7724eec30f4d37d67b9 (patch) | |
tree | 34a6b6404a5e72b903dce29c5da9e5a835d63f78 | |
parent | 3688d964d80b038cb6d5a8e5babf4b183ad9416a (diff) | |
download | vecmathlib-89853488275b118f6f2ba7724eec30f4d37d67b9.zip vecmathlib-89853488275b118f6f2ba7724eec30f4d37d67b9.tar.gz |
Improve optimization barriers
-rw-r--r-- | floatprops.h | 24 | ||||
-rw-r--r-- | vec_builtin.h | 5 | ||||
-rw-r--r-- | vec_pseudo.h | 29 |
3 files changed, 29 insertions, 29 deletions
```diff
diff --git a/floatprops.h b/floatprops.h
index c7a3b7f..e284c5f 100644
--- a/floatprops.h
+++ b/floatprops.h
@@ -283,6 +283,30 @@ V &set_elt(V &v, const int n, const E e) {
   return v;
 }
 
+template <typename real_t> real_t barrier(real_t x) {
+#if defined __GNUC__ && !defined __clang__ && !defined __ICC
+// GCC crashes when +X is used as constraint
+#if defined __SSE2__
+  __asm__("" : "+x"(x));
+#elif defined __PPC64__ // maybe also __PPC__
+  __asm__("" : "+f"(x));
+#elif defined __arm__
+  __asm__("" : "+w"(x));
+#else
+#error "Floating point barrier undefined on this architecture"
+#endif
+#elif defined __clang__
+  __asm__("" : "+x"(x));
+#elif defined __ICC
+  __asm__("" : "+x"(x));
+#elif defined __IBMCPP__
+  __asm__("" : "+f"(x));
+#else
+#error "Floating point barrier undefined on this architecture"
+#endif
+  return x;
+}
+
 } // namespace vecmathlib
 
 #endif // #ifndef FLOATPROPS_H
diff --git a/vec_builtin.h b/vec_builtin.h
index 2f1ff90..e810ddd 100644
--- a/vec_builtin.h
+++ b/vec_builtin.h
@@ -296,7 +296,10 @@ template <typename T, int N> struct realbuiltinvec : floatprops<T> {
     return name_.c_str();
   }
 #endif
-  void barrier() { volatile vector_t x __attribute__((__unused__)) = v; }
+  void barrier() {
+    volatile vector_t x = v;
+    v = x;
+  }
 
   typedef boolbuiltinvec<real_t, size> boolvec_t;
   typedef intbuiltinvec<real_t, size> intvec_t;
diff --git a/vec_pseudo.h b/vec_pseudo.h
index c4cbbc1..fa2895c 100644
--- a/vec_pseudo.h
+++ b/vec_pseudo.h
@@ -472,35 +472,8 @@ template <typename T, int N> struct realpseudovec : floatprops<T> {
   }
 #endif
   void barrier() {
-#if defined __GNUC__ && !defined __clang__ && !defined __ICC
-// GCC crashes when +X is used as constraint
-#if defined __SSE2__
     for (int d = 0; d < size; ++d)
-      __asm__("" : "+x"(v[d]));
-#elif defined __PPC64__ // maybe also __PPC__
-    for (int d = 0; d < size; ++d)
-      __asm__("" : "+f"(v[d]));
-#elif defined __arm__
-    for (int d = 0; d < size; ++d)
-      __asm__("" : "+w"(v[d]));
-#else
-#error "Floating point barrier undefined on this architecture"
-#endif
-#elif defined __clang__
-    for (int d = 0; d < size; ++d)
-      __asm__("" : "+X"(v[d]));
-#elif defined __ICC
-    for (int d = 0; d < size; ++d) {
-      real_t tmp = v[d];
-      __asm__("" : "+X"(tmp));
-      v[d] = tmp;
-    }
-#elif defined __IBMCPP__
-    for (int d = 0; d < size; ++d)
-      __asm__("" : "+f"(v[d]));
-#else
-#error "Floating point barrier undefined on this architecture"
-#endif
+      v[d] = vecmathlib::barrier(v[d]);
   }
 
   typedef boolpseudovec<real_t, size> boolvec_t;
```