diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-06-18 18:47:48 -0400 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-06-18 18:47:48 -0400 |
commit | 2f0318a2afef17f1991d1b61848b83ed3e0c9aff (patch) | |
tree | 2dfdf19e1db88584cf05e91ca9bb4e1240e5ee3d | |
parent | 886ceaffa99d9aaa6decdfa49f71596a9d4775c3 (diff) | |
download | vecmathlib-2f0318a2afef17f1991d1b61848b83ed3e0c9aff.zip vecmathlib-2f0318a2afef17f1991d1b61848b83ed3e0c9aff.tar.gz |
Implement barrier for vec_pseudo
-rw-r--r-- | vec_pseudo.h | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/vec_pseudo.h b/vec_pseudo.h index abc1b30..0764d94 100644 --- a/vec_pseudo.h +++ b/vec_pseudo.h @@ -424,6 +424,33 @@ namespace vecmathlib { } return name_.c_str(); } + inline void barrier() + { +#if defined __GNUC__ && !defined __clang__ && !defined __ICC + // GCC crashes when +X is used as constraint +# if defined __SSE2__ + for (int d=0; d<size; ++d) __asm__("": "+x" (v[d])); +# elif defined __PPC64__ // maybe also __PPC__ + for (int d=0; d<size; ++d) __asm__("": "+f" (v[d])); +# elif defined __arm__ + for (int d=0; d<size; ++d) __asm__("": "+w" (v[d])); +# else +# error "Floating point barrier undefined on this architecture" +# endif +#elif defined __clang__ + for (int d=0; d<size; ++d) __asm__("": "+X" (v[d])); +#elif defined __ICC + for (int d=0; d<size; ++d) { + real_t tmp = v[d]; + __asm__("": "+X" (tmp)); + v[d] = tmp; + } +#elif defined __IBMCPP__ + for (int d=0; d<size; ++d) __asm__("": "+f" (v[d])); +#else +# error "Floating point barrier undefined on this architecture" +#endif + } typedef boolpseudovec<real_t, size> boolvec_t; typedef intpseudovec<real_t, size> intvec_t; |