summary refs log tree commit diff stats
path: root/vec_pseudo.h
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@gmail.com> 2013-06-18 18:47:48 -0400
committer Erik Schnetter <schnetter@gmail.com> 2013-06-18 18:47:48 -0400
commit 2f0318a2afef17f1991d1b61848b83ed3e0c9aff (patch)
tree 2dfdf19e1db88584cf05e91ca9bb4e1240e5ee3d /vec_pseudo.h
parent 886ceaffa99d9aaa6decdfa49f71596a9d4775c3 (diff)
download vecmathlib-2f0318a2afef17f1991d1b61848b83ed3e0c9aff.zip
vecmathlib-2f0318a2afef17f1991d1b61848b83ed3e0c9aff.tar.gz
Implement barrier for vec_pseudo
Diffstat (limited to 'vec_pseudo.h')
-rw-r--r-- vec_pseudo.h 27
1 files changed, 27 insertions, 0 deletions
diff --git a/vec_pseudo.h b/vec_pseudo.h
index abc1b30..0764d94 100644
--- a/vec_pseudo.h
+++ b/vec_pseudo.h
@@ -424,6 +424,33 @@ namespace vecmathlib {
}
return name_.c_str();
}
+ inline void barrier() // Floating-point barrier: passes every element v[0..size-1] through an empty asm statement as a read-write ("+") operand
+ {
+#if defined __GNUC__ && !defined __clang__ && !defined __ICC // GCC only; clang and ICC are excluded here and get their own branches below
+ // GCC crashes when "+X" is used as the constraint, so a register-class constraint is picked per architecture instead
+# if defined __SSE2__
+ for (int d=0; d<size; ++d) __asm__("": "+x" (v[d])); // "x": SSE register
+# elif defined __PPC64__ // maybe also __PPC__
+ for (int d=0; d<size; ++d) __asm__("": "+f" (v[d])); // "f": floating-point register
+# elif defined __arm__
+ for (int d=0; d<size; ++d) __asm__("": "+w" (v[d])); // "w": VFP floating-point register
+# else
+# error "Floating point barrier undefined on this architecture"
+# endif
+#elif defined __clang__
+ for (int d=0; d<size; ++d) __asm__("": "+X" (v[d])); // "X": any operand whatsoever is allowed
+#elif defined __ICC
+ for (int d=0; d<size; ++d) {
+ real_t tmp = v[d]; // the barrier is applied to a local copy rather than to v[d] itself
+ __asm__("": "+X" (tmp)); // NOTE(review): presumably ICC does not accept v[d] directly as the operand -- confirm
+ v[d] = tmp; // write the value back after the barrier
+ }
+#elif defined __IBMCPP__
+ for (int d=0; d<size; ++d) __asm__("": "+f" (v[d])); // same "f" constraint as the GCC __PPC64__ case above
+#else
+# error "Floating point barrier undefined on this architecture"
+#endif
+ }
typedef boolpseudovec<real_t, size> boolvec_t;
typedef intpseudovec<real_t, size> intvec_t;
OpenPOWER on IntegriCloud