summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@gmail.com> 2015-10-30 10:53:52 -0400
committer Erik Schnetter <schnetter@gmail.com> 2015-10-30 10:53:52 -0400
commit 89853488275b118f6f2ba7724eec30f4d37d67b9 (patch)
tree 34a6b6404a5e72b903dce29c5da9e5a835d63f78
parent 3688d964d80b038cb6d5a8e5babf4b183ad9416a (diff)
download vecmathlib-89853488275b118f6f2ba7724eec30f4d37d67b9.zip
vecmathlib-89853488275b118f6f2ba7724eec30f4d37d67b9.tar.gz
Improve optimization barriers
-rw-r--r-- floatprops.h 24
-rw-r--r-- vec_builtin.h 5
-rw-r--r-- vec_pseudo.h 29
3 files changed, 29 insertions, 29 deletions
diff --git a/floatprops.h b/floatprops.h
index c7a3b7f..e284c5f 100644
--- a/floatprops.h
+++ b/floatprops.h
@@ -283,6 +283,30 @@ V &set_elt(V &v, const int n, const E e) {
return v;
}
+template <typename real_t> real_t barrier(real_t x) {
+#if defined __GNUC__ && !defined __clang__ && !defined __ICC
+// GCC crashes when +X is used as constraint
+#if defined __SSE2__
+ __asm__("" : "+x"(x));
+#elif defined __PPC64__ // maybe also __PPC__
+ __asm__("" : "+f"(x));
+#elif defined __arm__
+ __asm__("" : "+w"(x));
+#else
+#error "Floating point barrier undefined on this architecture"
+#endif
+#elif defined __clang__
+ __asm__("" : "+x"(x));
+#elif defined __ICC
+ __asm__("" : "+x"(x));
+#elif defined __IBMCPP__
+ __asm__("" : "+f"(x));
+#else
+#error "Floating point barrier undefined on this architecture"
+#endif
+ return x;
+}
+
} // namespace vecmathlib
#endif // #ifndef FLOATPROPS_H
diff --git a/vec_builtin.h b/vec_builtin.h
index 2f1ff90..e810ddd 100644
--- a/vec_builtin.h
+++ b/vec_builtin.h
@@ -296,7 +296,10 @@ template <typename T, int N> struct realbuiltinvec : floatprops<T> {
return name_.c_str();
}
#endif
- void barrier() { volatile vector_t x __attribute__((__unused__)) = v; }
+ void barrier() {
+ volatile vector_t x = v;
+ v = x;
+ }
typedef boolbuiltinvec<real_t, size> boolvec_t;
typedef intbuiltinvec<real_t, size> intvec_t;
diff --git a/vec_pseudo.h b/vec_pseudo.h
index c4cbbc1..fa2895c 100644
--- a/vec_pseudo.h
+++ b/vec_pseudo.h
@@ -472,35 +472,8 @@ template <typename T, int N> struct realpseudovec : floatprops<T> {
}
#endif
void barrier() {
-#if defined __GNUC__ && !defined __clang__ && !defined __ICC
-// GCC crashes when +X is used as constraint
-#if defined __SSE2__
for (int d = 0; d < size; ++d)
- __asm__("" : "+x"(v[d]));
-#elif defined __PPC64__ // maybe also __PPC__
- for (int d = 0; d < size; ++d)
- __asm__("" : "+f"(v[d]));
-#elif defined __arm__
- for (int d = 0; d < size; ++d)
- __asm__("" : "+w"(v[d]));
-#else
-#error "Floating point barrier undefined on this architecture"
-#endif
-#elif defined __clang__
- for (int d = 0; d < size; ++d)
- __asm__("" : "+X"(v[d]));
-#elif defined __ICC
- for (int d = 0; d < size; ++d) {
- real_t tmp = v[d];
- __asm__("" : "+X"(tmp));
- v[d] = tmp;
- }
-#elif defined __IBMCPP__
- for (int d = 0; d < size; ++d)
- __asm__("" : "+f"(v[d]));
-#else
-#error "Floating point barrier undefined on this architecture"
-#endif
+ v[d] = vecmathlib::barrier(v[d]);
}
typedef boolpseudovec<real_t, size> boolvec_t;
OpenPOWER on IntegriCloud