summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorErik Schnetter <schnetter@gmail.com>2014-06-07 16:03:09 -0400
committerErik Schnetter <schnetter@gmail.com>2014-06-07 16:03:09 -0400
commitfb18dfd0e58e2b8e84bdb991541475c5e530b7b0 (patch)
treed86780f70eb30d7a244ad8de0cedbcf2e5478166
parent739e9da48e792989a55ddb7ddc744fe7532686c0 (diff)
downloadvecmathlib-fb18dfd0e58e2b8e84bdb991541475c5e530b7b0.zip
vecmathlib-fb18dfd0e58e2b8e84bdb991541475c5e530b7b0.tar.gz
Provide integer comparison operators for AVX fp8 and fp16 vectors
-rw-r--r--mathfuncs_sqrt.h13
-rw-r--r--vec_avx_fp16_16.h6
-rw-r--r--vec_avx_fp8_32.h6
3 files changed, 25 insertions, 0 deletions
diff --git a/mathfuncs_sqrt.h b/mathfuncs_sqrt.h
index a9ab172..b5a4ed8 100644
--- a/mathfuncs_sqrt.h
+++ b/mathfuncs_sqrt.h
@@ -72,6 +72,19 @@ namespace vecmathlib {
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_rsqrt(realvec_t x)
{
+#if 0
+ // See <http://en.wikipedia.org/wiki/Fast_inverse_square_root>
+ realvec_t x_2 = RV(0.5) * x;
+ realvec_t r = x;
+ intvec_t i = as_int(r);
+ int_t magic = sizeof(real_t)==4 ? I(0x5f375a86) : I(0x5fe6eb50c7b537a9);
+ i = IV(magic) - (i >> I(1));
+ r = as_float(i);
+ r += r * (RV(0.5) - (x_2 * r * r));
+ r += r * (RV(0.5) - (x_2 * r * r));
+ r += r * (RV(0.5) - (x_2 * r * r));
+ return r;
+#else
// Initial guess
// VML_ASSERT(all(x > RV(0.0)));
intvec_t ilogb_x = ilogb(x);
diff --git a/vec_avx_fp16_16.h b/vec_avx_fp16_16.h
index 3b6fe03..e461ce4 100644
--- a/vec_avx_fp16_16.h
+++ b/vec_avx_fp16_16.h
@@ -350,6 +350,12 @@ namespace vecmathlib {
{
return (*this ^ x).convert_bool();
}
+ // TODO: First compare sign; then if equal, compare sign of difference
+ // TODO: Also look for intrinsics
+ boolvec_t operator<(intvec const& x) const { __builtin_unreachable(); }
+ boolvec_t operator<=(intvec const& x) const { __builtin_unreachable(); }
+ boolvec_t operator>(intvec const& x) const { __builtin_unreachable(); }
+ boolvec_t operator>=(intvec const& x) const { __builtin_unreachable(); }
};
diff --git a/vec_avx_fp8_32.h b/vec_avx_fp8_32.h
index 2b63ff9..5ed93e4 100644
--- a/vec_avx_fp8_32.h
+++ b/vec_avx_fp8_32.h
@@ -400,6 +400,12 @@ namespace vecmathlib {
{
return (*this ^ x).convert_bool();
}
+ // TODO: First compare sign; then if equal, compare sign of difference
+ // TODO: Also look for intrinsics
+ boolvec_t operator<(intvec const& x) const { __builtin_unreachable(); }
+ boolvec_t operator<=(intvec const& x) const { __builtin_unreachable(); }
+ boolvec_t operator>(intvec const& x) const { __builtin_unreachable(); }
+ boolvec_t operator>=(intvec const& x) const { __builtin_unreachable(); }
};
OpenPOWER on IntegriCloud