summaryrefslogtreecommitdiffstats
path: root/mathfuncs_int.h
diff options
context:
space:
mode:
Diffstat (limited to 'mathfuncs_int.h')
-rw-r--r--mathfuncs_int.h243
1 files changed, 121 insertions, 122 deletions
diff --git a/mathfuncs_int.h b/mathfuncs_int.h
index 862189d..fff65ff 100644
--- a/mathfuncs_int.h
+++ b/mathfuncs_int.h
@@ -7,129 +7,128 @@
#include <climits>
+namespace vecmathlib {
+template <typename realvec_t> // Absolute value of the integer vector x.
+typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_abs(intvec_t x) {
+ return ifthen(isignbit(x), -x, x); // -x where isignbit(x) holds, x elsewhere (ifthen presumably selects per element -- confirm)
+}
+
+template <typename realvec_t> // Bit-level select between y and z, steered by the bits of x.
+typename realvec_t::intvec_t
+mathfuncs<realvec_t>::vml_bitifthen(intvec_t x, intvec_t y, intvec_t z) {
+ return (x & y) | (~x & z); // keeps y under x and z under ~x; presumably bitwise, i.e. y's bit where x has 1, z's bit where x has 0
+}
+
+template <typename realvec_t> // Count leading zero bits of x.
+typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_clz(intvec_t x) {
+ // Returns CHAR_BIT * sizeof(int_t) (the full bit width) when the input is 0
+ // Approach: OR x with right-shifted copies of itself so that every bit below the highest set bit becomes 1, then subtract the number of set bits from the bit width.
+ // These explicit implementations are taken from
+ // <http://aggregate.org/MAGIC/>:
+ //
+ // @techreport{magicalgorithms,
+ // author={Henry Gordon Dietz},
+ // title={{The Aggregate Magic Algorithms}},
+ // institution={University of Kentucky},
+ // howpublished={Aggregate.Org online technical report},
+ // date={2013-03-25},
+ // URL={http://aggregate.org/MAGIC/}
+ // }
+ // NOTE(review): lsr is presumably a logical (zero-filling) right shift; the smearing below relies on that -- confirm.
+ int_t bits = CHAR_BIT * sizeof(int_t); // width of int_t in bits
+ if (bits > 1) // each step doubles the run of ones below the highest set bit
+ x |= lsr(x, 1);
+ if (bits > 2)
+ x |= lsr(x, 2);
+ if (bits > 4)
+ x |= lsr(x, 4);
+ if (bits > 8)
+ x |= lsr(x, 8);
+ if (bits > 16)
+ x |= lsr(x, 16);
+ if (bits > 32) // steps wider than int_t are skipped by these width guards
+ x |= lsr(x, 32);
+ if (bits > 64)
+ x |= lsr(x, 64);
+ assert(bits <= 128); // the ladder above stops at a shift of 64, which only covers widths up to 128 bits
+ return IV(I(bits)) - popcount(x); // ones now run from the highest set bit downwards, so width minus their count is the number of leading zeros
+}
+
+template <typename realvec_t> // Test whether x is negative.
+typename realvec_t::boolvec_t mathfuncs<realvec_t>::vml_isignbit(intvec_t x) {
+ return x < IV(I(0)); // compares against zero; IV(I(0)) presumably builds an intvec_t holding 0 -- confirm
+}
+
+template <typename realvec_t> // Maximum of x and y.
+typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_max(intvec_t x,
+ intvec_t y) {
+ return ifthen(x >= y, x, y); // x where x >= y, otherwise y (ifthen presumably selects per element -- confirm)
+}
+
+template <typename realvec_t> // Minimum of x and y.
+typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_min(intvec_t x,
+ intvec_t y) {
+ return ifthen(x < y, x, y); // x where x < y, otherwise y (ifthen presumably selects per element -- confirm)
+}
+
+template <typename realvec_t> // Count the set bits of x.
+typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_popcount(intvec_t x) {
+ // These explicit implementations are taken from
+ // <http://aggregate.org/MAGIC/>:
+ //
+ // @techreport{magicalgorithms,
+ // author={Henry Gordon Dietz},
+ // title={{The Aggregate Magic Algorithms}},
+ // institution={University of Kentucky},
+ // howpublished={Aggregate.Org online technical report},
+ // date={2013-03-25},
+ // URL={http://aggregate.org/MAGIC/}
+ // }
+ // Approach: sum bits in parallel within 2-bit, 4-bit and 8-bit fields, then fold the per-byte counts into the lowest byte.
+ int_t bits = CHAR_BIT * sizeof(int_t); // width of int_t in bits
+ // NOTE(review): if U is narrower than int, ~U(0) is evaluated after integer promotion and equals -1, so the quotients below would be 0 -- confirm U is at least int-sized.
+ // intvec_t x55 = IV(FP::replicate_byte(0x55));
+ // intvec_t x33 = IV(FP::replicate_byte(0x33));
+ // intvec_t x0f = IV(FP::replicate_byte(0x0f));
+ intvec_t x55 = I(~U(0) / U(3)); // 0x5555... (0b01010101...)
+ intvec_t x33 = I(~U(0) / U(5)); // 0x3333... (0b00110011...)
+ intvec_t x0f = I(~U(0) / U(17)); // 0x0f0f... (0b0000111100001111...)
+ // NOTE(review): lsr is presumably a logical (zero-filling) right shift -- confirm.
+ x -= lsr(x, I(1)) & x55; // every 2-bit field now holds the number of bits that were set in it
+ x = (x & x33) + (lsr(x, I(2)) & x33); // add neighbouring 2-bit counts into 4-bit fields
+ x += lsr(x, I(4)); // add neighbouring 4-bit counts; the high nibble of each byte is garbage until masked
+ x &= x0f; // each byte now holds its own bit count (0..8)
+ if (bits > 8) // fold the per-byte counts downwards; the total ends up in the lowest byte
+ x += lsr(x, I(8));
+ if (bits > 16)
+ x += lsr(x, I(16));
+ if (bits > 32)
+ x += lsr(x, I(32));
+ if (bits > 64)
+ x += lsr(x, I(64));
+ assert(bits <= 128); // the fold above stops at a shift of 64, which only covers widths up to 128 bits
+ return x & IV(I(0xff)); // discard the partial sums left in the upper bytes
+}
+
+template <typename realvec_t> // Rotate the bits of x left by the scalar count n.
+typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_rotate(intvec_t x,
+ int_t n) {
+ int_t mask = CHAR_BIT * sizeof(int_t) - 1; // bit width minus one; masking reduces a shift count modulo the width when the width is a power of two
+ intvec_t left = x << (n & mask); // bits moved towards the high end
+ intvec_t right = lsr(x, -n & mask); // bits that wrap around: shifted right by (width - n) modulo width (lsr presumably a logical shift -- confirm)
+ return left | right; // both shift counts stay in [0, width-1]; for n == 0 this is x | x == x
+}
+
+template <typename realvec_t> // Rotate the bits of x left by the counts held in the vector n.
+typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_rotate(intvec_t x,
+ intvec_t n) {
+ intvec_t mask = IV(I(CHAR_BIT * sizeof(int_t) - 1)); // bit width minus one as an intvec_t; masking reduces a shift count modulo the width when the width is a power of two
+ intvec_t left = x << (n & mask); // bits moved towards the high end
+ intvec_t right = lsr(x, -n & mask); // bits that wrap around: shifted right by (width - n) modulo width (lsr presumably a logical shift -- confirm)
+ return left | right; // both shift counts stay in [0, width-1]; for a count of 0 this is x | x == x
+}
-namespace vecmathlib {
-
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_abs(intvec_t x)
- {
- return ifthen(isignbit(x), -x, x);
- }
-
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_bitifthen(intvec_t x,
- intvec_t y,
- intvec_t z)
- {
- return (x & y) | (~x & z);
- }
-
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_clz(intvec_t x)
- {
- // These implementations return 8*sizeof(TYPE) when the input is 0
-
- // These explicit implementations are taken from
- // <http://aggregate.org/MAGIC/>:
- //
- // @techreport{magicalgorithms,
- // author={Henry Gordon Dietz},
- // title={{The Aggregate Magic Algorithms}},
- // institution={University of Kentucky},
- // howpublished={Aggregate.Org online technical report},
- // date={2013-03-25},
- // URL={http://aggregate.org/MAGIC/}
- // }
-
- int_t bits = CHAR_BIT * sizeof(int_t);
- if (bits > 1) x |= lsr(x, 1);
- if (bits > 2) x |= lsr(x, 2);
- if (bits > 4) x |= lsr(x, 4);
- if (bits > 8) x |= lsr(x, 8);
- if (bits > 16) x |= lsr(x, 16);
- if (bits > 32) x |= lsr(x, 32);
- if (bits > 64) x |= lsr(x, 64);
- assert(bits<=128);
- return IV(I(bits)) - popcount(x);
- }
-
- template<typename realvec_t>
- typename realvec_t::boolvec_t mathfuncs<realvec_t>::vml_isignbit(intvec_t x)
- {
- return x < IV(I(0));
- }
-
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_max(intvec_t x,
- intvec_t y)
- {
- return ifthen(x>=y, x, y);
- }
-
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_min(intvec_t x,
- intvec_t y)
- {
- return ifthen(x<y, x, y);
- }
-
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_popcount(intvec_t x)
- {
- // These explicit implementations are taken from
- // <http://aggregate.org/MAGIC/>:
- //
- // @techreport{magicalgorithms,
- // author={Henry Gordon Dietz},
- // title={{The Aggregate Magic Algorithms}},
- // institution={University of Kentucky},
- // howpublished={Aggregate.Org online technical report},
- // date={2013-03-25},
- // URL={http://aggregate.org/MAGIC/}
- // }
-
- int_t bits = CHAR_BIT * sizeof(int_t);
-
- // intvec_t x55 = IV(FP::replicate_byte(0x55));
- // intvec_t x33 = IV(FP::replicate_byte(0x33));
- // intvec_t x0f = IV(FP::replicate_byte(0x0f));
- intvec_t x55 = I(~U(0) / U(3)); // 0x0101...
- intvec_t x33 = I(~U(0) / U(5)); // 0x00110011...
- intvec_t x0f = I(~U(0) / U(17)); // 0b0000111100001111...
-
- x -= lsr(x, I(1)) & x55;
- x = (x & x33) + (lsr(x, I(2)) & x33);
- x += lsr(x, I(4));
- x &= x0f;
- if (bits > 8) x += lsr(x, I(8));
- if (bits > 16) x += lsr(x, I(16));
- if (bits > 32) x += lsr(x, I(32));
- if (bits > 64) x += lsr(x, I(64));
- assert(bits<=128);
- return x & IV(I(0xff));
- }
-
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_rotate(intvec_t x,
- int_t n)
- {
- int_t mask = CHAR_BIT * sizeof(int_t) - 1;
- intvec_t left = x << (n & mask);
- intvec_t right = lsr(x, -n & mask);
- return left | right;
- }
-
- template<typename realvec_t>
- typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_rotate(intvec_t x,
- intvec_t n)
- {
- intvec_t mask = IV(I(CHAR_BIT * sizeof(int_t) - 1));
- intvec_t left = x << (n & mask);
- intvec_t right = lsr(x, -n & mask);
- return left | right;
- }
-
}; // namespace vecmathlib
-#endif // #ifndef MATHFUNCS_ASIN_H
+#endif // #ifndef MATHFUNCS_ASIN_H
OpenPOWER on IntegriCloud