diff options
author | Erik Schnetter <schnetter@gmail.com> | 2012-11-30 15:50:03 -0500 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2012-11-30 15:50:03 -0500 |
commit | d2614759a1d542c41af59b04d8711246d2a1e876 (patch) | |
tree | 2689209034d9ccc3d29208d50b82b4f89116aa1b | |
download | vecmathlib-d2614759a1d542c41af59b04d8711246d2a1e876.zip vecmathlib-d2614759a1d542c41af59b04d8711246d2a1e876.tar.gz |
Import initial version
-rw-r--r-- | IDEAS | 7 | ||||
-rw-r--r-- | build.ninja | 33 | ||||
-rw-r--r-- | empty.cc | 0 | ||||
-rw-r--r-- | example.cc | 32 | ||||
-rw-r--r-- | floatprops.h | 152 | ||||
-rw-r--r-- | mathfuncs.h | 15 | ||||
-rw-r--r-- | mathfuncs_asin.h | 83 | ||||
-rw-r--r-- | mathfuncs_base.h | 67 | ||||
-rw-r--r-- | mathfuncs_convert.h | 86 | ||||
-rw-r--r-- | mathfuncs_fabs.h | 52 | ||||
-rw-r--r-- | mathfuncs_log.h | 70 | ||||
-rw-r--r-- | mathfuncs_rcp.h | 48 | ||||
-rw-r--r-- | mathfuncs_sqrt.h | 69 | ||||
-rw-r--r-- | vec_base.h | 277 | ||||
-rw-r--r-- | vec_double_avx.h | 588 | ||||
-rw-r--r-- | vec_float.h | 305 | ||||
-rw-r--r-- | vecmathlib.h | 11 |
17 files changed, 1895 insertions, 0 deletions
@@ -0,0 +1,7 @@ +implement cosh, sinh via expansion, similar to asin + https://en.wikipedia.org/wiki/Hyperbolic_function + +implement exp via cosh, sinh + +implement acosh, asinh via expansion, similar to asin + https://en.wikipedia.org/wiki/Inverse_hyperbolic_function diff --git a/build.ninja b/build.ninja new file mode 100644 index 0000000..fbfd02b --- /dev/null +++ b/build.ninja @@ -0,0 +1,33 @@ +ar = ar +cxx = g++ + +cppflags = +cxxflags = -std=gnu++11 -Wall -g -march=native +# -Ofast +ldflags = -L. + +rule cxx + command = $cxx $cppflags -MMD -MT $out -MF $out.d $cxxflags -c $in -o $out + description = CXX $out + depfile = $out.d + +rule ar + command = rm -f $out && $ar crs $out $in && ranlib $out + description = AR $out + +rule link + command = $cxx $cppflags $cxxflags $ldflags -o $out $in $libs + description = LINK $out + +build empty.o: cxx empty.cc +build example.o: cxx example.cc +build test.o: cxx test.cc + +build libvecmath.a: ar empty.o + +build example: link example.o | libvecmath.a + libs = -lvecmath +build test: link test.o | libvecmath.a + libs = -lvecmath + +default example test diff --git a/empty.cc b/empty.cc new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/empty.cc diff --git a/example.cc b/example.cc new file mode 100644 index 0000000..50415c2 --- /dev/null +++ b/example.cc @@ -0,0 +1,32 @@ +// -*-C++-*- + +#include "vec_float.h" +#include "vec_double_avx.h" + +#include <iostream> + +using namespace std; + +int main(int argc, char** argv) +{ + using namespace vecmathlib; + typedef realvec<float,1> realvec_t; + // typedef realvec<double,4> realvec_t; + typedef realvec_t::boolvec_t boolvec_t; + typedef realvec_t::intvec_t intvec_t; + + realvec_t x = 1.0; + realvec_t y = x + realvec_t(1.0); + y = sqrt(y); + realvec_t z = log(y); + boolvec_t b = x < y; + intvec_t i = convert_int(y); + + cout << "x=" << x << "\n"; + cout << "y=" << y << "\n"; + cout << "z=" << z << "\n"; + cout << "b=" << b << "\n"; + cout << "i=" << i << "\n"; + + return 0; +} diff --git a/floatprops.h b/floatprops.h new file mode 100644 index 0000000..7048112 --- /dev/null +++ b/floatprops.h @@ -0,0 +1,152 @@ +// -*-C++-*- + +#ifndef FLOATPROPS_H +#define FLOATPROPS_H + +#include <cmath> +#include <cstdint> +#include <limits> + + + +namespace vecmathlib { + + // A structure describing various properties of a floating point + // type. Most properties are already described in numeric_limits, so + // we inherit it. + template<typename real_t> + struct floatprops { + // Some interesting properties are: + // digits + // min_exponent + // max_exponent + }; + + template<typename int_t> + struct intprops { + }; + + + + // Properties of float + template<> + struct floatprops<float>: std::numeric_limits<float> { + typedef float real_t; + typedef int32_t int_t; + typedef uint32_t uint_t; + + // Ensure the internal representation is what we expect + static_assert(is_signed, "real_t is not signed"); + static_assert(radix==2, "real_t is not binary"); + + // Ensure the sizes match + static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size"); + static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size"); + + // Number of bits in internal representation + static int const bits = 8 * sizeof(real_t); + static int const mantissa_bits = digits - 1; + static int const sign_bits = 1; + static int const exponent_bits = bits - mantissa_bits - sign_bits; + static int const exponent_offset = 2 - min_exponent; + static_assert(mantissa_bits + exponent_bits + sign_bits == bits, + "error in bit counts"); + static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1; + static uint_t const exponent_mask = + ((uint_t(1) << exponent_bits) - 1) << mantissa_bits; + static uint_t const sign_mask = uint_t(1) << (bits-1); + static_assert((mantissa_mask & exponent_mask & sign_mask) == uint_t(0), + "error in masks"); + static_assert((mantissa_mask | exponent_mask | sign_mask) == ~uint_t(0), + "error in masks"); + + // Re-interpret bit patterns + static inline real_t as_float(int_t x) { return *(real_t*)&x; } + static inline int_t as_int(real_t x) { return *(int_t*)&x; } + + // Convert values + static inline real_t convert_float(int_t x) { return real_t(x); } + static inline int_t convert_int(real_t x) + { + static_assert(sizeof(std::rint(x)) == sizeof(int_t), + "rint() has wrong return type"); + return std::rint(x); + } + }; + + template<> + struct intprops<floatprops<float>::int_t> { + typedef float real_t; + }; + + + + // Properties of double + template<> + struct floatprops<double>: std::numeric_limits<double> { + typedef double real_t; + typedef int64_t int_t; + typedef uint64_t uint_t; + + // Ensure the internal representation is what we expect + static_assert(is_signed, "real_t is not signed"); + static_assert(radix==2, "real_t is not binary"); + + // Ensure the sizes match + static_assert(sizeof(real_t) == sizeof(int_t), "int_t has wrong size"); + static_assert(sizeof(real_t) == sizeof(uint_t), "uint_t has wrong size"); + + // Number of bits in internal representation + static int const bits = 8 * sizeof(real_t); + static int const mantissa_bits = digits - 1; + static int const sign_bits = 1; + static int const exponent_bits = bits - mantissa_bits - sign_bits; + static int const exponent_offset = 2 - min_exponent; + static_assert(mantissa_bits + exponent_bits + sign_bits == bits, + "error in bit counts"); + static uint_t const mantissa_mask = (uint_t(1) << mantissa_bits) - 1; + static uint_t const exponent_mask = + ((uint_t(1) << exponent_bits) - 1) << mantissa_bits; + static uint_t const sign_mask = uint_t(1) << (bits-1); + static_assert((mantissa_mask & exponent_mask & sign_mask) == uint_t(0), + "error in masks"); + static_assert((mantissa_mask | exponent_mask | sign_mask) == ~uint_t(0), + "error in masks"); + + // Re-interpret bit patterns + static inline real_t as_float(int_t x) { return *(real_t*)&x; } + static inline int_t as_int(real_t x) { return *(int_t*)&x; } + + // Convert values + static inline real_t convert_float(int_t x) { return real_t(x); } + static inline int_t convert_int(real_t x) + { + static_assert(sizeof(std::llrint(x)) == sizeof(int_t), + "llrint() has wrong return type"); + return std::llrint(x); + } + }; + + template<> + struct intprops<floatprops<double>::int_t> { + typedef double real_t; + }; + + + + template<typename int_t> + inline typename intprops<int_t>::real_t as_float(int_t x) + { + typedef typename intprops<int_t>::real_t real_t; + return floatprops<real_t>::as_float(x); + } + + template<typename real_t> + inline typename floatprops<real_t>::int_t as_int(real_t x) + { + return floatprops<real_t>::as_int(x); + } + +} // namespace vecmathlib + +#endif // #ifndef FLOATPROPS_H diff --git a/mathfuncs.h b/mathfuncs.h new file mode 100644 index 0000000..e62358b --- /dev/null +++ b/mathfuncs.h @@ -0,0 +1,15 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_H +#define MATHFUNCS_H + +#include "mathfuncs_base.h" + +#include "mathfuncs_asin.h" +#include "mathfuncs_convert.h" +#include "mathfuncs_fabs.h" +#include "mathfuncs_log.h" +#include "mathfuncs_rcp.h" +#include "mathfuncs_sqrt.h" + +#endif // #ifndef MATHFUNCS_H diff --git a/mathfuncs_asin.h b/mathfuncs_asin.h new file mode 100644 index 0000000..a8ffff4 --- /dev/null +++ b/mathfuncs_asin.h @@ -0,0 +1,83 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_ASIN_H +#define MATHFUNCS_ASIN_H + +#include "mathfuncs_base.h" + +#include <cassert> +#include <cmath> + + + +namespace vecmathlib { + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_atan(realvec_t x) + { + // Handle negative values + realvec_t x0 = x; + x = fabs(x); + + // Reduce range using 1/x identity + assert(all(x >= RV(0.0))); + boolvec_t gt_one = x > RV(1.0); + x = ifthen(gt_one, rcp(x), x); + + // Reduce range again using half-angle formula; see + // <https://en.wikipedia.org/wiki/Inverse_trigonometric_functions>. + // This is necessary for good convergence below. + x = x / (RV(1.0) + sqrt(RV(1.0) + x*x)); + + // Taylor expansion; see + // <https://en.wikipedia.org/wiki/Inverse_trigonometric_functions>. + assert(all(x >= RV(0.0) && x <= RV(0.5))); + int const nmax = 30; // ??? + realvec_t y = x / (RV(1.0) + x*x); + realvec_t x2 = x * y; + realvec_t r = y; + for (int n=3; n<nmax; n+=2) { + y *= RV(R(n-1) / R(n)) * x2; + r += y; + } + + // Undo second range reduction + r = RV(2.0) * r; + + // Undo range reduction + r = ifthen(gt_one, RV(M_PI_2) - r, r); + + // Handle negative values + r = copysign(r, x0); + + return r; + } + + + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_acos(realvec_t x) + { + // Handle negative values + boolvec_t is_negative = signbit(x); + x = fabs(x); + + realvec_t r = RV(2.0) * atan(sqrt(RV(1.0) - x*x) / (RV(1.0) + x)); + + // Handle negative values + r = ifthen(is_negative, RV(M_PI) - r, r); + + return r; + } + + + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_asin(realvec_t x) + { + return RV(2.0) * atan(x / (RV(1.0) + sqrt(RV(1.0) - x*x))); + } + +}; // namespace vecmathlib + +#endif // #ifndef MATHFUNCS_ASIN_H diff --git a/mathfuncs_base.h b/mathfuncs_base.h new file mode 100644 index 0000000..0b9f04f --- /dev/null +++ b/mathfuncs_base.h @@ -0,0 +1,67 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_BASE_H +#define MATHFUNCS_BASE_H + +#include "floatprops.h" +#include "vec_base.h" + + + +namespace vecmathlib { + + template<typename realvec_t> + struct mathfuncs { + typedef floatprops<typename realvec_t::real_t> FP; + + typedef typename FP::real_t real_t; + typedef typename FP::int_t int_t; + typedef typename FP::uint_t uint_t; + + static int const size = realvec_t::size; + + // typedef realvec<real_t, size> realvec_t; + typedef intvec<real_t, size> intvec_t; + typedef boolvec<real_t, size> boolvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + + // asin + static realvec_t vml_acos(realvec_t x); + static realvec_t vml_asin(realvec_t x); + static realvec_t vml_atan(realvec_t x); + + // convert + static realvec_t vml_convert_float(intvec_t x); + static intvec_t vml_convert_int(realvec_t x); + + // fabs + static realvec_t vml_copysign(realvec_t x, realvec_t y); + static realvec_t vml_fabs(realvec_t x); + static intvec_t vml_ilogb(realvec_t x); + static realvec_t vml_scalbn(realvec_t x, intvec_t n); + static boolvec_t vml_signbit(realvec_t x); + + // log + static realvec_t vml_log(realvec_t x); + static realvec_t vml_log10(realvec_t x); + static realvec_t vml_log1p(realvec_t x); + static realvec_t vml_log2(realvec_t x); + + // rcp + static realvec_t vml_rcp(realvec_t x); + + // sqrt + static realvec_t vml_rsqrt(realvec_t x); + static realvec_t vml_sqrt(realvec_t x); + }; + +} // namespace vecmathlib + +#endif // #ifndef MATHFUNCS_BASE_H diff --git a/mathfuncs_convert.h b/mathfuncs_convert.h new file mode 100644 index 0000000..4dfffef --- /dev/null +++ b/mathfuncs_convert.h @@ -0,0 +1,86 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_CONVERT_H +#define MATHFUNCS_CONVERT_H + +#include "mathfuncs_base.h" + +#include <cassert> +#include <cmath> + + + +namespace vecmathlib { + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_convert_float(intvec_t x) + { + // Convert in two passes. Convert as much as possible during the + // first pass (lobits), so that the second pass (hibits) may be + // omitted if the high bits are known to be zero. + int_t lobits = FP::mantissa_bits; + int_t hibits = FP::bits - lobits; + + // Convert lower bits + intvec_t xlo = x & IV((U(1) << lobits) - 1); + // exponent for the equivalent floating point number + int_t exponent_lo = FP::exponent_offset + lobits; + xlo |= exponent_lo; + // subtract hidden mantissa bit + realvec_t flo = as_float(xlo) - RV(FP::as_float(exponent_lo)); + + // Convert upper bits + // make unsigned by subtracting largest negative number + x ^= FP::sign_mask; + intvec_t xhi = lsr(x, lobits); + // exponent for the equivalent floating point number + int_t exponent_hi = FP::exponent_offset + FP::bits; + xhi |= exponent_hi; + // add largest negative number, and subtract hidden mantissa bit + realvec_t + fhi = as_float(xhi) - RV(FP::as_float(exponent_hi) + R(FP::sign_mask)); + + // Combine results + return flo + fhi; + } + + + + template<typename realvec_t> + auto mathfuncs<realvec_t>::vml_convert_int(realvec_t x) -> intvec_t + { + // Handle zero + boolvec_t is_zero = x == RV(0.0); + // Handle negative numbers + boolvec_t is_negative = signbit(x); + x = fabs(x); + + // Round, by adding a large number that removes the excess + // precision + int_t large = U(1) << FP::mantissa_bits; + x += R(large); + + intvec_t exponent = ilogb(x); + for (int i=0; i<intvec_t::size; ++i) { + assert(exponent[i] >= FP::mantissa_bits); + } + intvec_t ix = as_int(x) & IV(FP::mantissa_mask); + // add hidden mantissa bit + ix |= U(1) << FP::mantissa_bits; + // shift according to exponent + ix <<= exponent - IV(FP::mantissa_bits); + + // Undo the adding above + ix -= large; + + // Handle negative numbers + ix = ifthen(is_negative, -ix, ix); + // Handle zero + ix = ifthen(is_zero, IV(0), ix); + + return ix; + } + +}; // namespace vecmathlib + +#endif // #ifndef MATHFUNCS_CONVERT_H diff --git a/mathfuncs_fabs.h b/mathfuncs_fabs.h new file mode 100644 index 0000000..034fedb --- /dev/null +++ b/mathfuncs_fabs.h @@ -0,0 +1,52 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_FABS_H +#define MATHFUNCS_FABS_H + +#include "mathfuncs_base.h" + +#include <cassert> +#include <cmath> + + + +namespace vecmathlib { + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_copysign(realvec_t x, realvec_t y) + { + intvec_t value = as_int(x) & IV(~FP::sign_mask); + intvec_t sign = as_int(y) & IV(FP::sign_mask); + return as_float(sign | value); + } + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_fabs(realvec_t x) + { + return as_float(as_int(x) & IV(~FP::sign_mask)); + } + + template<typename realvec_t> + auto mathfuncs<realvec_t>::vml_ilogb(realvec_t x) -> intvec_t + { + return + lsr(as_int(x) & IV(FP::exponent_mask), FP::mantissa_bits) - + IV(FP::exponent_offset); + } + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_scalbn(realvec_t x, intvec_t n) + { + return as_float(as_int(x) + (n << FP::mantissa_bits)); + // return x * as_float((n + exponent_offset) << mantissa_bits); + } + + template<typename realvec_t> + auto mathfuncs<realvec_t>::vml_signbit(realvec_t x) -> boolvec_t + { + return convert_bool(as_int(x) & IV(FP::sign_mask)); + } + +}; // namespace vecmathlib + +#endif // #ifndef MATHFUNCS_FABS_H diff --git a/mathfuncs_log.h b/mathfuncs_log.h new file mode 100644 index 0000000..8fb3204 --- /dev/null +++ b/mathfuncs_log.h @@ -0,0 +1,70 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_LOG_H +#define MATHFUNCS_LOG_H + +#include "mathfuncs_base.h" + +#include <cassert> +#include <cmath> + + + +namespace vecmathlib { + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_log2(realvec_t x) + { + // Rescale + assert(all(x > RV(0.0))); + intvec_t ilogb_x = ilogb(x); + x = scalbn(x, -ilogb_x); + assert(all(x >= RV(1.0) && x < RV(2.0))); + + // Approximate + assert(all(x >= RV(1.0) && x < RV(2.0))); + // log(x) = Sum[n=1,nmax,n%2==1] 2/n ((x-1) / (x+1))^n + int const nmax = 30; + x *= RV(M_SQRT1_2); // shift range to increase accuracy + realvec_t xm1_xp1 = (x - RV(1.0)) / (x + RV(1.0)); + realvec_t xm1_xp1_2 = xm1_xp1 * xm1_xp1; + realvec_t y = RV(M_LOG2E * 2.0) * xm1_xp1; + realvec_t r = y; + for (int n=3; n<nmax; n+=2) { + y *= xm1_xp1_2; + r += y * RV(R(1.0) / R(n)); + } + r += RV(0.5); // correct result for range shift + + // Undo rescaling + r += convert_float(ilogb_x); + + return r; + } + + + + template<typename realvec_t> + inline + realvec_t mathfuncs<realvec_t>::vml_log(realvec_t x) + { + return log2(x) * RV(M_LN2); + } + + template<typename realvec_t> + inline + realvec_t mathfuncs<realvec_t>::vml_log10(realvec_t x) + { + return log(x) * RV(M_LOG10E); + } + + template<typename realvec_t> + inline + realvec_t mathfuncs<realvec_t>::vml_log1p(realvec_t x) + { + return log(RV(1.0) + x); + } + +}; // namespace vecmathlib + +#endif // #ifndef MATHFUNCS_LOG_H diff --git a/mathfuncs_rcp.h b/mathfuncs_rcp.h new file mode 100644 index 0000000..1704d8f --- /dev/null +++ b/mathfuncs_rcp.h @@ -0,0 +1,48 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_RCP_H +#define MATHFUNCS_RCP_H + +#include "mathfuncs_base.h" + +#include <cassert> +#include <cmath> + + + +namespace vecmathlib { + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x) + { + // Handle negative values + realvec_t x0 = x; + x = fabs(x); + + // Initial guess + assert(all(x > RV(0.0))); + intvec_t ilogb_x = ilogb(x); + // For stability, choose a starting value that is below the result + realvec_t r = scalbn(RV(0.5), -ilogb_x); + + // Iterate + int const nmax = 7; + for (int n=1; n<nmax; ++n) { + // Step + assert(all(x > RV(0.0))); + // Newton method: + // Solve f(r) = 0 for f(r) = x - 1/r + // r <- r - f(r) / f'(r) + // r <- 2 r - r^2 x + r *= RV(2.0) - r * x; + } + + // Handle negative values + r = copysign(r, x0); + + return r; + } + +}; // namespace vecmathlib + +#endif // #ifndef MATHFUNCS_RCP_H diff --git a/mathfuncs_sqrt.h b/mathfuncs_sqrt.h new file mode 100644 index 0000000..2c43478 --- /dev/null +++ b/mathfuncs_sqrt.h @@ -0,0 +1,69 @@ +// -*-C++-*- + +#ifndef MATHFUNCS_SQRT_H +#define MATHFUNCS_SQRT_H + +#include "mathfuncs_base.h" + +#include <cassert> +#include <cmath> + + + +namespace vecmathlib { + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_sqrt(realvec_t x) + { + std::cout << "mathfuncs::sqrt\n"; + // Handle special case: zero + boolvec_t is_zero = x <= RV(0.0); + x = ifthen(is_zero, RV(1.0), x); + + // Initial guess + assert(all(x > RV(0.0))); +#if 0 + intvec_t ilogb_x = ilogb(x); + realvec_t r = scalbn(RV(M_SQRT2), ilogb_x >> 1); + // TODO: divide by M_SQRT2 if ilogb_x % 2 == 1 ? +#else + real_t correction = + std::scalbn(4.0/3.0, FP::exponent_offset >> 1) * + (FP::exponent_offset & 1 ? 1.0 / M_SQRT1_2 : 1.0); + realvec_t r = lsr(x.as_int(), 1).as_float() * RV(correction); + real_t one=1.0; + std::cout << "x=" << x << " r=" << r << " eo=" << FP::exponent_offset << " corr=" << correction << " ione=" << RV(one).as_int() << "\n"; +#endif + + // Iterate + // nmax iterations give an accuracy of 2^nmax binary digits. 5 + // iterations suffice for double precision with its 53 digits. + int const nmax = 5; + std::cout << "bits" << FP::bits << "nmax" << nmax << "\n"; + for (int n=1; n<nmax; ++n) { + // Step + assert(all(x > RV(0.0))); + // Newton method: + // Solve f(r) = 0 for f(r) = x - r^2 + // r <- r - f(r) / f'(r) + // r <- (r + x / r) / 2 + r = RV(0.5) * (r + x / r); + } + + // Handle special case: zero + r = ifthen(is_zero, RV(0.0), r); + + return r; + } + + + + template<typename realvec_t> + realvec_t mathfuncs<realvec_t>::vml_rsqrt(realvec_t x) + { + return rcp(sqrt(x)); + } + +}; // namespace vecmathlib + +#endif // #ifndef MATHFUNCS_SQRT_H diff --git a/vec_base.h b/vec_base.h new file mode 100644 index 0000000..ab4a558 --- /dev/null +++ b/vec_base.h @@ -0,0 +1,277 @@ +// -*-C++-*- + +#ifndef VEC_BASE_H +#define VEC_BASE_H + +#include <iostream> + +namespace vecmathlib { + + template<typename real_t, int size> + struct boolvec { + }; + + template<typename real_t, int size> + struct intvec { + }; + + template<typename real_t, int size> + struct realvec { + }; + + + + // boolvec wrappers + + template<typename real_t, int size> + inline intvec<real_t, size> as_int(boolvec<real_t, size> x) + { + return x.as_int(); + } + + template<typename real_t, int size> + inline intvec<real_t, size> convert_int(boolvec<real_t, size> x) + { + return x.convert_int(); + } + + template<typename real_t, int size> + inline bool all(boolvec<real_t, size> x) { return x.all(); } + + template<typename real_t, int size> + inline bool any(boolvec<real_t, size> x) { return x.any(); } + + template<typename real_t, int size> + inline + intvec<real_t, size> ifthen(boolvec<real_t, size> c, + intvec<real_t, size> x, + intvec<real_t, size> y) + { + return c.ifthen(x, y); + } + + template<typename real_t, int size> + inline + realvec<real_t, size> ifthen(boolvec<real_t, size> c, + realvec<real_t, size> x, + realvec<real_t, size> y) + { + return c.ifthen(x, y); + } + + + + // intvec wrappers + + template<typename real_t, int size> + inline boolvec<real_t, size> as_bool(intvec<real_t, size> x) + { + return x.as_bool(); + } + + template<typename real_t, int size> + inline boolvec<real_t, size> convert_bool(intvec<real_t, size> x) + { + return x.convert_bool(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> as_float(intvec<real_t, size> x) + { + return x.as_float(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> convert_float(intvec<real_t, size> x) + { + return x.convert_float(); + } + + template<typename real_t, int size> + inline intvec<real_t, size> lsr(intvec<real_t, size> x, + typename intvec<real_t, size>::int_t n) + { + return x.lsr(n); + } + + template<typename real_t, int size> + inline intvec<real_t, size> lsr(intvec<real_t, size> x, + intvec<real_t, size> n) + { + return x.lsr(n); + } + + + + // realvec wrappers + + template<typename real_t, int size> + inline intvec<real_t, size> as_int(realvec<real_t, size> x) + { + return x.as_int(); + } + + template<typename real_t, int size> + inline intvec<real_t, size> convert_int(realvec<real_t, size> x) + { + return x.convert_int(); + } + + template<typename real_t, int size> + inline + typename realvec<real_t, size>::real_t prod(realvec<real_t, size> x) + { + return x.prod(); + } + + template<typename real_t, int size> + inline + typename realvec<real_t, size>::real_t sum(realvec<real_t, size> x) + { + return x.sum(); + } + + + + template<typename real_t, int size> + inline realvec<real_t, size> copysign(realvec<real_t, size> x, + realvec<real_t, size> y) + { + return x.copysign(y); + } + + template<typename real_t, int size> + inline realvec<real_t, size> fabs(realvec<real_t, size> x) + { + return x.fabs(); + } + + template<typename real_t, int size> + inline intvec<real_t, size> ilogb(realvec<real_t, size> x) + { + return x.ilogb(); + } + + template<typename real_t, int size> + inline + realvec<real_t, size> scalbn(realvec<real_t, size> x, + intvec<real_t, size> n) + { + return x.scalbn(n); + } + + template<typename real_t, int size> + inline boolvec<real_t, size> signbit(realvec<real_t, size> x) + { + return x.signbit(); + } + + + + template<typename real_t, int size> + inline realvec<real_t, size> acos(realvec<real_t, size> x) + { + return x.acos(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> asin(realvec<real_t, size> x) + { + return x.asin(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> atan(realvec<real_t, size> x) + { + return x.atan(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> floor(realvec<real_t, size> x) + { + return x.floor(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> log(realvec<real_t, size> x) + { + return x.log(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> log10(realvec<real_t, size> x) + { + return x.log10(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> log1p(realvec<real_t, size> x) + { + return x.log1p(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> log2(realvec<real_t, size> x) + { + return x.log2(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> rcp(realvec<real_t, size> x) + { + return x.rcp(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> rsqrt(realvec<real_t, size> x) + { + return x.rsqrt(); + } + + template<typename real_t, int size> + inline realvec<real_t, size> sqrt(realvec<real_t, size> x) + { + return x.sqrt(); + } + + + + template<typename real_t, int size> + std::ostream& operator<<(std::ostream& os, boolvec<real_t, size> const& x) + { + os << "["; + for (int i=0; i<size; ++i) { + if (i!=0) os << ","; + os << x[i]; + } + os << "]"; + return os; + } + + template<typename real_t, int size> + std::ostream& operator<<(std::ostream& os, intvec<real_t, size> const& x) + { + os << "["; + for (int i=0; i<size; ++i) { + if (i!=0) os << ","; + os << x[i]; + } + os << "]"; + return os; + } + + template<typename real_t, int size> + std::ostream& operator<<(std::ostream& os, realvec<real_t, size> const& x) + { + os << "["; + for (int i=0; i<size; ++i) { + if (i!=0) os << ","; + os << x[i]; + } + os << "]"; + return os; + } + +} // namespace vecmathlib + +#endif // #ifndef VEC_BASE_H diff --git a/vec_double_avx.h b/vec_double_avx.h new file mode 100644 index 0000000..258ffd0 --- /dev/null +++ b/vec_double_avx.h @@ -0,0 +1,588 @@ +// -*-C++-*- + +#ifndef VEC_DOUBLE_AVX_H +#define VEC_DOUBLE_AVX_H + +#include "floatprops.h" +#include "mathfuncs.h" +#include "vec_base.h" + +#include <cmath> + +// AVX intrinsics +#include <immintrin.h> + + + +namespace vecmathlib { + + template<> struct boolvec<double,4>; + template<> struct intvec<double,4>; + template<> struct realvec<double,4>; + + + + template<> + struct boolvec<double,4>: floatprops<double> + { + static const int size = 4; + typedef bool scalar_t; + typedef __m256d bvector_t; + + static_assert(size * sizeof(real_t) == sizeof(bvector_t), + "vector size is wrong"); + + private: + // true values have the sign bit set, false values have it unset + static uint_t from_bool(bool a) { return - uint_t(a); } + static bool to_bool(uint_t a) { return int_t(a) < int_t(0); } + public: + + typedef boolvec boolvec_t; + typedef intvec<real_t, size> intvec_t; + typedef realvec<real_t, size> realvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + + + bvector_t v; + + boolvec() {} + boolvec(boolvec const& x): v(x.v) {} + boolvec& operator=(boolvec const& x) { return v=x.v, *this; } + boolvec(bvector_t x): v(x) {} + boolvec(bool a): + v(_mm256_castsi256_pd(_mm256_set1_epi64x(from_bool(a)))) {} + boolvec(bool const* as): + v(_mm256_castsi256_pd(_mm256_set_epi64x(from_bool(as[3]), + from_bool(as[2]), + from_bool(as[1]), + from_bool(as[0])))) {} + + operator bvector_t() const { return v; } + bool operator[](int n) const { return to_bool(((uint_t const*)&v)[n]); } + boolvec& set_elt(int n, bool a) + { + return ((int_t*)&v)[n] = from_bool(a), *this; + } + + + + auto as_int() const -> intvec_t; // defined after intvec + auto convert_int() const -> intvec_t; // defined after intvec + + + + boolvec operator!() const { return _mm256_xor_pd(boolvec(true), v); } + + boolvec operator&&(boolvec x) const { return _mm256_and_pd(v, x.v); } + boolvec operator||(boolvec x) const { return _mm256_or_pd(v, x.v); } + boolvec operator==(boolvec x) const { return !(*this==x); } + boolvec operator!=(boolvec x) const { return _mm256_xor_pd(v, x.v); } + + bool all() const + { + return (*this)[0] && (*this)[1] && (*this)[2] && (*this)[3]; + } + bool any() const + { + return (*this)[0] || (*this)[1] || (*this)[2] || (*this)[3]; + } + + + + // ifthen(condition, then-value, else-value) + auto ifthen(intvec_t x, + intvec_t y) const -> intvec_t; // defined after intvec + auto ifthen(realvec_t x, + realvec_t y) const -> realvec_t; // defined after realvec + + }; + + + + template<> + struct intvec<double,4>: floatprops<double> + { + static const int size = 4; + typedef int_t scalar_t; + typedef __m256i ivector_t; + + static_assert(size * sizeof(real_t) == sizeof(ivector_t), + "vector size is wrong"); + + typedef boolvec<real_t, size> boolvec_t; + typedef intvec intvec_t; + typedef realvec<real_t, size> realvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + + + ivector_t v; + + intvec() {} + intvec(intvec const& x): v(x.v) {} + intvec& operator=(intvec const& x) { return v=x.v, *this; } + intvec(ivector_t x): v(x) {} + intvec(int_t a): v(_mm256_set1_epi64x(a)) {} + intvec(int_t const* as): v(_mm256_set_epi64x(as[3], as[2], as[1], as[0])) {} + + operator ivector_t() const { return v; } + int_t operator[](int n) const { return ((int_t const*)&v)[n]; } + intvec& set_elt(int n, int_t a) { return ((int_t*)&v)[n]=a, *this; } + + + + auto as_bool() const -> boolvec_t { return _mm256_castsi256_pd(v); } + auto convert_bool() const -> boolvec_t + { + // Result: convert_bool(0)=false, convert_bool(else)=true + // There is no intrinsic to compare with zero. Instead, we check + // whether x is positive and x-1 is negative. + intvec x = *this; + intvec xm1 = x - 1; + // We know that boolvec values depend only on the sign bit + // return (~xm1 | x).as_bool(); + return x.as_bool() || !xm1.as_bool(); + } + auto as_float() const -> realvec_t; // defined after realvec + auto convert_float() const -> realvec_t; // defined after realvec + + + + // Note: not all arithmetic operations are supported! + + intvec operator+() const { return *this; } + intvec operator-() const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + v01 = _mm_sub_epi64(_mm_set1_epi64x(0), v01); + v23 = _mm_sub_epi64(_mm_set1_epi64x(0), v23); + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + + intvec operator+(intvec x) const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + __m128i xv01 = _mm256_castsi256_si128(x.v); + __m128i xv23 = _mm256_extractf128_si256(x.v, 1); + v01 = _mm_add_epi64(v01, xv01); + v23 = _mm_add_epi64(v23, xv23); + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + intvec operator-(intvec x) const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + __m128i xv01 = _mm256_castsi256_si128(x.v); + __m128i xv23 = _mm256_extractf128_si256(x.v, 1); + v01 = _mm_sub_epi64(v01, xv01); + v23 = _mm_sub_epi64(v23, xv23); + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + + intvec& operator+=(intvec const& x) { return *this=*this+x; } + intvec& operator-=(intvec const& x) { return *this=*this-x; } + + + + intvec operator~() const + { + return _mm256_castpd_si256(_mm256_xor_pd(_mm256_castsi256_pd(IV(~U(0))), + _mm256_castsi256_pd(v))); + } + + intvec operator&(intvec x) const + { + return _mm256_castpd_si256(_mm256_and_pd(_mm256_castsi256_pd(v), + _mm256_castsi256_pd(x.v))); + } + intvec operator|(intvec x) const + { + return _mm256_castpd_si256(_mm256_or_pd(_mm256_castsi256_pd(v), + _mm256_castsi256_pd(x.v))); + } + intvec operator^(intvec x) const + { + return _mm256_castpd_si256(_mm256_xor_pd(_mm256_castsi256_pd(v), + _mm256_castsi256_pd(x.v))); + } + + intvec& operator&=(intvec const& x) { return *this=*this&x; } + intvec& operator|=(intvec const& x) { return *this=*this|x; } + intvec& operator^=(intvec const& x) { return *this=*this^x; } + + + + // SSE2 shift instructions potentially relevant for 64-bit + // operations: + // _mm_slli_epi64 + // _mm_srli_epi64 + // _mm_sll_epi64 + // _mm_srl_epi64 + // _mm_srai_epi32 + // _mm_sra_epi32 + // _mm_srli_si128 + // _mm_slli_si128 + intvec lsr(int_t n) const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + v01 = _mm_srli_epi64(v01, n); + v23 = _mm_srli_epi64(v23, n); + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + intvec operator>>(int_t n) const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + // There is no _mm_srai_epi64. To emulate it, add 0x8000000 + // before shifting, and subtracted the shifted 0x80000000 after shifting +#if 0 + __m128i signmask01 = _mm_sub_epi64(_mm_set1_epi64x(0), + _mm_srli_epi64(v01, 63)); + __m128i signmask23 = _mm_sub_epi64(_mm_set1_epi64x(0), + _mm_srli_epi64(v23, 63)); + v01 = _mm_xor_si128(signmask01, v01); + v23 = _mm_xor_si128(signmask23, v23); + v01 = _mm_srli_epi64(v01, n); + v23 = _mm_srli_epi64(v23, n); + v01 = _mm_xor_si128(signmask01, v01); + v23 = _mm_xor_si128(signmask23, v23); +#else + // Convert signed to unsiged + v01 += _mm_set1_epi64x(U(1) << (bits-1)); + v23 += _mm_set1_epi64x(U(1) << (bits-1)); + // Shift + v01 = _mm_srli_epi64(v01, n); + v23 = _mm_srli_epi64(v23, n); + // Undo conversion + v01 -= _mm_set1_epi64x(U(1) << (bits-n)); + v23 -= _mm_set1_epi64x(U(1) << (bits-n)); +#endif + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + intvec operator<<(int_t n) const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + v01 = _mm_slli_epi64(v01, n); + v23 = _mm_slli_epi64(v23, n); + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + intvec& operator>>=(int_t n) { return *this=*this>>n; } + intvec& operator<<=(int_t n) { return *this=*this<<n; } + + intvec lsr(intvec n) const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + __m128i nv01 = _mm256_castsi256_si128(n.v); + __m128i nv23 = _mm256_extractf128_si256(n.v, 1); + v01 = _mm_srl_epi64(v01, nv01); + v23 = _mm_srl_epi64(v23, nv23); + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + intvec operator>>(intvec n) const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + __m128i nv01 = _mm256_castsi256_si128(n.v); + __m128i nv23 = _mm256_extractf128_si256(n.v, 1); +#if 0 + // There is no _mm_srai_epi64. To emulate it, invert all bits + // before and after shifting if the sign bit is set. + __m128i signmask01 = _mm_sub_epi64(_mm_set1_epi64x(0), + _mm_srli_epi64(v01, 63)); + __m128i signmask23 = _mm_sub_epi64(_mm_set1_epi64x(0), + _mm_srli_epi64(v23, 63)); + v01 = _mm_xor_si128(signmask01, v01); + v23 = _mm_xor_si128(signmask23, v23); + v01 = _mm_srl_epi64(v01, nv01); + v23 = _mm_srl_epi64(v23, nv23); + v01 = _mm_xor_si128(signmask01, v01); + v23 = _mm_xor_si128(signmask23, v23); +#else + // Convert signed to unsiged + v01 += _mm_set1_epi64x(U(1) << (bits-1)); + v23 += _mm_set1_epi64x(U(1) << (bits-1)); + // Shift + v01 = _mm_srl_epi64(v01, nv01); + v23 = _mm_srl_epi64(v23, nv23); + // Undo conversion + v01 -= _mm_sll_epi64(_mm_set1_epi64x(1), + _mm_sub_epi64(_mm_set1_epi64x(bits), nv01)); + v23 -= _mm_sll_epi64(_mm_set1_epi64x(1), + _mm_sub_epi64(_mm_set1_epi64x(bits), nv23)); +#endif + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + intvec operator<<(intvec n) const + { + __m128i v01 = _mm256_castsi256_si128(v); + __m128i v23 = _mm256_extractf128_si256(v, 1); + __m128i nv01 = _mm256_castsi256_si128(n.v); + __m128i nv23 = _mm256_extractf128_si256(n.v, 1); + v01 = _mm_sll_epi64(v01, nv01); + v23 = _mm_sll_epi64(v23, nv23); + return _mm256_insertf128_si256(_mm256_castsi128_si256(v01), v23, 1); + } + intvec& operator>>=(intvec n) { return *this=*this>>n; } + intvec& operator<<=(intvec n) { return *this=*this<<n; } + }; + + + + template<> + struct realvec<double,4>: floatprops<double> + { + static const int size = 4; + typedef real_t scalar_t; + typedef __m256d vector_t; + + static_assert(size * sizeof(real_t) == sizeof(vector_t), + "vector size is wrong"); + + typedef boolvec<real_t, size> boolvec_t; + typedef intvec<real_t, size> intvec_t; + typedef realvec realvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + + + vector_t v; + + realvec() {} + realvec(realvec const& x): v(x.v) {} + realvec& operator=(realvec const& x) { return v=x.v, *this; } + realvec(vector_t x): v(x) {} + realvec(real_t a): v(_mm256_set1_pd(a)) {} + realvec(real_t const* as): v(_mm256_set_pd(as[3], as[2], as[1], as[0])) {} + + operator vector_t() const { return v; } + real_t operator[](int n) const { return ((real_t const*)&v)[n]; } + realvec& set_elt(int n, real_t a) { return ((real_t*)&v)[n]=a, *this; } + + + + intvec_t as_int() const { return _mm256_castpd_si256(v); } + intvec_t convert_int() const + { +#if 0 + __m128i iv0123 = _mm256_cvtpd_epi32(v); + __m128i iv2301 = _mm_shuffle_ps(iv0123, iv0123, 0b10110001); + __m256i iv01232301 = + _mm256_insertf128_si256(_mm256_castsi128_si256(iv0123), iv2301, 1); + __m256i zero = _mm256_setzero_ps(); + return _mm256_unpacklo_ps(iv01232301, zero); +#else + realvec x = _mm256_floor_pd(v); + intvec_t ix = x.as_int(); + boolvec_t sign = ix.as_bool(); + intvec_t exponent = (ix & exponent_mask) >> mantissa_bits; + ix &= mantissa_mask; + ix |= U(1) << mantissa_bits; // add hidden bit + ix <<= exponent - exponent_offset + 52; // ??? + ix = ifthen(sign, -ix, ix); + return ix; +#endif + } + + + + realvec operator+() const { return *this; } + realvec operator-() const { return _mm256_sub_pd(_mm256_set1_pd(0.0), v); } + + realvec operator+(realvec x) const { return _mm256_add_pd(v, x.v); } + realvec operator-(realvec x) const { return _mm256_sub_pd(v, x.v); } + realvec operator*(realvec x) const { return _mm256_mul_pd(v, x.v); } + realvec operator/(realvec x) const { return _mm256_div_pd(v, x.v); } + + realvec& operator+=(realvec const& x) { return *this=*this+x; } + realvec& operator-=(realvec const& x) { return *this=*this-x; } + realvec& operator*=(realvec const& x) { return *this=*this*x; } + realvec& operator/=(realvec const& x) { return *this=*this/x; } + + real_t prod() const + { + return (*this)[0] * (*this)[1] * (*this)[2] * (*this)[3]; + } + real_t sum() const + { + return (*this)[0] + (*this)[1] + (*this)[2] + (*this)[3]; + } + + + + boolvec_t operator==(realvec const& x) const + { + return _mm256_cmp_pd(v, x.v, _CMP_EQ_OQ); + } + boolvec_t operator!=(realvec const& x) const + { + return _mm256_cmp_pd(v, x.v, _CMP_NEQ_OQ); + } + boolvec_t operator<(realvec const& x) const + { + return _mm256_cmp_pd(v, x.v, _CMP_LT_OQ); + } + boolvec_t operator<=(realvec const& x) const + { + return _mm256_cmp_pd(v, x.v, _CMP_LE_OQ); + } + boolvec_t operator>(realvec const& x) const + { + return _mm256_cmp_pd(v, x.v, _CMP_GT_OQ); + } + boolvec_t operator>=(realvec const& x) const + { + return _mm256_cmp_pd(v, x.v, _CMP_GE_OQ); + } + + + +#if 0 + realvec copysign(realvec y) const + { + uint_t signmask = U(1) << (bits-1); + intvec_t value = as_int() & IV(~signmask); + intvec_t sign = y.as_int() & IV(signmask); + return (sign | value).as_float(); + } + + realvec fabs() const + { + uint_t signmask = U(1) << (bits-1); + return (as_int() & IV(~signmask)).as_float(); + } + + intvec_t ilogb() const + { + intvec_t exponent_mask = + ((U(1) << exponent_bits) - U(1)) << mantissa_bits; + return lsr(as_int() & exponent_mask, mantissa_bits) - IV(exponent_offset); + } + + realvec scalbn(intvec_t n) const + { + return *this * ((n + exponent_offset) << mantissa_bits).as_float(); + } + + boolvec_t signbit() const + { + return v; + } +#endif + + realvec copysign(realvec y) const { return MF::vml_copysign(*this, y); } + realvec fabs() const { return MF::vml_fabs(*this); } + intvec_t ilogb() const { return MF::vml_ilogb(*this); } + realvec scalbn(intvec_t n) const { return MF::vml_scalbn(*this, n); } + boolvec_t signbit() const { return v; } + + + + realvec acos() const { return MF::vml_acos(*this); } + realvec asin() const { return MF::vml_asin(*this); } + realvec atan() const { return MF::vml_atan(*this); } + realvec floor() const { return _mm256_floor_pd(v); } + realvec log() const { return MF::vml_log(*this); } + realvec log10() const { return MF::vml_log10(*this); } + realvec log1p() const { return MF::vml_log1p(*this); } + realvec log2() const { return MF::vml_log2(*this); } + realvec rcp() const { return _mm256_div_pd(_mm256_set1_pd(1.0), v); } + // realvec rcp() const { return MF::vml_rcp(*this); } + realvec rsqrt() const { return MF::vml_rsqrt(*this); } + realvec sqrt() const { return _mm256_sqrt_pd(v); } + // realvec sqrt() const { return MF::vml_sqrt(*this); } + }; + + + + // boolvec definitions + + inline + auto boolvec<double,4>::as_int() const -> intvec_t + { + return _mm256_castpd_si256(v); + } + + inline + auto boolvec<double,4>::convert_int() const -> intvec_t + { + //return ifthen(v, U(1), U(0)); + return lsr(as_int(), bits-1); + } + + inline + auto boolvec<double,4>::ifthen(intvec_t x, intvec_t y) const -> intvec_t + { + return ifthen(x.as_float(), y.as_float()).as_int(); + } + + inline + auto boolvec<double,4>::ifthen(realvec_t x, realvec_t y) const -> realvec_t + { + return _mm256_blendv_pd(y.v, x.v, v); + } + + + + // intvec definitions + + inline auto intvec<double,4>::as_float() const -> realvec_t + { + return _mm256_castsi256_pd(v); + } + + inline auto intvec<double,4>::convert_float() const -> realvec_t + { + intvec x = v; + uint_t signbitmask = U(1) << (bits-1); + // make unsigned + x += signbitmask; + // convert lower 52 bits + intvec xlo = x & IV((U(1) << mantissa_bits) - 1); + int_t exponent_0 = exponent_offset + mantissa_bits; + xlo = xlo | exponent_0; + realvec_t flo = xlo.as_float() - FP::as_float(exponent_0); + // convert upper 22 bits + intvec xhi = x.lsr(U(mantissa_bits)); + int_t exponent_52 = exponent_0 + mantissa_bits; + xhi = xhi | exponent_52; + realvec_t fhi = xhi.as_float() - FP::as_float(exponent_52); + return flo + fhi - R(signbitmask); + } + +} // namespace vecmathlib + +#endif // #ifndef VEC_DOUBLE_AVX_H diff --git a/vec_float.h b/vec_float.h new file mode 100644 index 0000000..6025ab7 --- /dev/null +++ b/vec_float.h @@ -0,0 +1,305 @@ +// -*-C++-*- + +#ifndef VEC_FLOAT_H +#define VEC_FLOAT_H + +#include "floatprops.h" +#include "mathfuncs.h" +#include "vec_base.h" + +#include <cmath> + + + +namespace vecmathlib { + + template<> struct boolvec<float,1>; + template<> struct intvec<float,1>; + template<> struct realvec<float,1>; + + + + template<> + struct boolvec<float,1>: floatprops<float> + { + static const int size = 1; + typedef bool scalar_t; + typedef bool bvector_t; + + typedef boolvec boolvec_t; + typedef intvec<real_t, size> intvec_t; + typedef realvec<real_t, size> realvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + + + bvector_t v; + + boolvec() {} + boolvec(boolvec const& x): v(x.v) {} + boolvec& operator=(boolvec const& x) { return v=x.v, *this; } + //boolvec(vector_t x): v(x) {} + boolvec(bool a): v(a) {} + boolvec(bool const* as): v(as[0]) {} + + operator bvector_t() const { return v; } + bool operator[](int n) const { return v; } + boolvec& set_elt(int n, bool a) { return v=a, *this; } + + + + auto as_int() const -> intvec_t; // defined after intvec + auto convert_int() const -> intvec_t; // defined after intvec + + + + boolvec operator!() const { return !v; } + + boolvec operator&&(boolvec x) const { return v&&x.v; } + boolvec operator||(boolvec x) const { return v||x.v; } + boolvec operator==(boolvec x) const { return v==x.v; } + boolvec operator!=(boolvec x) const { return v!=x.v; } + + bool all() const { return v; } + bool any() const { return v; } + + + + // ifthen(condition, then-value, else-value) + auto ifthen(intvec_t x, + intvec_t y) const -> intvec_t; // defined after intvec + auto ifthen(realvec_t x, + realvec_t y) const -> realvec_t; // defined after realvec + + }; + + + + template<> + struct intvec<float,1>: floatprops<float> + { + static const int size = 1; + typedef int_t scalar_t; + typedef int_t ivector_t; + + typedef boolvec<real_t, size> boolvec_t; + typedef intvec intvec_t; + typedef realvec<real_t, size> realvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + + + ivector_t v; + + intvec() {} + intvec(intvec const& x): v(x.v) {} + intvec& operator=(intvec const& x) { return v=x.v, *this; } + //intvec(vector_t x): v(x) {} + intvec(int_t a): v(a) {} + intvec(int_t const* as): v(as[0]) {} + + operator ivector_t() const { return v; } + int_t operator[](int n) const { return v; } + intvec& set_elt(int n, int_t a) { return v=a, *this; } + + + + auto as_bool() const -> boolvec_t { return *(bool const*)&v; } + auto convert_bool() const -> boolvec_t { return v; } + auto as_float() const -> realvec_t; // defined after realvec + auto convert_float() const -> realvec_t; // defined after realvec + + + + // Note: not all arithmetic operations are supported! + + intvec operator+() const { return +v; } + intvec operator-() const { return -v; } + + intvec operator+(intvec x) const { return v+x.v; } + intvec operator-(intvec x) const { return v-x.v; } + + intvec& operator+=(intvec const& x) { return *this=*this+x; } + intvec& operator-=(intvec const& x) { return *this=*this-x; } + + + + intvec operator~() const { return ~v; } + + intvec operator&(intvec x) const { return v&x.v; } + intvec operator|(intvec x) const { return v|x.v; } + intvec operator^(intvec x) const { return v^x.v; } + + intvec& operator&=(intvec const& x) { return *this=*this&x; } + intvec& operator|=(intvec const& x) { return *this=*this|x; } + intvec& operator^=(intvec const& x) { return *this=*this^x; } + + + + intvec lsr(int_t n) const { return U(v)>>n; } + intvec operator>>(int_t n) const { return v>>n; } + intvec operator<<(int_t n) const { return v<<n; } + intvec& operator>>=(int_t n) { return *this=*this>>n; } + intvec& operator<<=(int_t n) { return *this=*this<<n; } + + intvec lsr(intvec n) const { return U(v)>>n; } + intvec operator>>(intvec n) const { return v>>n; } + intvec operator<<(intvec n) const { return v<<n; } + intvec& operator>>=(intvec n) { return *this=*this>>n; } + intvec& operator<<=(intvec n) { return *this=*this<<n; } + }; + + + + template<> + struct realvec<float,1>: floatprops<float> + { + static int const size = 1; + typedef real_t scalar_t; + typedef real_t vector_t; + + typedef boolvec<real_t, size> boolvec_t; + typedef intvec<real_t, size> intvec_t; + typedef realvec realvec_t; + + // Short names for type casts + typedef real_t R; + typedef int_t I; + typedef uint_t U; + typedef realvec_t RV; + typedef intvec_t IV; + typedef boolvec_t BV; + typedef floatprops<real_t> FP; + typedef mathfuncs<realvec_t> MF; + + + + vector_t v; + + realvec() {} + realvec(realvec const& x): v(x.v) {} + realvec& operator=(realvec const& x) { return v=x.v, *this; } + //realvec(vector_t x): v(x) {} + realvec(real_t a): v(a) {} + realvec(real_t const* as): v(as[0]) {} + + operator vector_t() const { return v; } + real_t operator[](int n) const { return v; } + realvec& set_elt(int n, real_t a) { return v=a, *this; } + + + + intvec_t as_int() const { return FP::as_int(v); } + intvec_t convert_int() const { return MF::vml_convert_int(v); } + + + + realvec operator+() const { return +v; } + realvec operator-() const { return -v; } + + realvec operator+(realvec x) const { return v+x.v; } + realvec operator-(realvec x) const { return v-x.v; } + realvec operator*(realvec x) const { return v*x.v; } + realvec operator/(realvec x) const { return v/x.v; } + + realvec& operator+=(realvec const& x) { return *this=*this+x; } + realvec& operator-=(realvec const& x) { return *this=*this-x; } + realvec& operator*=(realvec const& x) { return *this=*this*x; } + realvec& operator/=(realvec const& x) { return *this=*this/x; } + + real_t prod() const { return v; } + real_t sum() const { return v; } + + + + boolvec_t operator==(realvec const& x) const { return v==x.v; } + boolvec_t operator!=(realvec const& x) const { return v!=x.v; } + boolvec_t operator<(realvec const& x) const { return v<x.v; } + boolvec_t operator<=(realvec const& x) const { return v<=x.v; } + boolvec_t operator>(realvec const& x) const { return v>x.v; } + boolvec_t operator>=(realvec const& x) const { return v>=x.v; } + + + + realvec copysign(realvec x) const { return MF::vml_copysign(v, x.v); } + realvec fabs() const { return MF::vml_fabs(v); } + intvec_t ilogb() const { return MF::vml_ilogb(v); } + realvec scalbn(intvec_t n) const { return MF::vml_scalbn(v, n.v); } + boolvec_t signbit() const { return MF::vml_signbit(v); } + + realvec acos() const { return MF::vml_acos(*this); } + realvec asin() const { return MF::vml_asin(*this); } + realvec atan() const { return MF::vml_atan(*this); } + realvec floor() const { return std::floor(v); } + realvec log() const { return MF::vml_log(*this); } + realvec log10() const { return MF::vml_log10(*this); } + realvec log1p() const { return MF::vml_log1p(*this); } + realvec log2() const { return MF::vml_log2(*this); } + realvec rcp() const { return MF::vml_rcp(*this); } + realvec rsqrt() const { return MF::vml_rsqrt(*this); } + realvec sqrt() const { return MF::vml_sqrt(*this); } + }; + + + + // boolvec definitions + + inline + auto boolvec<float,1>::as_int() const -> intvec_t + { + return v; + } + + inline + auto boolvec<float,1>::convert_int() const -> intvec_t + { + return v; + } + + inline + auto boolvec<float,1>::ifthen(intvec_t x, intvec_t y) const -> intvec_t + { + return v ? x.v : y.v; + } + + inline + auto boolvec<float,1>::ifthen(realvec_t x, realvec_t y) const -> realvec_t + { + return v ? x.v : y.v; + } + + + + // intvec definitions + + inline auto intvec<float,1>::as_float() const -> realvec_t + { + return FP::as_float(v); + } + + inline auto intvec<float,1>::convert_float() const -> realvec_t + { + return MF::vml_convert_float(v); + } + +} // namespace vecmathlib + +#endif // #ifndef VEC_FLOAT_H diff --git a/vecmathlib.h b/vecmathlib.h new file mode 100644 index 0000000..2264545 --- /dev/null +++ b/vecmathlib.h @@ -0,0 +1,11 @@ +// -*-C++-*- + +#ifndef VECMATHLIB_H +#define VECMATHLIB_H + +#include "vec_float.h" +#if defined __AVX__ // Intel AVX +# include "vec_double_avx.h" +#endif + +#endif // #ifndef VECMATHLIB_H |