// -*-C++-*- #ifndef MATHFUNCS_ASIN_H #define MATHFUNCS_ASIN_H #include "mathfuncs_base.h" #include namespace vecmathlib { namespace { template realvec_t mulsign(realvec_t x, realvec_t y) { typedef typename realvec_t::real_t real_t; typedef typename realvec_t::intvec_t intvec_t; typedef intvec_t IV; typedef floatprops FP; intvec_t value = as_int(x); intvec_t sign = as_int(y) & IV(FP::signbit_mask); return as_float(value ^ sign); } // Note: the order of arguments is y, x, as is convention for atan2 template realvec_t atan2k(realvec_t y, realvec_t x) { // Algorithm taken from SLEEF 2.80 typedef typename realvec_t::real_t real_t; typedef typename realvec_t::boolvec_t boolvec_t; typedef realvec_t RV; realvec_t q = RV(0.0); q = ifthen(signbit(x), RV(-2.0), q); x = fabs(x); boolvec_t cond = y > x; realvec_t x0 = x; realvec_t y0 = y; x = ifthen(cond, y0, x0); y = ifthen(cond, -x0, y0); q += ifthen(cond, RV(1.0), RV(0.0)); realvec_t s = y / x; realvec_t t = s * s; realvec_t u; switch (sizeof(real_t)) { default: __builtin_unreachable(); case sizeof(float): u = RV(0.00282363896258175373077393f); u = mad(u, t, RV(-0.0159569028764963150024414f)); u = mad(u, t, RV(0.0425049886107444763183594f)); u = mad(u, t, RV(-0.0748900920152664184570312f)); u = mad(u, t, RV(0.106347933411598205566406f)); u = mad(u, t, RV(-0.142027363181114196777344f)); u = mad(u, t, RV(0.199926957488059997558594f)); u = mad(u, t, RV(-0.333331018686294555664062f)); break; case sizeof(double): u = RV(-1.88796008463073496563746e-05); u = mad(u, t, RV(0.000209850076645816976906797)); u = mad(u, t, RV(-0.00110611831486672482563471)); u = mad(u, t, RV(0.00370026744188713119232403)); u = mad(u, t, RV(-0.00889896195887655491740809)); u = mad(u, t, RV(0.016599329773529201970117)); u = mad(u, t, RV(-0.0254517624932312641616861)); u = mad(u, t, RV(0.0337852580001353069993897)); u = mad(u, t, RV(-0.0407629191276836500001934)); u = mad(u, t, RV(0.0466667150077840625632675)); u = mad(u, t, RV(-0.0523674852303482457616113)); u = mad(u, t, RV(0.0587666392926673580854313)); u = mad(u, t, RV(-0.0666573579361080525984562)); u = mad(u, t, RV(0.0769219538311769618355029)); u = mad(u, t, RV(-0.090908995008245008229153)); u = mad(u, t, RV(0.111111105648261418443745)); u = mad(u, t, RV(-0.14285714266771329383765)); u = mad(u, t, RV(0.199999999996591265594148)); u = mad(u, t, RV(-0.333333333333311110369124)); break; } t = mad(u, t * s, s); t = mad(q, RV(M_PI_2), t); return t; } } template realvec_t mathfuncs::vml_asin(realvec_t d) { // Algorithm taken from SLEEF 2.80 return mulsign(atan2k(fabs(d), sqrt((RV(1.0) + d) * (RV(1.0) - d))), d); } template realvec_t mathfuncs::vml_acos(realvec_t d) { // Algorithm taken from SLEEF 2.80 return (mulsign(atan2k(sqrt((RV(1.0) + d) * (RV(1.0) - d)), fabs(d)), d) + ifthen(d < RV(0.0), RV(M_PI), RV(0.0))); } template realvec_t mathfuncs::vml_atan(realvec_t s) { // Algorithm taken from SLEEF 2.80 realvec_t q1 = s; s = fabs(s); boolvec_t q0 = s > RV(1.0); s = ifthen(q0, rcp(s), s); realvec_t t = s * s; realvec_t u; switch (sizeof(real_t)) { default: __builtin_unreachable(); case sizeof(float): u = RV(0.00282363896258175373077393f); u = mad(u, t, RV(-0.0159569028764963150024414f)); u = mad(u, t, RV(0.0425049886107444763183594f)); u = mad(u, t, RV(-0.0748900920152664184570312f)); u = mad(u, t, RV(0.106347933411598205566406f)); u = mad(u, t, RV(-0.142027363181114196777344f)); u = mad(u, t, RV(0.199926957488059997558594f)); u = mad(u, t, RV(-0.333331018686294555664062f)); break; case sizeof(double): u = RV(-1.88796008463073496563746e-05); u = mad(u, t, RV(0.000209850076645816976906797)); u = mad(u, t, RV(-0.00110611831486672482563471)); u = mad(u, t, RV(0.00370026744188713119232403)); u = mad(u, t, RV(-0.00889896195887655491740809)); u = mad(u, t, RV(0.016599329773529201970117)); u = mad(u, t, RV(-0.0254517624932312641616861)); u = mad(u, t, RV(0.0337852580001353069993897)); u = mad(u, t, RV(-0.0407629191276836500001934)); u = mad(u, t, RV(0.0466667150077840625632675)); u = mad(u, t, RV(-0.0523674852303482457616113)); u = mad(u, t, RV(0.0587666392926673580854313)); u = mad(u, t, RV(-0.0666573579361080525984562)); u = mad(u, t, RV(0.0769219538311769618355029)); u = mad(u, t, RV(-0.090908995008245008229153)); u = mad(u, t, RV(0.111111105648261418443745)); u = mad(u, t, RV(-0.14285714266771329383765)); u = mad(u, t, RV(0.199999999996591265594148)); u = mad(u, t, RV(-0.333333333333311110369124)); break; } t = s + s * (t * u); t = ifthen(q0, RV(M_PI_2) - t, t); t = copysign(t, q1); return t; } // Note: the order of arguments is y, x, as is convention for atan2 template realvec_t mathfuncs::vml_atan2(realvec_t y, realvec_t x) { // Algorithm taken from SLEEF 2.80 realvec_t r = atan2k(fabs(y), x); r = mulsign(r, x); r = ifthen(isinf(x) || x == RV(0.0), ifthen(isinf(x), RV(M_PI_2) - copysign(RV(M_PI_2), x), RV(M_PI_2)), r); r = ifthen(isinf(y), ifthen(isinf(x), RV(M_PI_2) - copysign(RV(M_PI_4), x), RV(M_PI_2)), r); r = ifthen(y == RV(0.0), ifthen(signbit(x), RV(M_PI), RV(0.0)), r); const real_t nan = std::numeric_limits::quiet_NaN(); return ifthen(isnan(x) || isnan(y), RV(nan), mulsign(r, y)); } }; // namespace vecmathlib #endif // #ifndef MATHFUNCS_ASIN_H