blob: 586e9d1e26e2372dc1612586ce7543121436688e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
// -*-C++-*-
#ifndef MATHFUNCS_RCP_H
#define MATHFUNCS_RCP_H
#include "mathfuncs_base.h"
#include <cmath>
namespace vecmathlib {
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_rcp(realvec_t x)
{
// Handle negative values
realvec_t x0 = x;
x = fabs(x);
// Initial guess
VML_ASSERT(all(x > RV(0.0)));
intvec_t ilogb_x = ilogb(x);
// For stability, choose a starting value that is below the result
realvec_t r = ldexp(RV(0.5), -ilogb_x);
// Iterate
int const nmax = 7;
for (int n=1; n<nmax; ++n) {
// Step
VML_ASSERT(all(x > RV(0.0)));
// Newton method:
// Solve f(r) = 0 for f(r) = x - 1/r
// r <- r - f(r) / f'(r)
// r <- 2 r - r^2 x
// r <- r + r (1 - r x)
// Note: don't rewrite this expression, this may introduce
// cancellation errors
r += r * (RV(1.0) - x*r);
}
// Handle negative values
r = copysign(r, x0);
return r;
}
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_remainder(realvec_t x, realvec_t y)
{
// return x - rint(x / y) * y;
realvec_t r = x / y;
return y * (r - rint(r));
}
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_fmod(realvec_t x, realvec_t y)
{
// realvec_t r = fabs(x);
// y = fabs(y);
// return copysign(r - floor(r / y) * y, x);
realvec_t r = x / y;
return y * (r - trunc(r));
}
}; // namespace vecmathlib
#endif // #ifndef MATHFUNCS_RCP_H
|