blob: 852c94ef2737d13817cef0fdaf21b70b84cb43e2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
// -*-C++-*-
#ifndef MATHFUNCS_CONVERT_H
#define MATHFUNCS_CONVERT_H
#include "mathfuncs_base.h"
#include <cassert>
#include <cmath>
namespace vecmathlib {
// Convert a vector of signed integers to floating point using only
// integer bit operations and floating-point subtraction.
//
// The integer is split into its low FP::mantissa_bits bits and the
// remaining high bits. Each part is OR-ed into the mantissa field of
// a float with a fixed exponent; subtracting the float that has the
// same exponent and a zero mantissa (the "hidden bit") leaves just
// the numerical value of that part. The two partial results are then
// added.
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_convert_float(intvec_t x)
{
  // Convert in two passes. Convert as much as possible during the
  // first pass (low bits), so that the second pass (high bits) may be
  // omitted if the high bits are known to be zero.
  const int_t lobits = FP::mantissa_bits;

  // Convert lower bits
  intvec_t xlo = x & IV((U(1) << lobits) - 1);
  // exponent for the equivalent floating point number
  const int_t exponent_lo =
    (FP::exponent_offset + lobits) << FP::mantissa_bits;
  xlo |= exponent_lo;
  // subtract hidden mantissa bit
  realvec_t flo = as_float(xlo) - RV(FP::as_float(exponent_lo));

  // Convert upper bits
  // make unsigned by subtracting largest negative number
  // (only do this for the high bits, since they have sufficient
  // precision to handle the overflow)
  x ^= FP::sign_mask;
  intvec_t xhi = lsr(x, lobits);
  // exponent for the equivalent floating point number; the extra
  // lobits in the exponent scale the high part back up to its
  // original bit position
  const int_t exponent_hi =
    (FP::exponent_offset + 2*lobits) << FP::mantissa_bits;
  xhi |= exponent_hi;
  // subtract hidden mantissa bit
  realvec_t fhi = as_float(xhi) - RV(FP::as_float(exponent_hi));
  // add largest negative number again
  // NOTE(review): this undoes the sign flip above only if
  // R(FP::sign_mask) is positive, i.e. FP::sign_mask is an unsigned
  // constant -- confirm against the FP traits class.
  fhi -= RV(R(FP::sign_mask));

  // Combine results
  return flo + fhi;
}
// Convert a floating point vector to an integer vector.
//
// The magnitude of x is rounded to an integer by adding
// 2^FP::mantissa_bits; the integer is then rebuilt from the mantissa
// and exponent of that sum, the added constant is removed again, and
// the original sign is re-applied. Lanes that compare equal to zero
// produce integer zero.
template<typename realvec_t>
auto mathfuncs<realvec_t>::vml_convert_int(realvec_t x) -> intvec_t
{
  // Record sign and zero lanes before x is modified
  boolvec_t was_negative = signbit(x);
  boolvec_t was_zero = x == RV(0.0);

  // Work on the magnitude only
  x = fabs(x);

  // Adding this constant pushes the fractional bits out of the
  // mantissa, which rounds the value
  int_t bias = U(1) << FP::mantissa_bits;
  x += R(bias);

  // After the addition every lane must have an exponent of at least
  // FP::mantissa_bits (checked in debug builds only)
  intvec_t expo = ilogb(x);
  for (int lane=0; lane<intvec_t::size; ++lane) {
    assert(expo[lane] >= FP::mantissa_bits);
  }

  // Extract the stored mantissa and restore the hidden leading bit
  intvec_t result = as_int(x) & IV(FP::mantissa_mask);
  result |= U(1) << FP::mantissa_bits;
  // Scale by the part of the exponent that exceeds the mantissa width
  result <<= expo - IV(FP::mantissa_bits);
  // Remove the rounding constant that was added above
  result -= bias;

  // Re-apply the sign, then force zero lanes to exactly zero
  result = ifthen(was_negative, -result, result);
  return ifthen(was_zero, IV(I(0)), result);
}
// Round each element of x to an integral value.
//
// The magnitude of x has 2^FP::mantissa_bits added and then
// subtracted again; the addition discards the fractional bits. The
// sign of x is restored with copysign.
//
// NOTE(review): how ties are resolved follows the floating-point
// rounding mode in effect for the addition. Lanes with
// |x| >= 2^FP::mantissa_bits are already integral, but the addition
// may still round them -- consider passing such lanes through
// unchanged.
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x)
{
  realvec_t r = fabs(x);
  // RV(...) yields a vector, so the offset is held as a realvec_t
  // (it was previously declared as the scalar type real_t)
  realvec_t offset = RV(std::scalbn(R(1.0), FP::mantissa_bits));
  r += offset;
#warning "TODO: don't optimise this away!"
  r -= offset;
  return copysign(r, x);
}
// Return, per element, the smallest integral value that is not less
// than x.
//
// The value is first rounded to a nearby integer; wherever that
// integer lies below x, it is bumped up by one. The previous
// formulation, round(x + 0.5), turned every integral input into an
// exact tie and could therefore return x + 1 for an input that was
// already an integer.
//
// NOTE(review): assumes realvec_t provides operator< returning
// boolvec_t and that ifthen has a (boolvec_t, realvec_t, realvec_t)
// overload analogous to the intvec_t one -- confirm.
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x)
{
  realvec_t r = round(x);
  return ifthen(r < x, r + RV(1.0), r);
}
// Return, per element, the largest integral value that is not
// greater than x.
//
// The value is first rounded to a nearby integer; wherever that
// integer lies above x, it is lowered by one. The previous
// formulation, round(x - 0.5), turned every integral input into an
// exact tie and could therefore return x - 1 for an input that was
// already an integer.
//
// NOTE(review): assumes realvec_t provides operator> returning
// boolvec_t and that ifthen has a (boolvec_t, realvec_t, realvec_t)
// overload analogous to the intvec_t one -- confirm.
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x)
{
  realvec_t r = round(x);
  return ifthen(r > x, r - RV(1.0), r);
}
}; // namespace vecmathlib
#endif // #ifndef MATHFUNCS_CONVERT_H
|