1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
// -*-C++-*-
#ifndef MATHFUNCS_CONVERT_H
#define MATHFUNCS_CONVERT_H
#include "mathfuncs_base.h"
#include <cmath>
namespace vecmathlib {
// Convert every element of the integer vector x to a floating point
// value, using only integer bit operations and floating point
// additions/subtractions.
//
// The integer is split into its lowest FP::mantissa_bits bits and the
// remaining high bits. Each part is OR-ed into the mantissa field of
// a floating point number with a fixed, known exponent; subtracting
// that fixed number again leaves just the value of the part. The two
// partial results are then added.
//
// Parameter: x  integer vector to convert (modified locally only)
// Returns:      the sum of the converted low and high parts
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_convert_float(intvec_t x)
{
// Convert in two passes. Convert as much as possible during the
// first pass (lobits), so that the second pass (hibits) may be
// omitted if the high bits are known to be zero.
int_t lobits = FP::mantissa_bits;
// int_t hibits = FP::bits - lobits;
// Convert lower bits
// (mask off everything except the lowest lobits bits; this must
// happen before x is modified for the high pass below)
intvec_t xlo = x & IV((U(1) << lobits) - 1);
// exponent for the equivalent floating point number
// (presumably FP::exponent_offset is the exponent bias, making this
// the bit pattern of 2^lobits -- confirm against the FP definition)
int_t exponent_lo = (FP::exponent_offset + lobits) << FP::mantissa_bits;
xlo |= exponent_lo;
// subtract hidden mantissa bit
// (i.e. remove the value contributed by the exponent pattern alone,
// leaving only what the mantissa field contributed)
realvec_t flo = as_float(xlo) - RV(FP::as_float(exponent_lo));
// Convert upper bits
// make unsigned by subtracting largest negative number
// (only do this for the high bits, since they have sufficient
// precision to handle the overflow)
x ^= FP::signbit_mask;
// logical (zero-filling) shift, presumably -- lsr is defined elsewhere
intvec_t xhi = lsr(x, lobits);
// exponent for the equivalent floating point number
// (the exponent is raised by another lobits compared to the low
// pass, which compensates for the right shift applied to xhi)
int_t exponent_hi = (FP::exponent_offset + 2*lobits) << FP::mantissa_bits;
xhi |= exponent_hi;
// subtract hidden mantissa bit
realvec_t fhi = as_float(xhi) - RV(FP::as_float(exponent_hi));
// add largest negative number again
// NOTE(review): the direction of this correction depends on whether
// FP::signbit_mask converts to a positive or a negative real_t;
// its declared type is not visible here -- confirm.
fhi -= RV(R(FP::signbit_mask));
// Ensure that the converted low and high bits are calculated
// separately, since a real_t doesn't have enough precision to
// hold all the bits of an int_t
fhi.barrier();
// Combine results
return flo + fhi;
}
// Convert every element of the floating point vector x to an integer,
// truncating towards zero, by extracting the mantissa and shifting it
// according to the exponent.
//
// Values with a magnitude below 1 become 0. Overflow (magnitudes that
// do not fit into an int_t) is not detected.
//
// Parameter: x  floating point vector to convert
// Returns:      integer vector holding the converted elements
template<typename realvec_t>
typename realvec_t::intvec_t mathfuncs<realvec_t>::vml_convert_int(realvec_t x)
{
  // Separate sign and magnitude; the sign is re-applied at the end
  boolvec_t negative = signbit(x);
  realvec_t magnitude = fabs(x);

  // Classify each element
  boolvec_t below_one = magnitude < RV(1.0);
  boolvec_t needs_left_shift =
    magnitude > RV(std::ldexp(R(1.0), FP::mantissa_bits));

  // Mantissa field with the hidden leading bit made explicit
  intvec_t significand = as_int(magnitude) & IV(FP::mantissa_mask);
  significand |= U(1) << FP::mantissa_bits;

  // Distance between the exponent and the position of the hidden
  // bit; positive means the significand has to move left
  intvec_t shift = ilogb(magnitude) - IV(FP::mantissa_bits);

  // Both shift directions are evaluated; the right shift is the one
  // that may discard fractional bits (truncation)
  intvec_t shifted_up = significand << shift;
  intvec_t shifted_down = significand >> -shift;
  intvec_t result = ifthen(needs_left_shift, shifted_up, shifted_down);

  // Magnitudes below 1 truncate to zero
  result = ifthen(below_one, IV(I(0)), result);

  // Restore the sign
  return ifthen(negative, -result, result);
}
// Round to nearest integer, breaking ties using prevailing rounding
// mode (default: round to even)
//
// Elements whose magnitude is at least 2^FP::mantissa_bits are
// returned unchanged: neighbouring floating point numbers are at
// least 1 apart from there on, so such values are already integral.
// The result always carries the sign of x, so inputs that round to
// zero keep their sign.
//
// Parameter: x  vector of values to round
// Returns:      vector of rounded values
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_rint(realvec_t x)
{
  realvec_t limit = RV(std::ldexp(R(1.0), FP::mantissa_bits));
  // The add/subtract trick below is only exact while x + offset stays
  // in the range where the spacing is 1; beyond the limit it would
  // round already-integral inputs to a neighbouring even value
  boolvec_t isintegral = fabs(x) >= limit;
  realvec_t r = x;
  // Round by adding a large number, destroying all excess precision
  realvec_t offset = copysign(limit, x);
  r += offset;
  // Ensure the rounding is not optimised away
  r.barrier();
  r -= offset;
  // Subtracting the offset yields +0 for every zero result;
  // re-attach the sign of the input
  r = copysign(r, x);
  return ifthen(isintegral, x, r);
}
// Round to next integer above
//
// Rounding up means rounding towards zero for negative elements and
// away from zero for all others.
//
// Parameter: x  vector of values to round
// Returns:      vector of rounded values
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x)
{
  boolvec_t is_negative = x<RV(0.0);
  realvec_t towards_zero = trunc(x);
  realvec_t away_from_zero = vml_antitrunc(x);
  return ifthen(is_negative, towards_zero, away_from_zero);
}
// Round to next integer below
//
// Rounding down means rounding away from zero for negative elements
// and towards zero for all others.
//
// Parameter: x  vector of values to round
// Returns:      vector of rounded values
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x)
{
  boolvec_t is_negative = x<RV(0.0);
  realvec_t away_from_zero = vml_antitrunc(x);
  realvec_t towards_zero = trunc(x);
  return ifthen(is_negative, away_from_zero, towards_zero);
}
// Round to nearest integer, breaking ties away from zero
//
// The decision is based on the exact fractional part of |x| instead
// of on |x| + 0.5, because that sum is itself subject to rounding:
// it turns the largest value below 0.5 into 1, and it moves large
// odd integers to a neighbouring even integer.
//
// Parameter: x  vector of values to round
// Returns:      vector of rounded values, carrying the sign of x
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x)
{
  realvec_t ax = fabs(x);
  realvec_t whole = trunc(ax);
  // This difference is exactly representable, so comparing it with
  // 0.5 involves no rounding
  realvec_t frac = ax - whole;
  // When frac >= 0.5, whole is small enough that whole + 1 is exact
  realvec_t r = ifthen(frac >= RV(0.5), whole + RV(1.0), whole);
  return copysign(r, x);
}
// Round to next integer towards zero
//
// Works on the bit representation: the fractional mantissa bits of
// |x| are cleared with a mask derived from the exponent. Magnitudes
// below 1 give zero, magnitudes of at least 2^FP::mantissa_bits are
// passed through unchanged. The sign of x is applied to the result.
//
// Parameter: x  vector of values to round
// Returns:      vector of rounded values
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_trunc(realvec_t x)
{
  realvec_t ax = fabs(x);

  // The exponent tells how many leading mantissa bits are integral
  intvec_t exponent = ilogb(ax);
  // 2^(exponent+1) - 1; its bit pattern serves as the mantissa part
  // of the mask. This is probably faster than a shift operation
  realvec_t ones = ldexp(RV(2.0), exponent) - RV(1.0);
  // Always keep the sign and exponent fields
  intvec_t keep = IV(FP::signbit_mask | FP::exponent_mask) | as_int(ones);
  realvec_t chopped = as_float(as_int(ax) & keep);

  // Special ranges where the mask result is not used
  boolvec_t below_one = ax < RV(1.0);
  boolvec_t no_fraction = ax >= RV(std::ldexp(R(1.0), FP::mantissa_bits));

  realvec_t magnitude = ifthen(no_fraction, ax, chopped);
  magnitude = ifthen(below_one, RV(0.0), magnitude);
  return copysign(magnitude, x);
}
// Round to next integer away from zero
//
// Adds an offset just below 1 to |x| and then clears the fractional
// mantissa bits with a mask derived from the exponent of |x|. Zero
// stays zero, other magnitudes up to 1 give 1, and magnitudes above
// 2^FP::mantissa_bits - 1 are passed through unchanged. The sign of
// x is applied to the result.
//
// Parameter: x  vector of values to round
// Returns:      vector of rounded values
template<typename realvec_t>
realvec_t mathfuncs<realvec_t>::vml_antitrunc(realvec_t x)
{
  realvec_t ax = fabs(x);

  // The exponent tells how many leading mantissa bits are integral
  intvec_t exponent = ilogb(ax);
  // 2^(exponent+1) - 1; its bit pattern serves as the mantissa part
  // of the mask. This is probably faster than a shift operation
  realvec_t ones = ldexp(RV(2.0), exponent) - RV(1.0);
  // Always keep the sign and exponent fields
  intvec_t keep = IV(FP::signbit_mask | FP::exponent_mask) | as_int(ones);

  // 1 - 2^(exponent - mantissa_bits): adding it pushes any value
  // with a fractional part past the next integer
  realvec_t offset =
    RV(1.0) - ldexp(RV(1.0), exponent - IV(FP::mantissa_bits));
  offset.barrier();
  realvec_t bumped = as_float(as_int(ax + offset) & keep);

  // Special ranges where the mask result is not used
  boolvec_t iszero = ax == RV(0.0);
  boolvec_t upto_one = ax <= RV(1.0);
  boolvec_t no_fraction =
    ax > RV(std::ldexp(R(1.0), FP::mantissa_bits) - R(1.0));

  // Later selections take precedence over earlier ones
  realvec_t magnitude = ifthen(no_fraction, ax, bumped);
  magnitude = ifthen(upto_one, RV(1.0), magnitude);
  magnitude = ifthen(iszero, RV(0.0), magnitude);
  return copysign(magnitude, x);
}
}; // namespace vecmathlib
#endif // #ifndef MATHFUNCS_CONVERT_H
|