mathfuncs_convert.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128

// -*-C++-*-

#ifndef MATHFUNCS_CONVERT_H
#define MATHFUNCS_CONVERT_H

#include "mathfuncs_base.h"

#include <cassert>
#include <cmath>


namespace vecmathlib {
  
  template<typename realvec_t>
  realvec_t mathfuncs<realvec_t>::vml_convert_float(intvec_t x)
  {
    // Convert in two passes. Convert as much as possible during the
    // first pass (lobits), so that the second pass (hibits) may be
    // omitted if the high bits are known to be zero.
    int_t lobits = FP::mantissa_bits;
    int_t hibits = FP::bits - lobits;
    
    // Convert lower bits
    intvec_t xlo = x & IV((U(1) << lobits) - 1);
    // exponent for the equivalent floating point number
    int_t exponent_lo = (FP::exponent_offset + lobits) << FP::mantissa_bits;
    xlo |= exponent_lo;
    // subtract hidden mantissa bit
    realvec_t flo = as_float(xlo) - RV(FP::as_float(exponent_lo));
    
    // Convert upper bits
    // make unsigned by subtracting largest negative number
    // (only do this for the high bits, since they have sufficient
    // precision to handle the overflow)
    x ^= FP::signbit_mask;
    intvec_t xhi = lsr(x, lobits);
    // exponent for the equivalent floating point number
    int_t exponent_hi = (FP::exponent_offset + 2*lobits) << FP::mantissa_bits;
    xhi |= exponent_hi;
    // subtract hidden mantissa bit
    realvec_t fhi = as_float(xhi) - RV(FP::as_float(exponent_hi));
    // add largest negative number again
    fhi -= RV(R(FP::signbit_mask));
    // Ensure that the converted low and high bits are calculated
    // separately, since a real_t doesn't have enough precision to
    // hold all the bits of an int_t
    fhi.barrier();
    
    // Combine results
    return flo + fhi;
  }
  
  
  template<typename realvec_t>
  auto mathfuncs<realvec_t>::vml_convert_int(realvec_t x) -> intvec_t
  {
    // Handle zero
    boolvec_t is_zero = x == RV(0.0);
    // Handle overflow
    int_t min_int = FP::signbit_mask;
    int_t max_int = ~FP::signbit_mask;
    boolvec_t is_overflow = x < RV(R(min_int)) || x > RV(R(max_int));
    // Handle negative numbers
    boolvec_t is_negative = signbit(x);
    x = fabs(x);
    
    // Round, by adding a large number that removes the excess
    // precision
    int_t large = U(1) << FP::mantissa_bits;
    x += R(large);
    
    intvec_t exponent = ilogb(x);
    for (int i=0; i<intvec_t::size; ++i) {
      assert(exponent[i] >= FP::mantissa_bits);
    }
    intvec_t ix = as_int(x) & IV(FP::mantissa_mask);
    // add hidden mantissa bit
    ix |= U(1) << FP::mantissa_bits;
    // shift according to exponent
    ix <<= exponent - IV(FP::mantissa_bits);
    
    // Undo the adding above
    ix -= large;
    
    // Handle negative numbers
    ix = ifthen(is_negative, -ix, ix);
    // Handle overflow
    ix = ifthen(is_overflow, IV(min_int), ix);
    // Handle zero
    ix = ifthen(is_zero, IV(I(0)), ix);
    
    return ix;
  }
  
  
  template<typename realvec_t>
  realvec_t mathfuncs<realvec_t>::vml_round(realvec_t x)
  {
    realvec_t r = x;
    // Round by adding a large number, destroying all excess precision
    realvec_t offset = copysign(RV(std::scalbn(R(1.0), FP::mantissa_bits)), x);
    r += offset;
    // Ensure the rounding is not optimised away
    r.barrier();
    r -= offset;
    return r;
  }
  
  template<typename realvec_t>
  realvec_t mathfuncs<realvec_t>::vml_ceil(realvec_t x)
  {
    real_t offset = R(0.5) - std::scalbn(fabs(x), -FP::mantissa_bits);
    return round(x + RV(offset));
  }
  
  template<typename realvec_t>
  realvec_t mathfuncs<realvec_t>::vml_floor(realvec_t x)
  {
    real_t offset = R(0.5) - std::scalbn(fabs(x), -FP::mantissa_bits);
    return round(x - RV(offset));
  }
  
}; // namespace vecmathlib

#endif  // #ifndef MATHFUNCS_CONVERT_H