// mathfuncs_log.h

// -*-C++-*-

#ifndef MATHFUNCS_LOG_H
#define MATHFUNCS_LOG_H

#include "mathfuncs_base.h"

#include <cmath>


namespace vecmathlib {
  
  // Base-2 logarithm of x.
  //
  // Outline of the computation below:
  //   1. Split x into an integer exponent e and a value m in [1,2)
  //      by reading, then overwriting, the exponent field of the
  //      floating-point bit pattern.
  //   2. Form y = (m-1)/(m+1) and evaluate y * P(y^2), where P is a
  //      fixed polynomial evaluated as a Horner chain of fma calls.
  //      Its constant term (2.885...) is approximately 2/ln(2).
  //   3. Add e to the result.
  //
  // Requires x > 0 (checked with VML_ASSERT). No special handling
  // for subnormal, infinite or NaN inputs is visible in this
  // function.
  template<typename realvec_t>
  realvec_t mathfuncs<realvec_t>::vml_log2(realvec_t x)
  {
    VML_ASSERT(all(x > RV(0.0)));
    
    // Step 1: exponent/mantissa split. This stands in for
    //   e = ilogb(x);  x = ldexp(x, -e);
    // and works on the raw bits instead. The sign bit is known to be
    // zero, so shifting right by the mantissa width yields the
    // exponent field, from which exponent_offset is subtracted.
    intvec_t x_bits = as_int(x);
    intvec_t exponent = (lsr(x_bits, I(FP::mantissa_bits)) -
                         IV(FP::exponent_offset));
    // Exponent field holding exponent_offset itself; combined with
    // the mantissa bits this gives a value that is asserted below to
    // lie in [1,2).
    intvec_t unit_exponent_bits =
      IV(I(FP::exponent_offset) << I(FP::mantissa_bits));
    x = as_float((x_bits & IV(FP::mantissa_mask)) | unit_exponent_bits);
    VML_ASSERT(all(x >= RV(1.0) && x < RV(2.0)));
    
    // Step 2: change of variable and polynomial in y^2
    realvec_t y = (x - RV(1.0)) / (x + RV(1.0));
    realvec_t y_sq = y*y;
    
    // The coefficient set depends on the width of real_t and on
    // whether VML_HAVE_FP_CONTRACT is defined; the error quoted next
    // to each set is the figure recorded with the coefficients.
    realvec_t poly;
    if (sizeof(real_t) == 4) {
#ifdef VML_HAVE_FP_CONTRACT
      // float, error=5.98355642684398209498469870525e-9
      poly = RV(0.410981538282433293325329456838);
      poly = fma(poly, y_sq, RV(0.402155483172044562892705980539));
      poly = fma(poly, y_sq, RV(0.57755014627178237959721643293));
      poly = fma(poly, y_sq, RV(0.96178780600659929206930296869));
      poly = fma(poly, y_sq, RV(2.88539012786343587248965772685));
#else
      // float, error=2.25468184051947656525068987795e-7
      poly = RV(0.498866687070343238590910977481);
      poly = fma(poly, y_sq, RV(0.57002741193682764193895550312));
      poly = fma(poly, y_sq, RV(0.96200215034262628756932169194));
      poly = fma(poly, y_sq, RV(2.88538850388042106595516956395));
#endif
    } else if (sizeof(real_t) == 8) {
#ifdef VML_HAVE_FP_CONTRACT
      // double, error=9.45037202901655672811489051683e-17
      poly = RV(0.259935726478127940817401224248);
      poly = fma(poly, y_sq, RV(0.140676370079882918464564658472));
      poly = fma(poly, y_sq, RV(0.196513478841924000569879320851));
      poly = fma(poly, y_sq, RV(0.221596471338300882039273355617));
      poly = fma(poly, y_sq, RV(0.262327298560598641020007602127));
      poly = fma(poly, y_sq, RV(0.320598261015170101859472461613));
      poly = fma(poly, y_sq, RV(0.412198595799726905825871956187));
      poly = fma(poly, y_sq, RV(0.57707801621733949207376840932));
      poly = fma(poly, y_sq, RV(0.96179669392666302667713134701));
      poly = fma(poly, y_sq, RV(2.88539008177792581277410991327));
#else
      // double, error=1.21820548287702216975532695788e-13
      poly = RV(0.293251364683280430617251942017);
      poly = fma(poly, y_sq, RV(0.201364223624519571276587631354));
      poly = fma(poly, y_sq, RV(0.264443947645547871780098560836));
      poly = fma(poly, y_sq, RV(0.320475051320227723946459855458));
      poly = fma(poly, y_sq, RV(0.412202612052105347480086431555));
      poly = fma(poly, y_sq, RV(0.57707794741938820005328259256));
      poly = fma(poly, y_sq, RV(0.96179669445173881282808321929));
      poly = fma(poly, y_sq, RV(2.88539008177676567117601117274));
#endif
    } else {
      // Only 4- and 8-byte real_t are supported
      __builtin_unreachable();
    }
    // The polynomial was in y^2; the final factor y makes it odd in y
    poly *= y;
    
    // Step 3: add the exponent removed in step 1
    poly += convert_float(exponent);
    
    return poly;
  }
  
  
  // Natural logarithm, obtained from the base-2 logarithm through
  // the identity ln(x) = log2(x) * ln(2).
  template<typename realvec_t>
  inline
  realvec_t mathfuncs<realvec_t>::vml_log(realvec_t x)
  {
    realvec_t result = log2(x);
    result *= RV(M_LN2);
    return result;
  }

  // Base-10 logarithm, obtained from the natural logarithm through
  // the identity log10(x) = ln(x) * log10(e).
  template<typename realvec_t>
  inline
  realvec_t mathfuncs<realvec_t>::vml_log10(realvec_t x)
  {
    realvec_t result = log(x);
    result *= RV(M_LOG10E);
    return result;
  }

  // log(1 + x), computed by forming the sum 1 + x and passing it to
  // log. The sum is rounded before the logarithm is taken, so low
  // bits of a small x do not contribute to the result.
  template<typename realvec_t>
  inline
  realvec_t mathfuncs<realvec_t>::vml_log1p(realvec_t x)
  {
    realvec_t one_plus_x = RV(1.0) + x;
    return log(one_plus_x);
#if 0
    // Disabled alternative following Goldberg, theorem 4.
    // NOTE(review): the theorem's test is "1 + x == 1"; the
    // condition below compares against x instead. Confirm before
    // enabling this branch.
    realvec_t x1 = RV(1.0) + x;
    x1.barrier();
    return ifthen(x1 == x, x, x * log(x1) / (x1 - RV(1.0)));
#endif
  }
  
}; // namespace vecmathlib

#endif  // #ifndef MATHFUNCS_LOG_H