1 files changed, 96 insertions, 54 deletions
diff --git a/contrib/llvm/lib/Support/BlockFrequency.cpp b/contrib/llvm/lib/Support/BlockFrequency.cpp
index 84a993e..00efe90 100644
--- a/contrib/llvm/lib/Support/BlockFrequency.cpp
+++ b/contrib/llvm/lib/Support/BlockFrequency.cpp
@@ -18,76 +18,94 @@
 
 using namespace llvm;
 
-namespace {
-
-/// mult96bit - Multiply FREQ by N and store result in W array.
-void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) {
+/// Multiply FREQ by N and store result in W array.
+static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) {
   uint64_t u0 = freq & UINT32_MAX;
   uint64_t u1 = freq >> 32;
 
-  // Represent 96-bit value as w[2]:w[1]:w[0];
-  uint32_t w[3] = { 0, 0, 0 };
-
+  // Represent 96-bit value as W[2]:W[1]:W[0];
   uint64_t t = u0 * N;
   uint64_t k = t >> 32;
-  w[0] = t;
+  W[0] = t;
   t = u1 * N + k;
-  w[1] = t;
-  w[2] = t >> 32;
-
-  // W[1] - higher bits.
-  // W[0] - lower bits.
-  W[0] = w[0] + ((uint64_t) w[1] << 32);
-  W[1] = w[2];
+  W[1] = t;
+  W[2] = t >> 32;
 }
 
-
-/// div96bit - Divide 96-bit value stored in W array by D. Return 64-bit frequency.
-uint64_t div96bit(uint64_t W[2], uint32_t D) {
-  uint64_t y = W[0];
-  uint64_t x = W[1];
-  int i;
-
-  for (i = 1; i <= 64 && x; ++i) {
-    uint32_t t = (int)x >> 31;
-    x = (x << 1) | (y >> 63);
-    y = y << 1;
-    if ((x | t) >= D) {
-      x -= D;
-      ++y;
+/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a
+/// 32 bit unsigned integer, we can use a short division algorithm.
+static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) {
+  // We assume that W[2] is non-zero since if W[2] is not then the user should
+  // just use hardware division.
+  assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is "
+         "zero, the caller should just use 64/32 hardware.");
+  uint32_t Q[3] = { 0, 0, 0 };
+
+  // The generalized short division algorithm sets i to m + n - 1, where n is
+  // the number of words in the divisior and m is the number of words by which
+  // the divident exceeds the divisor (i.e. m + n == the length of the dividend
+  // in words). Due to our assumption that W[2] is non-zero, we know that the
+  // dividend is of length 3 implying since n is 1 that m = 2. Thus we set i to
+  // m + n - 1 = 2 + 1 - 1 = 2.
+  uint32_t R = 0;
+  for (int i = 2; i >= 0; --i) {
+    uint64_t PartialD = uint64_t(R) << 32 | W[i];
+    if (PartialD == 0) {
+      Q[i] = 0;
+      R = 0;
+    } else if (PartialD < D) {
+      Q[i] = 0;
+      R = uint32_t(PartialD);
+    } else if (PartialD == D) {
+      Q[i] = 1;
+      R = 0;
+    } else {
+      Q[i] = uint32_t(PartialD / D);
+      R = uint32_t(PartialD - (Q[i] * D));
     }
   }
 
-  return y << (64 - i + 1);
-}
+  // If Q[2] is non-zero, then we overflowed.
+  uint64_t Result;
+  if (Q[2]) {
+    Result = UINT64_MAX;
+    R = D;
+  } else {
+    // Form the final uint64_t result, avoiding endianness issues.
+    Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32);
+  }
+
+  if (Rout)
+    *Rout = R;
 
+  return Result;
 }
 
+uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) {
+  assert(D != 0 && "Division by zero");
 
-BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
-  uint32_t n = Prob.getNumerator();
-  uint32_t d = Prob.getDenominator();
-
-  assert(n <= d && "Probability must be less or equal to 1.");
-
-  // Calculate Frequency * n.
-  uint64_t mulLo = (Frequency & UINT32_MAX) * n;
-  uint64_t mulHi = (Frequency >> 32) * n;
-  uint64_t mulRes = (mulHi << 32) + mulLo;
-
-  // If there was overflow use 96-bit operations.
-  if (mulHi > UINT32_MAX || mulRes < mulLo) {
-    // 96-bit value represented as W[1]:W[0].
-    uint64_t W[2];
-
-    // Probability is less or equal to 1 which means that results must fit
-    // 64-bit.
-    mult96bit(Frequency, n, W);
-    Frequency = div96bit(W, d);
-    return *this;
+  // Calculate Frequency * N.
+  uint64_t MulLo = (Frequency & UINT32_MAX) * N;
+  uint64_t MulHi = (Frequency >> 32) * N;
+  uint64_t MulRes = (MulHi << 32) + MulLo;
+
+  // If the product fits in 64 bits, just use built-in division.
+  if (MulHi <= UINT32_MAX && MulRes >= MulLo) {
+    Frequency = MulRes / D;
+    return MulRes % D;
   }
 
-  Frequency = mulRes / d;
+  // Product overflowed, use 96-bit operations.
+  // 96-bit value represented as W[2]:W[1]:W[0].
+  uint32_t W[3];
+  uint32_t R;
+  mult96bit(Frequency, N, W);
+  Frequency = divrem96bit(W, D, &R);
+  return R;
+}
+
+BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
+  scale(Prob.getNumerator(), Prob.getDenominator());
   return *this;
 }
 
@@ -98,6 +116,17 @@ BlockFrequency::operator*(const BranchProbability &Prob) const {
   return Freq;
 }
 
+BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) {
+  scale(Prob.getDenominator(), Prob.getNumerator());
+  return *this;
+}
+
+BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const {
+  BlockFrequency Freq(Frequency);
+  Freq /= Prob;
+  return Freq;
+}
+
 BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
   uint64_t Before = Freq.Frequency;
   Frequency += Freq.Frequency;
@@ -116,8 +145,21 @@ BlockFrequency::operator+(const BlockFrequency &Prob) const {
   return Freq;
 }
 
+uint32_t BlockFrequency::scale(const BranchProbability &Prob) {
+  return scale(Prob.getNumerator(), Prob.getDenominator());
+}
+
 void BlockFrequency::print(raw_ostream &OS) const {
-  OS << Frequency;
+  // Convert fixed-point number to decimal.
+  OS << Frequency / getEntryFrequency() << ".";
+  uint64_t Rem = Frequency % getEntryFrequency();
+  uint64_t Eps = 1;
+  do {
+    Rem *= 10;
+    Eps *= 10;
+    OS << Rem / getEntryFrequency();
+    Rem = Rem % getEntryFrequency();
+  } while (Rem >= Eps/2);
 }
 
 namespace llvm {