summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Support/BlockFrequency.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Support/BlockFrequency.cpp')
-rw-r--r--contrib/llvm/lib/Support/BlockFrequency.cpp150
1 files changed, 96 insertions, 54 deletions
diff --git a/contrib/llvm/lib/Support/BlockFrequency.cpp b/contrib/llvm/lib/Support/BlockFrequency.cpp
index 84a993e..00efe90 100644
--- a/contrib/llvm/lib/Support/BlockFrequency.cpp
+++ b/contrib/llvm/lib/Support/BlockFrequency.cpp
@@ -18,76 +18,94 @@
using namespace llvm;
-namespace {
-
-/// mult96bit - Multiply FREQ by N and store result in W array.
-void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) {
+/// Multiply FREQ by N and store result in W array.
+static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) {
uint64_t u0 = freq & UINT32_MAX;
uint64_t u1 = freq >> 32;
- // Represent 96-bit value as w[2]:w[1]:w[0];
- uint32_t w[3] = { 0, 0, 0 };
-
+ // Represent 96-bit value as W[2]:W[1]:W[0];
uint64_t t = u0 * N;
uint64_t k = t >> 32;
- w[0] = t;
+ W[0] = t;
t = u1 * N + k;
- w[1] = t;
- w[2] = t >> 32;
-
- // W[1] - higher bits.
- // W[0] - lower bits.
- W[0] = w[0] + ((uint64_t) w[1] << 32);
- W[1] = w[2];
+ W[1] = t;
+ W[2] = t >> 32;
}
-
-/// div96bit - Divide 96-bit value stored in W array by D. Return 64-bit frequency.
-uint64_t div96bit(uint64_t W[2], uint32_t D) {
- uint64_t y = W[0];
- uint64_t x = W[1];
- int i;
-
- for (i = 1; i <= 64 && x; ++i) {
- uint32_t t = (int)x >> 31;
- x = (x << 1) | (y >> 63);
- y = y << 1;
- if ((x | t) >= D) {
- x -= D;
- ++y;
+/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a
+/// 32 bit unsigned integer, we can use a short division algorithm.
+static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) {
+ // We assume that W[2] is non-zero: if W[2] is zero, the 96-bit value fits in
+ // 64 bits and the caller should just use hardware division.
+ assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is "
+ "zero, the caller should just use 64/32 hardware.");
+ uint32_t Q[3] = { 0, 0, 0 };
+
+ // The generalized short division algorithm sets i to m + n - 1, where n is
+ // the number of words in the divisor and m is the number of words by which
+ // the dividend exceeds the divisor (i.e. m + n == the length of the dividend
+ // in words). Due to our assumption that W[2] is non-zero, we know that the
+ // dividend is of length 3; since n is 1, this implies m = 2. Thus we set i
+ // to m + n - 1 = 2 + 1 - 1 = 2.
+ uint32_t R = 0;
+ for (int i = 2; i >= 0; --i) {
+ uint64_t PartialD = uint64_t(R) << 32 | W[i];
+ if (PartialD == 0) {
+ Q[i] = 0;
+ R = 0;
+ } else if (PartialD < D) {
+ Q[i] = 0;
+ R = uint32_t(PartialD);
+ } else if (PartialD == D) {
+ Q[i] = 1;
+ R = 0;
+ } else {
+ Q[i] = uint32_t(PartialD / D);
+ R = uint32_t(PartialD - (Q[i] * D));
}
}
- return y << (64 - i + 1);
-}
+ // If Q[2] is non-zero, then we overflowed.
+ uint64_t Result;
+ if (Q[2]) {
+ Result = UINT64_MAX;
+ R = D;
+ } else {
+ // Form the final uint64_t result, avoiding endianness issues.
+ Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32);
+ }
+
+ if (Rout)
+ *Rout = R;
+ return Result;
}
+uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) {
+ assert(D != 0 && "Division by zero");
-BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
- uint32_t n = Prob.getNumerator();
- uint32_t d = Prob.getDenominator();
-
- assert(n <= d && "Probability must be less or equal to 1.");
-
- // Calculate Frequency * n.
- uint64_t mulLo = (Frequency & UINT32_MAX) * n;
- uint64_t mulHi = (Frequency >> 32) * n;
- uint64_t mulRes = (mulHi << 32) + mulLo;
-
- // If there was overflow use 96-bit operations.
- if (mulHi > UINT32_MAX || mulRes < mulLo) {
- // 96-bit value represented as W[1]:W[0].
- uint64_t W[2];
-
- // Probability is less or equal to 1 which means that results must fit
- // 64-bit.
- mult96bit(Frequency, n, W);
- Frequency = div96bit(W, d);
- return *this;
+ // Calculate Frequency * N.
+ uint64_t MulLo = (Frequency & UINT32_MAX) * N;
+ uint64_t MulHi = (Frequency >> 32) * N;
+ uint64_t MulRes = (MulHi << 32) + MulLo;
+
+ // If the product fits in 64 bits, just use built-in division.
+ if (MulHi <= UINT32_MAX && MulRes >= MulLo) {
+ Frequency = MulRes / D;
+ return MulRes % D;
}
- Frequency = mulRes / d;
+ // Product overflowed, use 96-bit operations.
+ // 96-bit value represented as W[2]:W[1]:W[0].
+ uint32_t W[3];
+ uint32_t R;
+ mult96bit(Frequency, N, W);
+ Frequency = divrem96bit(W, D, &R);
+ return R;
+}
+
+BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
+ scale(Prob.getNumerator(), Prob.getDenominator());
return *this;
}
@@ -98,6 +116,17 @@ BlockFrequency::operator*(const BranchProbability &Prob) const {
return Freq;
}
+BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) {
+ scale(Prob.getDenominator(), Prob.getNumerator());
+ return *this;
+}
+
+BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const {
+ BlockFrequency Freq(Frequency);
+ Freq /= Prob;
+ return Freq;
+}
+
BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) {
uint64_t Before = Freq.Frequency;
Frequency += Freq.Frequency;
@@ -116,8 +145,21 @@ BlockFrequency::operator+(const BlockFrequency &Prob) const {
return Freq;
}
+uint32_t BlockFrequency::scale(const BranchProbability &Prob) {
+ return scale(Prob.getNumerator(), Prob.getDenominator());
+}
+
void BlockFrequency::print(raw_ostream &OS) const {
- OS << Frequency;
+ // Convert fixed-point number to decimal.
+ OS << Frequency / getEntryFrequency() << ".";
+ uint64_t Rem = Frequency % getEntryFrequency();
+ uint64_t Eps = 1;
+ do {
+ Rem *= 10;
+ Eps *= 10;
+ OS << Rem / getEntryFrequency();
+ Rem = Rem % getEntryFrequency();
+ } while (Rem >= Eps/2);
}
namespace llvm {
OpenPOWER on IntegriCloud