summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp294
1 files changed, 189 insertions, 105 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 39de108..6a43f8d 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -88,6 +88,52 @@ typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
BlockEdgeMap;
+class SampleCoverageTracker {
+public:
+ SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+
+ bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
+ uint32_t Discriminator, uint64_t Samples);
+ unsigned computeCoverage(unsigned Used, unsigned Total) const;
+ unsigned countUsedRecords(const FunctionSamples *FS) const;
+ unsigned countBodyRecords(const FunctionSamples *FS) const;
+ uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
+ uint64_t countBodySamples(const FunctionSamples *FS) const;
+ void clear() {
+ SampleCoverage.clear();
+ TotalUsedSamples = 0;
+ }
+
+private:
+ typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
+ typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
+ FunctionSamplesCoverageMap;
+
+ /// Coverage map for sampling records.
+ ///
+ /// This map keeps a record of sampling records that have been matched to
+ /// an IR instruction. This is used to detect some form of staleness in
+ /// profiles (see flag -sample-profile-check-coverage).
+ ///
+ /// Each entry in the map corresponds to a FunctionSamples instance. This is
+ /// another map that counts how many times the sample record at the
+ /// given location has been used.
+ FunctionSamplesCoverageMap SampleCoverage;
+
+ /// Number of samples used from the profile.
+ ///
+ /// When a sampling record is used for the first time, the samples from
+ /// that record are added to this accumulator. Coverage is later computed
+ /// based on the total number of samples available in this function and
+ /// its callsites.
+ ///
+ /// Note that this accumulator tracks samples used from a single function
+ /// and all the inlined callsites. Strictly, we should have a map of counters
+ /// keyed by FunctionSamples pointers, but these stats are cleared after
+ /// every function, so we just need to keep a single counter.
+ uint64_t TotalUsedSamples;
+};
+
/// \brief Sample profile pass.
///
/// This pass reads profile data from the file specified by
@@ -110,9 +156,9 @@ protected:
bool runOnFunction(Function &F);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
- ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
- ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
- const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
+ ErrorOr<uint64_t> getInstWeight(const Instruction &I);
+ ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
+ const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineHotFunctions(Function &F);
void printEdgeWeight(raw_ostream &OS, Edge E);
@@ -125,7 +171,7 @@ protected:
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
- bool propagateThroughEdges(Function &F);
+ bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
unsigned getOffset(unsigned L, unsigned H) const;
void clearFunctionData();
@@ -169,6 +215,8 @@ protected:
/// \brief Successors for each basic block in the CFG.
BlockEdgeMap Successors;
+ SampleCoverageTracker CoverageTracker;
+
/// \brief Profile reader object.
std::unique_ptr<SampleProfileReader> Reader;
@@ -176,7 +224,7 @@ protected:
FunctionSamples *Samples;
/// \brief Name of the profile file to load.
- StringRef Filename;
+ std::string Filename;
/// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
@@ -204,64 +252,17 @@ public:
bool doInitialization(Module &M) override {
return SampleLoader.doInitialization(M);
}
- const char *getPassName() const override { return "Sample profile pass"; }
+ StringRef getPassName() const override { return "Sample profile pass"; }
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
}
-private:
- SampleProfileLoader SampleLoader;
-};
-
-class SampleCoverageTracker {
-public:
- SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
-
- bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
- uint32_t Discriminator, uint64_t Samples);
- unsigned computeCoverage(unsigned Used, unsigned Total) const;
- unsigned countUsedRecords(const FunctionSamples *FS) const;
- unsigned countBodyRecords(const FunctionSamples *FS) const;
- uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
- uint64_t countBodySamples(const FunctionSamples *FS) const;
- void clear() {
- SampleCoverage.clear();
- TotalUsedSamples = 0;
- }
private:
- typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
- typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
- FunctionSamplesCoverageMap;
-
- /// Coverage map for sampling records.
- ///
- /// This map keeps a record of sampling records that have been matched to
- /// an IR instruction. This is used to detect some form of staleness in
- /// profiles (see flag -sample-profile-check-coverage).
- ///
- /// Each entry in the map corresponds to a FunctionSamples instance. This is
- /// another map that counts how many times the sample record at the
- /// given location has been used.
- FunctionSamplesCoverageMap SampleCoverage;
-
- /// Number of samples used from the profile.
- ///
- /// When a sampling record is used for the first time, the samples from
- /// that record are added to this accumulator. Coverage is later computed
- /// based on the total number of samples available in this function and
- /// its callsites.
- ///
- /// Note that this accumulator tracks samples used from a single function
- /// and all the inlined callsites. Strictly, we should have a map of counters
- /// keyed by FunctionSamples pointers, but these stats are cleared after
- /// every function, so we just need to keep a single counter.
- uint64_t TotalUsedSamples;
+ SampleProfileLoader SampleLoader;
};
-SampleCoverageTracker CoverageTracker;
-
/// Return true if the given callsite is hot wrt to its caller.
///
/// Functions that were inlined in the original binary will be represented
@@ -451,7 +452,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
///
/// \returns the weight of \p Inst.
ErrorOr<uint64_t>
-SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
+SampleProfileLoader::getInstWeight(const Instruction &Inst) {
const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -460,18 +461,28 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
if (!FS)
return std::error_code();
- // Ignore all dbg_value intrinsics.
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst);
- if (II && II->getIntrinsicID() == Intrinsic::dbg_value)
+ // Ignore all intrinsics and branch instructions.
+ // Branch instruction usually contains debug info from sources outside of
+ // the residing basic block, thus we ignore them during annotation.
+ if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
return std::error_code();
+ // If a call/invoke instruction is inlined in profile, but not inlined here,
+ // it means that the inlined callsite has no sample, thus the call
+ // instruction should have 0 count.
+ bool IsCall = isa<CallInst>(Inst) || isa<InvokeInst>(Inst);
+ if (IsCall && findCalleeFunctionSamples(Inst))
+ return 0;
+
const DILocation *DIL = DLoc;
unsigned Lineno = DLoc.getLine();
unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
uint32_t Discriminator = DIL->getDiscriminator();
- ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
+ ErrorOr<uint64_t> R = IsCall
+ ? FS->findCallSamplesAt(LineOffset, Discriminator)
+ : FS->findSamplesAt(LineOffset, Discriminator);
if (R) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
@@ -488,13 +499,6 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
<< Inst << " (line offset: " << Lineno - HeaderLineno << "."
<< DIL->getDiscriminator() << " - weight: " << R.get()
<< ")\n");
- } else {
- // If a call instruction is inlined in profile, but not inlined here,
- // it means that the inlined callsite has no sample, thus the call
- // instruction should have 0 count.
- const CallInst *CI = dyn_cast<CallInst>(&Inst);
- if (CI && findCalleeFunctionSamples(*CI))
- R = 0;
}
return R;
}
@@ -508,23 +512,17 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
///
/// \returns the weight for \p BB.
ErrorOr<uint64_t>
-SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
- DenseMap<uint64_t, uint64_t> CM;
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
+ uint64_t Max = 0;
+ bool HasWeight = false;
for (auto &I : BB->getInstList()) {
const ErrorOr<uint64_t> &R = getInstWeight(I);
- if (R) CM[R.get()]++;
- }
- if (CM.size() == 0) return std::error_code();
- uint64_t W = 0, C = 0;
- for (const auto &C_W : CM) {
- if (C_W.second == W) {
- C = std::max(C, C_W.first);
- } else if (C_W.second > W) {
- C = C_W.first;
- W = C_W.second;
+ if (R) {
+ Max = std::max(Max, R.get());
+ HasWeight = true;
}
}
- return C;
+ return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
}
/// \brief Compute and store the weights of every basic block.
@@ -551,18 +549,18 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
/// \brief Get the FunctionSamples for a call instruction.
///
-/// The FunctionSamples of a call instruction \p Inst is the inlined
+/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
/// instance in which that call instruction is calling to. It contains
/// all samples that resides in the inlined instance. We first find the
/// inlined instance in which the call instruction is from, then we
/// traverse its children to find the callsite with the matching
-/// location and callee function name.
+/// location.
///
-/// \param Inst Call instruction to query.
+/// \param Inst Call/Invoke instruction to query.
///
/// \returns The FunctionSamples pointer to the inlined instance.
const FunctionSamples *
-SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL) {
return nullptr;
@@ -611,7 +609,6 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return FS;
}
-
/// \brief Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, and find if
@@ -627,22 +624,36 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
bool SampleProfileLoader::inlineHotFunctions(Function &F) {
bool Changed = false;
LLVMContext &Ctx = F.getContext();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
+ Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); };
while (true) {
bool LocalChanged = false;
- SmallVector<CallInst *, 10> CIS;
+ SmallVector<Instruction *, 10> CIS;
for (auto &BB : F) {
+ bool Hot = false;
+ SmallVector<Instruction *, 10> Candidates;
for (auto &I : BB.getInstList()) {
- CallInst *CI = dyn_cast<CallInst>(&I);
- if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
- CIS.push_back(CI);
+ const FunctionSamples *FS = nullptr;
+ if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
+ (FS = findCalleeFunctionSamples(I))) {
+ Candidates.push_back(&I);
+ if (callsiteIsHot(Samples, FS))
+ Hot = true;
+ }
+ }
+ if (Hot) {
+ CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
}
}
- for (auto CI : CIS) {
- InlineFunctionInfo IFI(nullptr, ACT);
- Function *CalledFunction = CI->getCalledFunction();
- DebugLoc DLoc = CI->getDebugLoc();
- uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
- if (InlineFunction(CI, IFI)) {
+ for (auto I : CIS) {
+ InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
+ CallSite CS(I);
+ Function *CalledFunction = CS.getCalledFunction();
+ if (!CalledFunction || !CalledFunction->getSubprogram())
+ continue;
+ DebugLoc DLoc = I->getDebugLoc();
+ uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
+ if (InlineFunction(CS, IFI)) {
LocalChanged = true;
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
Twine("inlined hot callee '") +
@@ -693,6 +704,10 @@ void SampleProfileLoader::findEquivalencesFor(
bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
EquivalenceClass[BB2] = EC;
+ // If BB2 is visited, then the entire EC should be marked as visited.
+ if (VisitedBlocks.count(BB2)) {
+ VisitedBlocks.insert(EC);
+ }
// If BB2 is heavier than BB1, make BB2 have the same weight
// as BB1.
@@ -705,7 +720,11 @@ void SampleProfileLoader::findEquivalencesFor(
Weight = std::max(Weight, BlockWeights[BB2]);
}
}
- BlockWeights[EC] = Weight;
+ if (EC == &EC->getParent()->getEntryBlock()) {
+ BlockWeights[EC] = Samples->getHeadSamples() + 1;
+ } else {
+ BlockWeights[EC] = Weight;
+ }
}
/// \brief Find equivalence classes.
@@ -796,9 +815,12 @@ uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
/// count of the basic block, if needed.
///
/// \param F Function to process.
+/// \param UpdateBlockCount Whether we should update basic block counts that
+/// has already been annotated.
///
/// \returns True if new weights were assigned to edges or blocks.
-bool SampleProfileLoader::propagateThroughEdges(Function &F) {
+bool SampleProfileLoader::propagateThroughEdges(Function &F,
+ bool UpdateBlockCount) {
bool Changed = false;
DEBUG(dbgs() << "\nPropagation through edges\n");
for (const auto &BI : F) {
@@ -890,11 +912,35 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
else
EdgeWeights[UnknownEdge] = 0;
+ const BasicBlock *OtherEC;
+ if (i == 0)
+ OtherEC = EquivalenceClass[UnknownEdge.first];
+ else
+ OtherEC = EquivalenceClass[UnknownEdge.second];
+ // Edge weights should never exceed the BB weights it connects.
+ if (VisitedBlocks.count(OtherEC) &&
+ EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
+ EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
VisitedEdges.insert(UnknownEdge);
Changed = true;
DEBUG(dbgs() << "Set weight for edge: ";
printEdgeWeight(dbgs(), UnknownEdge));
}
+ } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
+ // If a block Weights 0, all its in/out edges should weight 0.
+ if (i == 0) {
+ for (auto *Pred : Predecessors[BB]) {
+ Edge E = std::make_pair(Pred, BB);
+ EdgeWeights[E] = 0;
+ VisitedEdges.insert(E);
+ }
+ } else {
+ for (auto *Succ : Successors[BB]) {
+ Edge E = std::make_pair(BB, Succ);
+ EdgeWeights[E] = 0;
+ VisitedEdges.insert(E);
+ }
+ }
} else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
uint64_t &BBWeight = BlockWeights[BB];
// We have a self-referential edge and the weight of BB is known.
@@ -907,6 +953,11 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
DEBUG(dbgs() << "Set self-referential edge weight to: ";
printEdgeWeight(dbgs(), SelfReferentialEdge));
}
+ if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
+ BlockWeights[EC] = TotalWeight;
+ VisitedBlocks.insert(EC);
+ Changed = true;
+ }
}
}
@@ -966,7 +1017,21 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// Add an entry count to the function using the samples gathered
// at the function entry.
- F.setEntryCount(Samples->getHeadSamples());
+ F.setEntryCount(Samples->getHeadSamples() + 1);
+
+ // If BB weight is larger than its corresponding loop's header BB weight,
+ // use the BB weight to replace the loop header BB weight.
+ for (auto &BI : F) {
+ BasicBlock *BB = &BI;
+ Loop *L = LI->getLoopFor(BB);
+ if (!L) {
+ continue;
+ }
+ BasicBlock *Header = L->getHeader();
+ if (Header && BlockWeights[BB] > BlockWeights[Header]) {
+ BlockWeights[Header] = BlockWeights[BB];
+ }
+ }
// Before propagation starts, build, for each block, a list of
// unique predecessors and successors. This is necessary to handle
@@ -977,7 +1042,23 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// Propagate until we converge or we go past the iteration limit.
while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F);
+ Changed = propagateThroughEdges(F, false);
+ }
+
+ // The first propagation propagates BB counts from annotated BBs to unknown
+ // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
+ // to propagate edge weights.
+ VisitedEdges.clear();
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, false);
+ }
+
+ // The 3rd propagation pass allows adjust annotated BB weights that are
+ // obviously wrong.
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, true);
}
// Generate MD_prof metadata for every branch instruction using the
@@ -994,7 +1075,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!dyn_cast<IntrinsicInst>(&I)) {
SmallVector<uint32_t, 1> Weights;
Weights.push_back(BlockWeights[BB]);
- CI->setMetadata(LLVMContext::MD_prof,
+ CI->setMetadata(LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
}
}
@@ -1023,7 +1104,9 @@ void SampleProfileLoader::propagateWeights(Function &F) {
DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
Weight = std::numeric_limits<uint32_t>::max();
}
- Weights.push_back(static_cast<uint32_t>(Weight));
+ // Weight is added by one to avoid propagation errors introduced by
+ // 0 weights.
+ Weights.push_back(static_cast<uint32_t>(Weight + 1));
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
@@ -1192,10 +1275,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
char SampleProfileLoaderLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
- "Sample Profile loader", false, false)
+ "Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
- "Sample Profile loader", false, false)
+ "Sample Profile loader", false, false)
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
@@ -1232,12 +1315,13 @@ bool SampleProfileLoader::runOnModule(Module &M) {
clearFunctionData();
retval |= runOnFunction(F);
}
- M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
+ if (M.getProfileSummary() == nullptr)
+ M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
return retval;
}
bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
- // FIXME: pass in AssumptionCache correctly for the new pass manager.
+ // FIXME: pass in AssumptionCache correctly for the new pass manager.
SampleLoader.setACT(&getAnalysis<AssumptionCacheTracker>());
return SampleLoader.runOnModule(M);
}
@@ -1251,7 +1335,7 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
}
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
SampleProfileLoader SampleLoader(SampleProfileFile);
OpenPOWER on IntegriCloud