summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO/Inliner.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp276
1 files changed, 170 insertions, 106 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 3f4731c..317770d 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
@@ -260,8 +261,8 @@ static bool InlineCallIfPossible(
/// Return true if inlining of CS can block the caller from being
/// inlined which is proved to be more beneficial. \p IC is the
/// estimated inline cost associated with callsite \p CS.
-/// \p TotalAltCost will be set to the estimated cost of inlining the caller
-/// if \p CS is suppressed for inlining.
+/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
+/// caller if \p CS is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
int &TotalSecondaryCost,
@@ -288,7 +289,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// treating them as truly abstract units etc.
TotalSecondaryCost = 0;
// The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ int CandidateCost = IC.getCost() - 1;
// This bool tracks what happens if we do NOT inline C into B.
bool callerWillBeRemoved = Caller->hasLocalLinkage();
// This bool tracks what happens if we DO inline C into B.
@@ -325,7 +326,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (callerWillBeRemoved && !Caller->use_empty())
+ if (callerWillBeRemoved && !Caller->hasOneUse())
TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
@@ -342,6 +343,7 @@ static bool shouldInline(CallSite CS,
InlineCost IC = GetInlineCost(CS);
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
@@ -355,19 +357,20 @@ static bool shouldInline(CallSite CS,
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", Callee)
- << " should never be inlined (cost=never)");
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ << NV("Callee", Callee) << " not inlined into "
+ << NV("Caller", Caller)
+ << " because it should never be inlined (cost=never)");
return false;
}
- Function *Caller = CS.getCaller();
if (!IC) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", Callee) << " too costly to inline (cost="
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
+ << NV("Callee", Callee) << " not inlined into "
+ << NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return false;
@@ -378,8 +381,8 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
<< " Cost = " << IC.getCost()
<< ", outer Cost = " << TotalSecondaryCost << '\n');
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
- "IncreaseCostInOtherContexts", Call)
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
+ Call)
<< "Not inlining. Cost of inlining " << NV("Callee", Callee)
<< " increases the cost of inlining " << NV("Caller", Caller)
<< " in other contexts");
@@ -499,7 +502,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache);
+ InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -516,52 +519,54 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
Function *Caller = CS.getCaller();
Function *Callee = CS.getCalledFunction();
- // If this call site is dead and it is to a readonly function, we should
- // just delete the call instead of trying to inline it, regardless of
- // size. This happens because IPSCCP propagates the result out of the
- // call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
- DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction()
- << "\n");
- // Update the call graph by deleting the edge from Callee to Caller.
- CG[Caller]->removeCallEdgeFor(CS);
- CS.getInstruction()->eraseFromParent();
- ++NumCallsDeleted;
- } else {
- // We can only inline direct calls to non-declarations.
- if (!Callee || Callee->isDeclaration())
- continue;
+ // We can only inline direct calls to non-declarations.
+ if (!Callee || Callee->isDeclaration())
+ continue;
+ Instruction *Instr = CS.getInstruction();
+
+ bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);
+
+ int InlineHistoryID;
+ if (!IsTriviallyDead) {
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
// itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
- int InlineHistoryID = CallSites[CSi].second;
+ InlineHistoryID = CallSites[CSi].second;
if (InlineHistoryID != -1 &&
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue;
+ }
+ // FIXME for new PM: because of the old PM we currently generate ORE and
+ // in turn BFI on demand. With the new PM, the ORE dependency should
+ // just become a regular analysis dependency.
+ OptimizationRemarkEmitter ORE(Caller);
+
+ // If the policy determines that we should inline this function,
+ // delete the call instead.
+ if (!shouldInline(CS, GetInlineCost, ORE))
+ continue;
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (IsTriviallyDead) {
+ DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ Instr->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
// Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ DebugLoc DLoc = Instr->getDebugLoc();
BasicBlock *Block = CS.getParent();
- // FIXME for new PM: because of the old PM we currently generate ORE and
- // in turn BFI on demand. With the new PM, the ORE dependency should
- // just become a regular analysis dependency.
- OptimizationRemarkEmitter ORE(Caller);
-
- // If the policy determines that we should inline this function,
- // try to do so.
- using namespace ore;
- if (!shouldInline(CS, GetInlineCost, ORE)) {
- ORE.emit(
- OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller));
- continue;
- }
// Attempt to inline the function.
+ using namespace ore;
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime, AARGetter,
ImportedFunctionsStats)) {
@@ -638,22 +643,12 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
[this](CallSite CS) { return getInlineCost(CS); },
- AARGetter, ImportedFunctionsStats);
+ LegacyAARGetter(*this), ImportedFunctionsStats);
}
/// Remove now-dead linkonce functions at the end of
@@ -750,9 +745,6 @@ bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
- .getManager();
const ModuleAnalysisManager &MAM =
AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
bool Changed = false;
@@ -761,35 +753,52 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Module &M = *InitialC.begin()->getFunction().getParent();
ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&](Function &F) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(F);
- };
-
- // Setup the data structure used to plumb customization into the
- // `InlineFunction` routine.
- InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+ // We use a single common worklist for calls across the entire SCC. We
+ // process these in-order and append new calls introduced during inlining to
+ // the end.
+ //
+ // Note that this particular order of processing is actually critical to
+ // avoid very bad behaviors. Consider *highly connected* call graphs where
+ // each function contains a small amonut of code and a couple of calls to
+ // other functions. Because the LLVM inliner is fundamentally a bottom-up
+ // inliner, it can handle gracefully the fact that these all appear to be
+ // reasonable inlining candidates as it will flatten things until they become
+ // too big to inline, and then move on and flatten another batch.
+ //
+ // However, when processing call edges *within* an SCC we cannot rely on this
+ // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
+ // functions we can end up incrementally inlining N calls into each of
+ // N functions because each incremental inlining decision looks good and we
+ // don't have a topological ordering to prevent explosions.
+ //
+ // To compensate for this, we don't process transitive edges made immediate
+ // by inlining until we've done one pass of inlining across the entire SCC.
+ // Large, highly connected SCCs still lead to some amount of code bloat in
+ // this model, but it is uniformly spread across all the functions in the SCC
+ // and eventually they all become too large to inline, rather than
+ // incrementally maknig a single function grow in a super linear fashion.
+ SmallVector<std::pair<CallSite, int>, 16> Calls;
- auto GetInlineCost = [&](CallSite CS) {
- Function &Callee = *CS.getCalledFunction();
- auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
- return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
- };
+ // Populate the initial list of calls in this SCC.
+ for (auto &N : InitialC) {
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions.
+ // FIXME: Using instructions sequence is a really bad way to do this.
+ // Instead we should do an actual RPO walk of the function body.
+ for (Instruction &I : instructions(N.getFunction()))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (!Callee->isDeclaration())
+ Calls.push_back({CS, -1});
+ }
+ if (Calls.empty())
+ return PreservedAnalyses::all();
- // We use a worklist of nodes to process so that we can handle if the SCC
- // structure changes and some nodes are no longer part of the current SCC. We
- // also need to use an updatable pointer for the SCC as a consequence.
- SmallVector<LazyCallGraph::Node *, 16> Nodes;
- for (auto &N : InitialC)
- Nodes.push_back(&N);
+ // Capture updatable variables for the current SCC and RefSCC.
auto *C = &InitialC;
auto *RC = &C->getOuterRefSCC();
- // We also use a secondary worklist of call sites within a particular node to
- // allow quickly continuing to inline through newly inlined call sites where
- // possible.
- SmallVector<std::pair<CallSite, int>, 16> Calls;
-
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
// infinite inlining in some obscure cases. To represent this, we use an
@@ -805,34 +814,58 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// defer deleting these to make it easier to handle the call graph updates.
SmallVector<Function *, 4> DeadFunctions;
- do {
- auto &N = *Nodes.pop_back_val();
+ // Loop forward over all of the calls. Note that we cannot cache the size as
+ // inlining can introduce new calls that need to be processed.
+ for (int i = 0; i < (int)Calls.size(); ++i) {
+ // We expect the calls to typically be batched with sequences of calls that
+ // have the same caller, so we first set up some shared infrastructure for
+ // this caller. We also do any pruning we can at this layer on the caller
+ // alone.
+ Function &F = *Calls[i].first.getCaller();
+ LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- Function &F = N.getFunction();
if (F.hasFnAttribute(Attribute::OptimizeNone))
continue;
+ DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
+
+ // Get a FunctionAnalysisManager via a proxy for this particular node. We
+ // do this each time we visit a node as the SCC may have changed and as
+ // we're going to mutate this particular function we want to make sure the
+ // proxy is in place to forward any invalidation events. We can use the
+ // manager we get here for looking up results for functions other than this
+ // node however because those functions aren't going to be mutated by this
+ // pass.
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
+ .getManager();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ auto GetInlineCost = [&](CallSite CS) {
+ Function &Callee = *CS.getCalledFunction();
+ auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
+ return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
+ PSI);
+ };
+
// Get the remarks emission analysis for the caller.
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- // We want to generally process call sites top-down in order for
- // simplifications stemming from replacing the call with the returned value
- // after inlining to be visible to subsequent inlining decisions. So we
- // walk the function backwards and then process the back of the vector.
- // FIXME: Using reverse is a really bad way to do this. Instead we should
- // do an actual PO walk of the function body.
- for (Instruction &I : reverse(instructions(F)))
- if (auto CS = CallSite(&I))
- if (Function *Callee = CS.getCalledFunction())
- if (!Callee->isDeclaration())
- Calls.push_back({CS, -1});
-
+ // Now process as many calls as we have within this caller in the sequnece.
+ // We bail out as soon as the caller has to change so we can update the
+ // call graph and prepare the context of that new caller.
bool DidInline = false;
- while (!Calls.empty()) {
+ for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
int InlineHistoryID;
CallSite CS;
- std::tie(CS, InlineHistoryID) = Calls.pop_back_val();
+ std::tie(CS, InlineHistoryID) = Calls[i];
Function &Callee = *CS.getCalledFunction();
if (InlineHistoryID != -1 &&
@@ -843,6 +876,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!shouldInline(CS, GetInlineCost, ORE))
continue;
+ // Setup the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, &GetAssumptionCache, PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+
if (!InlineFunction(CS, IFI))
continue;
DidInline = true;
@@ -869,7 +909,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// To check this we also need to nuke any dead constant uses (perhaps
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
- if (Callee.use_empty()) {
+ if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
+ Calls.erase(
+ std::remove_if(Calls.begin() + i + 1, Calls.end(),
+ [&Callee](const std::pair<CallSite, int> &Call) {
+ return Call.first.getCaller() == &Callee;
+ }),
+ Calls.end());
// Clear the body and queue the function itself for deletion when we
// finish inlining and call graph updates.
// Note that after this point, it is an error to do anything other
@@ -882,6 +928,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
+ // Back the call index up by one to put us in a good position to go around
+ // the outer loop.
+ --i;
+
if (!DidInline)
continue;
Changed = true;
@@ -896,8 +946,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// below.
for (Function *InlinedCallee : InlinedCallees) {
LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
- for (LazyCallGraph::Edge &E : CalleeN)
- RC->insertTrivialRefEdge(N, *E.getNode());
+ for (LazyCallGraph::Edge &E : *CalleeN)
+ RC->insertTrivialRefEdge(N, E.getNode());
}
InlinedCallees.clear();
@@ -908,8 +958,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// re-use the exact same logic for updating the call graph to reflect the
// change..
C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+ DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
RC = &C->getOuterRefSCC();
- } while (!Nodes.empty());
+ }
// Now that we've finished inlining all of the calls across this SCC, delete
// all of the trivially dead functions, updating the call graph and the CGSCC
@@ -920,8 +971,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// sets.
for (Function *DeadF : DeadFunctions) {
// Get the necessary information out of the call graph and nuke the
- // function there.
+ // function there. Also, cclear out any cached analyses.
auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
+ .getManager();
+ FAM.clear(*DeadF);
+ AM.clear(DeadC);
auto &DeadRC = DeadC.getOuterRefSCC();
CG.removeDeadFunction(*DeadF);
@@ -933,5 +989,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// And delete the actual function from the module.
M.getFunctionList().erase(DeadF);
}
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ // Even if we change the IR, we update the core CGSCC data structures and so
+ // can preserve the proxy to the function analysis manager.
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ return PA;
}
OpenPOWER on IntegriCloud