diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO/Inliner.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 663 |
1 files changed, 462 insertions, 201 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp index 79535ca..3f4731c 100644 --- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/Inliner.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -20,19 +21,21 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; #define DEBUG_TYPE "inline" @@ -47,15 +50,44 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together"); // if those would be more profitable and blocked inline steps. STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); -Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {} - -Inliner::Inliner(char &ID, bool InsertLifetime) +/// Flag to disable manual alloca merging. +/// +/// Merging of allocas was originally done as a stack-size saving technique +/// prior to LLVM's code generator having support for stack coloring based on +/// lifetime markers. It is now in the process of being removed. 
To experiment +/// with disabling it and relying fully on lifetime marker based stack +/// coloring, you can pass this flag to LLVM. +static cl::opt<bool> + DisableInlinedAllocaMerging("disable-inlined-alloca-merging", + cl::init(false), cl::Hidden); + +namespace { +enum class InlinerFunctionImportStatsOpts { + No = 0, + Basic = 1, + Verbose = 2, +}; + +cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( + "inliner-function-import-stats", + cl::init(InlinerFunctionImportStatsOpts::No), + cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", + "basic statistics"), + clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", + "printing of statistics for each inlined function")), + cl::Hidden, cl::desc("Enable inliner stats for imported functions")); +} // namespace + +LegacyInlinerBase::LegacyInlinerBase(char &ID) + : CallGraphSCCPass(ID), InsertLifetime(true) {} + +LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should /// always explicitly call the implementation here. -void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { +void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); @@ -63,62 +95,33 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { CallGraphSCCPass::getAnalysisUsage(AU); } +typedef DenseMap<ArrayType *, std::vector<AllocaInst *>> InlinedArrayAllocasTy; -typedef DenseMap<ArrayType*, std::vector<AllocaInst*> > -InlinedArrayAllocasTy; - -/// If it is possible to inline the specified call site, -/// do so and update the CallGraph for this operation. +/// Look at all of the allocas that we inlined through this call site. 
If we +/// have already inlined other allocas through other calls into this function, +/// then we know that they have disjoint lifetimes and that we can merge them. /// -/// This function also does some basic book-keeping to update the IR. The -/// InlinedArrayAllocas map keeps track of any allocas that are already -/// available from other functions inlined into the caller. If we are able to -/// inline this call site we attempt to reuse already available allocas or add -/// any new allocas to the set if not possible. -static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, - int InlineHistory, bool InsertLifetime) { - Function *Callee = CS.getCalledFunction(); - Function *Caller = CS.getCaller(); - - // We need to manually construct BasicAA directly in order to disable - // its use of other function analyses. - BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee)); - - // Construct our own AA results for this function. We do this manually to - // work around the limitations of the legacy pass manager. - AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR)); - - // Try to inline the function. Get the list of static allocas that were - // inlined. - if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) - return false; - - AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); +/// There are many heuristics possible for merging these allocas, and the +/// different options have different tradeoffs. One thing that we *really* +/// don't want to hurt is SRoA: once inlining happens, often allocas are no +/// longer address taken and so they can be promoted. +/// +/// Our "solution" for that is to only merge allocas whose outermost type is an +/// array type. These are usually not promoted because someone is using a +/// variable index into them. These are also often the most important ones to +/// merge. 
+/// +/// A better solution would be to have real memory lifetime markers in the IR +/// and not have the inliner do any merging of allocas at all. This would +/// allow the backend to do proper stack slot coloring of all allocas that +/// *actually make it to the backend*, which is really what we want. +/// +/// Because we don't have this information, we do this simple and useful hack. +static void mergeInlinedArrayAllocas( + Function *Caller, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) { + SmallPtrSet<AllocaInst *, 16> UsedAllocas; - // Look at all of the allocas that we inlined through this call site. If we - // have already inlined other allocas through other calls into this function, - // then we know that they have disjoint lifetimes and that we can merge them. - // - // There are many heuristics possible for merging these allocas, and the - // different options have different tradeoffs. One thing that we *really* - // don't want to hurt is SRoA: once inlining happens, often allocas are no - // longer address taken and so they can be promoted. - // - // Our "solution" for that is to only merge allocas whose outermost type is an - // array type. These are usually not promoted because someone is using a - // variable index into them. These are also often the most important ones to - // merge. - // - // A better solution would be to have real memory lifetime markers in the IR - // and not have the inliner do any merging of allocas at all. This would - // allow the backend to do proper stack slot coloring of all allocas that - // *actually make it to the backend*, which is really what we want. - // - // Because we don't have this information, we do this simple and useful hack. - // - SmallPtrSet<AllocaInst*, 16> UsedAllocas; - // When processing our SCC, check to see if CS was inlined from some other // call site. 
For example, if we're processing "A" in this code: // A() { B() } @@ -131,25 +134,25 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, // because their scopes are not disjoint. We could make this smarter by // keeping track of the inline history for each alloca in the // InlinedArrayAllocas but this isn't likely to be a significant win. - if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. - return true; - + if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. + return; + // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. - for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); - AllocaNo != e; ++AllocaNo) { + for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e; + ++AllocaNo) { AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; - + // Don't bother trying to merge array allocations (they will usually be // canonicalized to be an allocation *of* an array), or allocations whose // type is not itself an array (because we're afraid of pessimizing SRoA). ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); if (!ATy || AI->isArrayAllocation()) continue; - + // Get the list of all available allocas for this array type. - std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy]; - + std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy]; + // Loop over the allocas in AllocasForType to see if we can reuse one. Note // that we have to be careful not to reuse the same "available" alloca for // multiple different allocas that we just inlined, we use the 'UsedAllocas' @@ -160,24 +163,24 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, unsigned Align1 = AI->getAlignment(), Align2 = AvailableAlloca->getAlignment(); - + // The available alloca has to be in the right function, not in some other // function in this SCC. 
if (AvailableAlloca->getParent() != AI->getParent()) continue; - + // If the inlined function already uses this alloca then we can't reuse // it. if (!UsedAllocas.insert(AvailableAlloca).second) continue; - + // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare // success! - DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " - << *AvailableAlloca << '\n'); - + DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI + << "\n\t\tINTO: " << *AvailableAlloca << '\n'); + // Move affected dbg.declare calls immediately after the new alloca to - // avoid the situation when a dbg.declare preceeds its alloca. + // avoid the situation when a dbg.declare precedes its alloca. if (auto *L = LocalAsMetadata::getIfExists(AI)) if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) for (User *U : MDV->users()) @@ -209,7 +212,7 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, // If we already nuked the alloca, we're done with it. if (MergedAwayAlloca) continue; - + // If we were unable to merge away the alloca either because there are no // allocas of the right type available or because we reused them all // already, remember that this alloca came from an inlined function and mark @@ -218,19 +221,51 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, AllocasForType.push_back(AI); UsedAllocas.insert(AI); } - - return true; } -static void emitAnalysis(CallSite CS, const Twine &Msg) { +/// If it is possible to inline the specified call site, +/// do so and update the CallGraph for this operation. +/// +/// This function also does some basic book-keeping to update the IR. The +/// InlinedArrayAllocas map keeps track of any allocas that are already +/// available from other functions inlined into the caller. If we are able to +/// inline this call site we attempt to reuse already available allocas or add +/// any new allocas to the set if not possible. 
+static bool InlineCallIfPossible( + CallSite CS, InlineFunctionInfo &IFI, + InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, + bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter, + ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { + Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); - LLVMContext &Ctx = Caller->getContext(); - DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); - emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); + + AAResults &AAR = AARGetter(*Callee); + + // Try to inline the function. Get the list of static allocas that were + // inlined. + if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) + return false; + + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats.recordInline(*Caller, *Callee); + + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); + + if (!DisableInlinedAllocaMerging) + mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory); + + return true; } -bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, - int &TotalSecondaryCost) { +/// Return true if inlining of CS can block the caller from being +/// inlined which is proved to be more beneficial. \p IC is the +/// estimated inline cost associated with callsite \p CS. +/// \p TotalAltCost will be set to the estimated cost of inlining the caller +/// if \p CS is suppressed for inlining. +static bool +shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, + int &TotalSecondaryCost, + function_ref<InlineCost(CallSite CS)> GetInlineCost) { // For now we only handle local or inline functions. 
if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) @@ -269,7 +304,7 @@ bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, continue; } - InlineCost IC2 = getInlineCost(CS2); + InlineCost IC2 = GetInlineCost(CS2); ++NumCallerCallersAnalyzed; if (!IC2) { callerWillBeRemoved = false; @@ -278,7 +313,7 @@ bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, if (IC2.isAlways()) continue; - // See if inlining or original callsite would erase the cost delta of + // See if inlining of the original callsite would erase the cost delta of // this callsite. We subtract off the penalty for the call instruction, // which we would be deleting. if (IC2.getCostDelta() <= CandidateCost) { @@ -291,7 +326,7 @@ bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, // be removed entirely. We did not account for this above unless there // is only one caller of Caller. if (callerWillBeRemoved && !Caller->use_empty()) - TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; + TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) return true; @@ -300,63 +335,73 @@ bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, } /// Return true if the inliner should attempt to inline at the given CallSite. 
-bool Inliner::shouldInline(CallSite CS) { - InlineCost IC = getInlineCost(CS); - +static bool shouldInline(CallSite CS, + function_ref<InlineCost(CallSite CS)> GetInlineCost, + OptimizationRemarkEmitter &ORE) { + using namespace ore; + InlineCost IC = GetInlineCost(CS); + Instruction *Call = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + if (IC.isAlways()) { DEBUG(dbgs() << " Inlining: cost=always" - << ", Call: " << *CS.getInstruction() << "\n"); - emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) + - " should always be inlined (cost=always)"); + << ", Call: " << *CS.getInstruction() << "\n"); + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + << NV("Callee", Callee) + << " should always be inlined (cost=always)"); return true; } - + if (IC.isNever()) { DEBUG(dbgs() << " NOT Inlining: cost=never" - << ", Call: " << *CS.getInstruction() << "\n"); - emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + - " should never be inlined (cost=never)")); + << ", Call: " << *CS.getInstruction() << "\n"); + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) + << " should never be inlined (cost=never)"); return false; } - + Function *Caller = CS.getCaller(); if (!IC) { DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() - << ", thres=" << (IC.getCostDelta() + IC.getCost()) - << ", Call: " << *CS.getInstruction() << "\n"); - emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + - " too costly to inline (cost=") + - Twine(IC.getCost()) + ", threshold=" + - Twine(IC.getCostDelta() + IC.getCost()) + ")"); + << ", thres=" << (IC.getCostDelta() + IC.getCost()) + << ", Call: " << *CS.getInstruction() << "\n"); + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " too costly to inline (cost=" + << NV("Cost", IC.getCost()) << ", threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); return false; } int 
TotalSecondaryCost = 0; - if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost)) { + if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) { DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() - << " Cost = " << IC.getCost() - << ", outer Cost = " << TotalSecondaryCost << '\n'); - emitAnalysis(CS, Twine("Not inlining. Cost of inlining " + - CS.getCalledFunction()->getName() + - " increases the cost of inlining " + - CS.getCaller()->getName() + " in other contexts")); + << " Cost = " << IC.getCost() + << ", outer Cost = " << TotalSecondaryCost << '\n'); + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, + "IncreaseCostInOtherContexts", Call) + << "Not inlining. Cost of inlining " << NV("Callee", Callee) + << " increases the cost of inlining " << NV("Caller", Caller) + << " in other contexts"); return false; } DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() - << ", thres=" << (IC.getCostDelta() + IC.getCost()) - << ", Call: " << *CS.getInstruction() << '\n'); - emitAnalysis( - CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") + - CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) + - " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")"); + << ", thres=" << (IC.getCostDelta() + IC.getCost()) + << ", Call: " << *CS.getInstruction() << '\n'); + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBeInlined", Call) + << NV("Callee", Callee) << " can be inlined into " + << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) + << " (threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); return true; } /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. 
-static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, - const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { +static bool InlineHistoryIncludes( + Function *F, int InlineHistoryID, + const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) { while (InlineHistoryID != -1) { assert(unsigned(InlineHistoryID) < InlineHistory.size() && "Invalid inline history ID"); @@ -367,23 +412,32 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, return false; } -bool Inliner::runOnSCC(CallGraphSCC &SCC) { +bool LegacyInlinerBase::doInitialization(CallGraph &CG) { + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats.setModuleInfo(CG.getModule()); + return false; // No changes to CallGraph. +} + +bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) { if (skipSCC(SCC)) return false; return inlineCalls(SCC); } -bool Inliner::inlineCalls(CallGraphSCC &SCC) { - CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); - ACT = &getAnalysis<AssumptionCacheTracker>(); - PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule()); - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - - SmallPtrSet<Function*, 8> SCCFunctions; +static bool +inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, + std::function<AssumptionCache &(Function &)> GetAssumptionCache, + ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, + bool InsertLifetime, + function_ref<InlineCost(CallSite CS)> GetInlineCost, + function_ref<AAResults &(Function &)> AARGetter, + ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { + SmallPtrSet<Function *, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); for (CallGraphNode *Node : SCC) { Function *F = Node->getFunction(); - if (F) SCCFunctions.insert(F); + if (F) + SCCFunctions.insert(F); DEBUG(dbgs() << " " << (F ? 
F->getName() : "INDIRECTNODE")); } @@ -391,17 +445,19 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { // inline call sites in the original functions, not call sites that result // from inlining other functions. SmallVector<std::pair<CallSite, int>, 16> CallSites; - + // When inlining a callee produces new call sites, we want to keep track of // the fact that they were inlined from the callee. This allows us to avoid // infinite inlining in some obscure cases. To represent this, we use an // index into the InlineHistory vector. - SmallVector<std::pair<Function*, int>, 8> InlineHistory; + SmallVector<std::pair<Function *, int>, 8> InlineHistory; for (CallGraphNode *Node : SCC) { Function *F = Node->getFunction(); - if (!F) continue; - + if (!F || F->isDeclaration()) + continue; + + OptimizationRemarkEmitter ORE(F); for (BasicBlock &BB : *F) for (Instruction &I : BB) { CallSite CS(cast<Value>(&I)); @@ -409,14 +465,21 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { // never be inlined. if (!CS || isa<IntrinsicInst>(I)) continue; - + // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. 
if (Function *Callee = CS.getCalledFunction()) - if (Callee->isDeclaration()) + if (Callee->isDeclaration()) { + using namespace ore; + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", CS.getCaller()) + << " because its definition is unavailable" + << setIsVerbose()); continue; - + } + CallSites.push_back(std::make_pair(CS, -1)); } } @@ -435,9 +498,8 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { if (SCCFunctions.count(F)) std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); - InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, ACT); + InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -450,7 +512,7 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { // CallSites may be modified inside so ranged for loop can not be used. for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { CallSite CS = CallSites[CSi].first; - + Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); @@ -459,16 +521,17 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) { - DEBUG(dbgs() << " -> Deleting dead call: " - << *CS.getInstruction() << "\n"); + DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() + << "\n"); // Update the call graph by deleting the edge from Callee to Caller. CG[Caller]->removeCallEdgeFor(CS); CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; } else { // We can only inline direct calls to non-declarations. 
- if (!Callee || Callee->isDeclaration()) continue; - + if (!Callee || Callee->isDeclaration()) + continue; + // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, @@ -478,37 +541,42 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { if (InlineHistoryID != -1 && InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; - - LLVMContext &CallerCtx = Caller->getContext(); // Get DebugLoc to report. CS will be invalid after Inliner. DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + BasicBlock *Block = CS.getParent(); + // FIXME for new PM: because of the old PM we currently generate ORE and + // in turn BFI on demand. With the new PM, the ORE dependency should + // just become a regular analysis dependency. + OptimizationRemarkEmitter ORE(Caller); // If the policy determines that we should inline this function, // try to do so. - if (!shouldInline(CS)) { - emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, - Twine(Callee->getName() + - " will not be inlined into " + - Caller->getName())); + using namespace ore; + if (!shouldInline(CS, GetInlineCost, ORE)) { + ORE.emit( + OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller)); continue; } // Attempt to inline the function. 
- if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime)) { - emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, - Twine(Callee->getName() + - " will not be inlined into " + - Caller->getName())); + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, + InlineHistoryID, InsertLifetime, AARGetter, + ImportedFunctionsStats)) { + ORE.emit( + OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller)); continue; } ++NumInlined; // Report the inline decision. - emitOptimizationRemark( - CallerCtx, DEBUG_TYPE, *Caller, DLoc, - Twine(Callee->getName() + " inlined into " + Caller->getName())); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "Inlined", DLoc, Block) + << NV("Callee", Callee) << " inlined into " + << NV("Caller", Caller)); // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. @@ -522,30 +590,30 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); } } - + // If we inlined or deleted the last possible call site to the function, // delete the function body now. if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && // TODO: Can remove if in SCC now. !SCCFunctions.count(Callee) && - + // The function may be apparently dead, but if there are indirect // callgraph references to the node, we cannot delete it yet, this // could invalidate the CGSCC iterator. CG[Callee]->getNumReferences() == 0) { - DEBUG(dbgs() << " -> Deleting dead function: " - << Callee->getName() << "\n"); + DEBUG(dbgs() << " -> Deleting dead function: " << Callee->getName() + << "\n"); CallGraphNode *CalleeNode = CG[Callee]; // Remove any call graph edges from the callee to its callees. 
CalleeNode->removeAllCalledFunctions(); - + // Removing the node for callee from the call graph and delete it. delete CG.removeFunctionFromModule(CalleeNode); ++NumDeleted; } - // Remove this call site from the list. If possible, use + // Remove this call site from the list. If possible, use // swap/pop_back for efficiency, but do not use it if doing so would // move a call site to a function in this SCC before the // 'FirstCallInSCC' barrier. @@ -553,7 +621,7 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { CallSites[CSi] = CallSites.back(); CallSites.pop_back(); } else { - CallSites.erase(CallSites.begin()+CSi); + CallSites.erase(CallSites.begin() + CSi); } --CSi; @@ -565,17 +633,43 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) { return Changed; } +bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + ACT = &getAnalysis<AssumptionCacheTracker>(); + PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + // We compute dedicated AA results for each function in the SCC as needed. We + // use a lambda referencing external objects so that they live long enough to + // be queried, but we re-use them each time. + Optional<BasicAAResult> BAR; + Optional<AAResults> AAR; + auto AARGetter = [&](Function &F) -> AAResults & { + BAR.emplace(createLegacyPMBasicAAResult(*this, F)); + AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); + return *AAR; + }; + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return ACT->getAssumptionCache(F); + }; + return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime, + [this](CallSite CS) { return getInlineCost(CS); }, + AARGetter, ImportedFunctionsStats); +} + /// Remove now-dead linkonce functions at the end of /// processing to avoid breaking the SCC traversal. 
-bool Inliner::doFinalization(CallGraph &CG) { +bool LegacyInlinerBase::doFinalization(CallGraph &CG) { + if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) + ImportedFunctionsStats.dump(InlinerFunctionImportStats == + InlinerFunctionImportStatsOpts::Verbose); return removeDeadFunctions(CG); } /// Remove dead functions that are not included in DNR (Do Not Remove) list. -bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { - SmallVector<CallGraphNode*, 16> FunctionsToRemove; - SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats; - SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive; +bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, + bool AlwaysInlineOnly) { + SmallVector<CallGraphNode *, 16> FunctionsToRemove; + SmallVector<Function *, 16> DeadFunctionsInComdats; auto RemoveCGN = [&](CallGraphNode *CGN) { // Remove any call graph edges from the function to its callees. @@ -616,9 +710,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // The inliner doesn't visit non-function entities which are in COMDAT // groups so it is unsafe to do so *unless* the linkage is local. 
if (!F->hasLocalLinkage()) { - if (const Comdat *C = F->getComdat()) { - --ComdatEntriesAlive[C]; - DeadFunctionsInComdats.push_back(CGN); + if (F->hasComdat()) { + DeadFunctionsInComdats.push_back(F); continue; } } @@ -626,32 +719,11 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { RemoveCGN(CGN); } if (!DeadFunctionsInComdats.empty()) { - // Count up all the entities in COMDAT groups - auto ComdatGroupReferenced = [&](const Comdat *C) { - auto I = ComdatEntriesAlive.find(C); - if (I != ComdatEntriesAlive.end()) - ++(I->getSecond()); - }; - for (const Function &F : CG.getModule()) - if (const Comdat *C = F.getComdat()) - ComdatGroupReferenced(C); - for (const GlobalVariable &GV : CG.getModule().globals()) - if (const Comdat *C = GV.getComdat()) - ComdatGroupReferenced(C); - for (const GlobalAlias &GA : CG.getModule().aliases()) - if (const Comdat *C = GA.getComdat()) - ComdatGroupReferenced(C); - for (CallGraphNode *CGN : DeadFunctionsInComdats) { - Function *F = CGN->getFunction(); - const Comdat *C = F->getComdat(); - int NumAlive = ComdatEntriesAlive[C]; - // We can remove functions in a COMDAT group if the entire group is dead. - assert(NumAlive >= 0); - if (NumAlive > 0) - continue; - - RemoveCGN(CGN); - } + // Filter out the functions whose comdats remain alive. + filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats); + // Remove the rest. + for (Function *F : DeadFunctionsInComdats) + RemoveCGN(CG[F]); } if (FunctionsToRemove.empty()) @@ -665,12 +737,201 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // here to do this, it doesn't matter which order the functions are deleted // in. 
array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); - FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(), - FunctionsToRemove.end()), - FunctionsToRemove.end()); + FunctionsToRemove.erase( + std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), + FunctionsToRemove.end()); for (CallGraphNode *CGN : FunctionsToRemove) { delete CG.removeFunctionFromModule(CGN); ++NumDeleted; } return true; } + +PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, + CGSCCAnalysisManager &AM, LazyCallGraph &CG, + CGSCCUpdateResult &UR) { + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG) + .getManager(); + const ModuleAnalysisManager &MAM = + AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager(); + bool Changed = false; + + assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); + Module &M = *InitialC.begin()->getFunction().getParent(); + ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M); + + std::function<AssumptionCache &(Function &)> GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { + return FAM.getResult<AssumptionAnalysis>(F); + }; + + // Setup the data structure used to plumb customization into the + // `InlineFunction` routine. + InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache); + + auto GetInlineCost = [&](CallSite CS) { + Function &Callee = *CS.getCalledFunction(); + auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee); + return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI); + }; + + // We use a worklist of nodes to process so that we can handle if the SCC + // structure changes and some nodes are no longer part of the current SCC. We + // also need to use an updatable pointer for the SCC as a consequence. 
+ SmallVector<LazyCallGraph::Node *, 16> Nodes; + for (auto &N : InitialC) + Nodes.push_back(&N); + auto *C = &InitialC; + auto *RC = &C->getOuterRefSCC(); + + // We also use a secondary worklist of call sites within a particular node to + // allow quickly continuing to inline through newly inlined call sites where + // possible. + SmallVector<std::pair<CallSite, int>, 16> Calls; + + // When inlining a callee produces new call sites, we want to keep track of + // the fact that they were inlined from the callee. This allows us to avoid + // infinite inlining in some obscure cases. To represent this, we use an + // index into the InlineHistory vector. + SmallVector<std::pair<Function *, int>, 16> InlineHistory; + + // Track a set vector of inlined callees so that we can augment the caller + // with all of their edges in the call graph before pruning out the ones that + // got simplified away. + SmallSetVector<Function *, 4> InlinedCallees; + + // Track the dead functions to delete once finished with inlining calls. We + // defer deleting these to make it easier to handle the call graph updates. + SmallVector<Function *, 4> DeadFunctions; + + do { + auto &N = *Nodes.pop_back_val(); + if (CG.lookupSCC(N) != C) + continue; + Function &F = N.getFunction(); + if (F.hasFnAttribute(Attribute::OptimizeNone)) + continue; + + // Get the remarks emission analysis for the caller. + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); + + // We want to generally process call sites top-down in order for + // simplifications stemming from replacing the call with the returned value + // after inlining to be visible to subsequent inlining decisions. So we + // walk the function backwards and then process the back of the vector. + // FIXME: Using reverse is a really bad way to do this. Instead we should + // do an actual PO walk of the function body. 
+ for (Instruction &I : reverse(instructions(F))) + if (auto CS = CallSite(&I)) + if (Function *Callee = CS.getCalledFunction()) + if (!Callee->isDeclaration()) + Calls.push_back({CS, -1}); + + bool DidInline = false; + while (!Calls.empty()) { + int InlineHistoryID; + CallSite CS; + std::tie(CS, InlineHistoryID) = Calls.pop_back_val(); + Function &Callee = *CS.getCalledFunction(); + + if (InlineHistoryID != -1 && + InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) + continue; + + // Check whether we want to inline this callsite. + if (!shouldInline(CS, GetInlineCost, ORE)) + continue; + + if (!InlineFunction(CS, IFI)) + continue; + DidInline = true; + InlinedCallees.insert(&Callee); + + // Add any new callsites to defined functions to the worklist. + if (!IFI.InlinedCallSites.empty()) { + int NewHistoryID = InlineHistory.size(); + InlineHistory.push_back({&Callee, InlineHistoryID}); + for (CallSite &CS : reverse(IFI.InlinedCallSites)) + if (Function *NewCallee = CS.getCalledFunction()) + if (!NewCallee->isDeclaration()) + Calls.push_back({CS, NewHistoryID}); + } + + // Merge the attributes based on the inlining. + AttributeFuncs::mergeAttributesForInlining(F, Callee); + + // For local functions, check whether this makes the callee trivially + // dead. In that case, we can drop the body of the function eagerly + // which may reduce the number of callers of other functions to one, + // changing inline cost thresholds. + if (Callee.hasLocalLinkage()) { + // To check this we also need to nuke any dead constant uses (perhaps + // made dead by this operation on other functions). + Callee.removeDeadConstantUsers(); + if (Callee.use_empty()) { + // Clear the body and queue the function itself for deletion when we + // finish inlining and call graph updates. + // Note that after this point, it is an error to do anything other + // than use the callee's address or delete it. 
+ Callee.dropAllReferences(); + assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && + "Cannot put cause a function to become dead twice!"); + DeadFunctions.push_back(&Callee); + } + } + } + + if (!DidInline) + continue; + Changed = true; + + // Add all the inlined callees' edges as ref edges to the caller. These are + // by definition trivial edges as we always have *some* transitive ref edge + // chain. While in some cases these edges are direct calls inside the + // callee, they have to be modeled in the inliner as reference edges as + // there may be a reference edge anywhere along the chain from the current + // caller to the callee that causes the whole thing to appear like + // a (transitive) reference edge that will require promotion to a call edge + // below. + for (Function *InlinedCallee : InlinedCallees) { + LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee); + for (LazyCallGraph::Edge &E : CalleeN) + RC->insertTrivialRefEdge(N, *E.getNode()); + } + InlinedCallees.clear(); + + // At this point, since we have made changes we have at least removed + // a call instruction. However, in the process we do some incremental + // simplification of the surrounding code. This simplification can + // essentially do all of the same things as a function pass and we can + // re-use the exact same logic for updating the call graph to reflect the + // change. + C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR); + RC = &C->getOuterRefSCC(); + } while (!Nodes.empty()); + + // Now that we've finished inlining all of the calls across this SCC, delete + // all of the trivially dead functions, updating the call graph and the CGSCC + // pass manager in the process. + // + // Note that DeadFunctions is a vector and is walked in insertion order; the + // order is irrelevant anyway, as all we do is delete things and add pointers + // to unordered sets.
+ for (Function *DeadF : DeadFunctions) { + // Get the necessary information out of the call graph and nuke the + // function there. + auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); + auto &DeadRC = DeadC.getOuterRefSCC(); + CG.removeDeadFunction(*DeadF); + + // Mark the relevant parts of the call graph as invalid so we don't visit + // them. + UR.InvalidatedSCCs.insert(&DeadC); + UR.InvalidatedRefSCCs.insert(&DeadRC); + + // And delete the actual function from the module. + M.getFunctionList().erase(DeadF); + } + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} |