Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 1308
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp | 26
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp | 18
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp | 155
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp | 14
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 180
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp | 304
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp | 152
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 57
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp | 13
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPO.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp | 13
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 276
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 520
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp | 323
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp | 893
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 236
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PruneEH.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp | 333
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp | 26
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 379
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 766
26 files changed, 4199 insertions, 1817 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 65b7bad..72bae20 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,8 +29,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -38,6 +39,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
@@ -51,323 +53,404 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include <set>
using namespace llvm;
#define DEBUG_TYPE "argpromotion"
-STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
-STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
-STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
-namespace {
- /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
- ///
- struct ArgPromotion : public CallGraphSCCPass {
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- getAAResultsAnalysisUsage(AU);
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
+/// A vector used to hold the indices of a single GEP instruction
+typedef std::vector<uint64_t> IndicesVector;
- bool runOnSCC(CallGraphSCC &SCC) override;
- static char ID; // Pass identification, replacement for typeid
- explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), maxElements(maxElements) {
- initializeArgPromotionPass(*PassRegistry::getPassRegistry());
- }
+/// DoPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+static Function *
+doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
+ SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
+ Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
+ ReplaceCallSite) {
- private:
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ FunctionType *FTy = F->getFunctionType();
+ std::vector<Type *> Params;
- using llvm::Pass::doInitialization;
- bool doInitialization(CallGraph &CG) override;
- /// The maximum number of elements to expand, or 0 for unlimited.
- unsigned maxElements;
- };
-}
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
-/// A vector used to hold the indices of a single GEP instruction
-typedef std::vector<uint64_t> IndicesVector;
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+ // Arguments that are directly loaded will have a zero element value here, to
+ // handle cases where there are both a direct load and GEP accesses.
+ //
+ std::map<Argument *, ScalarizeTable> ScalarizedElements;
-static CallGraphNode *
-PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements);
-static bool isDenselyPacked(Type *type, const DataLayout &DL);
-static bool canPaddingBeAccessed(Argument *Arg);
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
- unsigned MaxElements);
-static CallGraphNode *
-DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG);
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ // We need to keep the original loads for each argument and the elements
+ // of the argument that are accessed.
+ std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;
-char ArgPromotion::ID = 0;
-INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false)
+ // Attribute - Keep track of the parameter attributes for the arguments
+ // that we are *not* promoting. For the ones that we do promote, the parameter
+ // attributes are lost
+ SmallVector<AttributeSet, 8> ArgAttrVec;
+ AttributeList PAL = F->getAttributes();
-Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
- return new ArgPromotion(maxElements);
-}
+ // First, determine the new argument list
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgNo) {
+ if (ByValArgsToTransform.count(&*I)) {
+ // Simple byval argument? Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Params.insert(Params.end(), STy->element_begin(), STy->element_end());
+ ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
+ AttributeSet());
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(&*I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
+ } else if (I->use_empty()) {
+ // Dead argument (which are always marked as promotable)
+ ++NumArgumentsDead;
-static bool runImpl(CallGraphSCC &SCC, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements) {
- bool Changed = false, LocalChange;
+ // There may be remaining metadata uses of the argument for things like
+ // llvm.dbg.value. Replace them with undef.
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads
- do { // Iterate until we stop promoting from this SCC.
- LocalChange = false;
- // Attempt to promote arguments from all functions in this SCC.
- for (CallGraphNode *OldNode : SCC) {
- if (CallGraphNode *NewNode =
- PromoteArguments(OldNode, CG, AARGetter, MaxElements)) {
- LocalChange = true;
- SCC.ReplaceNode(OldNode, NewNode);
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ Type *SrcTy;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ SrcTy = L->getType();
+ else
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
+ IndicesVector Indices;
+ Indices.reserve(UI->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(std::make_pair(SrcTy, Indices));
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ OrigLoad = L;
+ else
+ // Take any load, we will use it only to update Alias Analysis
+ OrigLoad = cast<LoadInst>(UI->user_back());
+ OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
}
- }
- Changed |= LocalChange; // Remember that we changed something.
- } while (LocalChange);
-
- return Changed;
-}
-bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
- if (skipSCC(SCC))
- return false;
+ // Add a parameter to the function for each element passed in.
+ for (const auto &ArgIndex : ArgIndices) {
+ // not allowed to dereference ->begin() if size() is 0
+ Params.push_back(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ ArgIndex.second));
+ ArgAttrVec.push_back(AttributeSet());
+ assert(Params.back());
+ }
- // Get the callgraph information that we need to update to reflect our
- // changes.
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
+ }
+ }
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
-
- return runImpl(SCC, CG, AARGetter, maxElements);
-}
+ Type *RetTy = FTy->getReturnType();
-/// \brief Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
- // There is no size information, so be conservative.
- if (!type->isSized())
- return false;
+ // Create the new function body and insert it into the module.
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
- // If the alloc size is not equal to the storage size, then there are padding
- // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
- return false;
+ // Patch the pointer to LLVM function in debug info descriptor.
+ NF->setSubprogram(F->getSubprogram());
+ F->setSubprogram(nullptr);
- if (!isa<CompositeType>(type))
- return true;
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
- // For homogenous sequential types, check for padding within members.
- if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
- return isDenselyPacked(seqTy->getElementType(), DL);
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
+ PAL.getRetAttributes(), ArgAttrVec));
+ ArgAttrVec.clear();
- // Check for padding within and between elements of a struct.
- StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL.getStructLayout(StructTy);
- uint64_t StartPos = 0;
- for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
- Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy, DL))
- return false;
- if (StartPos != Layout->getElementOffsetInBits(i))
- return false;
- StartPos += DL.getTypeAllocSizeInBits(ElTy);
- }
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
+ NF->takeName(F);
- return true;
-}
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value *, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->user_back());
+ assert(CS.getCalledFunction() == F);
+ Instruction *Call = CS.getInstruction();
+ const AttributeList &CallPAL = CS.getAttributes();
-/// \brief Checks if the padding bytes of an argument could be accessed.
-static bool canPaddingBeAccessed(Argument *arg) {
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgNo = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++AI, ++ArgNo)
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ Args.push_back(*AI); // Unmodified argument
+ ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+ } else if (ByValArgsToTransform.count(&*I)) {
+ // Emit a GEP and load for each element of the struct.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(
+ STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
+ ArgAttrVec.push_back(AttributeSet());
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse.
+ std::vector<Value *> Ops;
+ for (const auto &ArgIndex : ArgIndices) {
+ Value *V = *AI;
+ LoadInst *OrigLoad =
+ OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
+ if (!ArgIndex.second.empty()) {
+ Ops.reserve(ArgIndex.second.size());
+ Type *ElTy = V->getType();
+ for (auto II : ArgIndex.second) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ Type *IdxTy =
+ (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
+ : Type::getInt64Ty(F->getContext()));
+ Ops.push_back(ConstantInt::get(IdxTy, II));
+ // Keep track of the type we're currently indexing.
+ if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
+ ElTy = ElPTy->getElementType();
+ else
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
+ }
+ // And create a GEP to extract those indices.
+ V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
+ V->getName() + ".idx", Call);
+ Ops.clear();
+ }
+ // Since we're replacing a load make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the AA info too.
+ AAMDNodes AAInfo;
+ OrigLoad->getAAMetadata(AAInfo);
+ newLoad->setAAMetadata(AAInfo);
- assert(arg->hasByValAttr());
+ Args.push_back(newLoad);
+ ArgAttrVec.push_back(AttributeSet());
+ }
+ }
- // Track all the pointers to the argument to make sure they are not captured.
- SmallPtrSet<Value *, 16> PtrValues;
- PtrValues.insert(arg);
+ // Push any varargs arguments on the list.
+ for (; AI != CS.arg_end(); ++AI, ++ArgNo) {
+ Args.push_back(*AI);
+ ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+ }
- // Track all of the stores.
- SmallVector<StoreInst *, 16> Stores;
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
- // Scan through the uses recursively to make sure the pointer is always used
- // sanely.
- SmallVector<Value *, 16> WorkList;
- WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
- while (!WorkList.empty()) {
- Value *V = WorkList.back();
- WorkList.pop_back();
- if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
- if (PtrValues.insert(V).second)
- WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
- } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
- Stores.push_back(Store);
- } else if (!isa<LoadInst>(V)) {
- return true;
+ CallSite NewCS;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call);
+ } else {
+ auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
+ NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
+ NewCS = NewCall;
}
- }
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(
+ AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
+ CallPAL.getRetAttributes(), ArgAttrVec));
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
+ Args.clear();
+ ArgAttrVec.clear();
-// Check to make sure the pointers aren't captured
- for (StoreInst *Store : Stores)
- if (PtrValues.count(Store->getValueOperand()))
- return true;
+ // Update the callgraph to know that the callsite has been transformed.
+ if (ReplaceCallSite)
+ (*ReplaceCallSite)(CS, NewCS);
- return false;
-}
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(NewCS.getInstruction());
+ NewCS->takeName(Call);
+ }
-/// PromoteArguments - This method checks the specified function to see if there
-/// are any promotable arguments and if it is safe to promote the function (for
-/// example, all callers are direct). If safe to promote some arguments, it
-/// calls the DoPromotion method.
-///
-static CallGraphNode *
-PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements) {
- Function *F = CGN->getFunction();
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
- // Make sure that it is local to this module.
- if (!F || !F->hasLocalLinkage()) return nullptr;
+ const DataLayout &DL = F->getParent()->getDataLayout();
- // Don't promote arguments for variadic functions. Adding, removing, or
- // changing non-pack parameters can change the classification of pack
- // parameters. Frontends encode that classification at the call site in the
- // IR, while in the callee the classification is determined dynamically based
- // on the number of registers consumed so far.
- if (F->isVarArg()) return nullptr;
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
- // First check: see if there are any pointer arguments! If not, quick exit.
- SmallVector<Argument*, 16> PointerArgs;
- for (Argument &I : F->args())
- if (I.getType()->isPointerTy())
- PointerArgs.push_back(&I);
- if (PointerArgs.empty()) return nullptr;
+ // Loop over the argument list, transferring uses of the old arguments over to
+ // the new arguments, also transferring over the names as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin();
+ I != E; ++I) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
+ ++I2;
+ continue;
+ }
- // Second check: make sure that all callers are direct callers. We can't
- // transform functions that have indirect callers. Also see if the function
- // is self-recursive.
- bool isSelfRecursive = false;
- for (Use &U : F->uses()) {
- CallSite CS(U.getUser());
- // Must be a direct call.
- if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
-
- if (CS.getInstruction()->getParent()->getParent() == F)
- isSelfRecursive = true;
- }
-
- const DataLayout &DL = F->getParent()->getDataLayout();
+ if (ByValArgsToTransform.count(&*I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = &NF->begin()->front();
- AAResults &AAR = AARGetter(*F);
+ // Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
+ I->getParamAlignment(), "", InsertPt);
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
+ nullptr};
- // Check to see which arguments are promotable. If an argument is promotable,
- // add it to ArgsToPromote.
- SmallPtrSet<Argument*, 8> ArgsToPromote;
- SmallPtrSet<Argument*, 8> ByValArgsToTransform;
- for (Argument *PtrArg : PointerArgs) {
- Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(
+ AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+ InsertPt);
+ I2->setName(I->getName() + "." + Twine(i));
+ new StoreInst(&*I2++, Idx, InsertPt);
+ }
- // Replace sret attribute with noalias. This reduces register pressure by
- // avoiding a register copy.
- if (PtrArg->hasStructRetAttr()) {
- unsigned ArgNo = PtrArg->getArgNo();
- F->setAttributes(
- F->getAttributes()
- .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
- .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
- for (Use &U : F->uses()) {
- CallSite CS(U.getUser());
- CS.setAttributes(
- CS.getAttributes()
- .removeAttribute(F->getContext(), ArgNo + 1,
- Attribute::StructRet)
- .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(&*I);
+
+ // If the alloca is used in a call, we must clear the tail flag since
+ // the callee now uses an alloca from the caller.
+ for (User *U : TheAlloca->users()) {
+ CallInst *Call = dyn_cast<CallInst>(U);
+ if (!Call)
+ continue;
+ Call->setTailCall(false);
}
+ continue;
}
- // If this is a byval argument, and if the aggregate type is small, just
- // pass the elements, which is always safe, if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed. This does
- // not apply to inalloca.
- bool isSafeToPromote =
- PtrArg->hasByValAttr() &&
- (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
- if (isSafeToPromote) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
- DEBUG(dbgs() << "argpromotion disable promoting argument '"
- << PtrArg->getName() << "' because it would require adding more"
- << " than " << MaxElements << " arguments to the function.\n");
- continue;
- }
-
- // If all the elements are single-value types, we can promote it.
- bool AllSimple = true;
- for (const auto *EltTy : STy->elements()) {
- if (!EltTy->isSingleValueType()) {
- AllSimple = false;
- break;
- }
+ if (I->use_empty())
+ continue;
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+ assert(ArgIndices.begin()->second.empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName() + ".val");
+ LI->replaceAllUsesWith(&*I2);
+ LI->eraseFromParent();
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ It->second != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
}
- // Safe to transform, don't even bother trying to "promote" it.
- // Passing the elements as a scalar will allow sroa to hack on
- // the new alloca we introduce.
- if (AllSimple) {
- ByValArgsToTransform.insert(PtrArg);
- continue;
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
}
- }
- }
+ NewName += ".val";
+ TheArg->setName(NewName);
- // If the argument is a recursive type and we're in a recursive
- // function, we could end up infinitely peeling the function argument.
- if (isSelfRecursive) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- bool RecursiveType = false;
- for (const auto *EltTy : STy->elements()) {
- if (EltTy == PtrArg->getType()) {
- RecursiveType = true;
- break;
- }
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
+
+ // All of the uses must be load instructions. Replace them all with
+ // the argument specified by ArgNo.
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->user_back());
+ L->replaceAllUsesWith(&*TheArg);
+ L->eraseFromParent();
}
- if (RecursiveType)
- continue;
+ GEP->eraseFromParent();
}
}
-
- // Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
- MaxElements))
- ArgsToPromote.insert(PtrArg);
- }
- // No promotable pointer arguments.
- if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
- return nullptr;
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ std::advance(I2, ArgIndices.size());
+ }
- return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG);
+ return NF;
}
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
const DataLayout &DL = Callee->getParent()->getDataLayout();
@@ -390,26 +473,25 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
/// elements in Prefix is the same as the corresponding elements in Longer.
///
/// This means it also returns true when Prefix and Longer are equal!
-static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
+static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
if (Prefix.size() > Longer.size())
return false;
return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}
-
/// Checks if Indices, or a prefix of Indices, is in Set.
-static bool PrefixIn(const IndicesVector &Indices,
+static bool prefixIn(const IndicesVector &Indices,
std::set<IndicesVector> &Set) {
- std::set<IndicesVector>::iterator Low;
- Low = Set.upper_bound(Indices);
- if (Low != Set.begin())
- Low--;
- // Low is now the last element smaller than or equal to Indices. This means
- // it points to a prefix of Indices (possibly Indices itself), if such
- // prefix exists.
- //
- // This load is safe if any prefix of its operands is safe to load.
- return Low != Set.end() && IsPrefix(*Low, Indices);
+ std::set<IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This means
+ // it points to a prefix of Indices (possibly Indices itself), if such
+ // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && isPrefix(*Low, Indices);
}
/// Mark the given indices (ToMark) as safe in the given set of indices
@@ -417,7 +499,7 @@ static bool PrefixIn(const IndicesVector &Indices,
/// is already a prefix of Indices in Safe, Indices are implicitely marked safe
/// already. Furthermore, any indices that Indices is itself a prefix of, are
/// removed from Safe (since they are implicitely safe because of Indices now).
-static void MarkIndicesSafe(const IndicesVector &ToMark,
+static void markIndicesSafe(const IndicesVector &ToMark,
std::set<IndicesVector> &Safe) {
std::set<IndicesVector>::iterator Low;
Low = Safe.upper_bound(ToMark);
@@ -428,7 +510,7 @@ static void MarkIndicesSafe(const IndicesVector &ToMark,
// means it points to a prefix of Indices (possibly Indices itself), if
// such prefix exists.
if (Low != Safe.end()) {
- if (IsPrefix(*Low, ToMark))
+ if (isPrefix(*Low, ToMark))
// If there is already a prefix of these indices (or exactly these
// indices) marked a safe, don't bother adding these indices
return;
@@ -441,7 +523,7 @@ static void MarkIndicesSafe(const IndicesVector &ToMark,
++Low;
// If there we're a prefix of longer index list(s), remove those
std::set<IndicesVector>::iterator End = Safe.end();
- while (Low != End && IsPrefix(ToMark, *Low)) {
+ while (Low != End && isPrefix(ToMark, *Low)) {
std::set<IndicesVector>::iterator Remove = Low;
++Low;
Safe.erase(Remove);
@@ -486,7 +568,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
+ if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -512,25 +594,26 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
return false;
// Indices checked out, mark them as safe
- MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ markIndicesSafe(Indices, SafeToUnconditionallyLoad);
Indices.clear();
}
} else if (V == Arg) {
// Direct loads are equivalent to a GEP with a single 0 index.
- MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
}
}
// Now, iterate all uses of the argument to see if there are any uses that are
// not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
- SmallVector<LoadInst*, 16> Loads;
+ SmallVector<LoadInst *, 16> Loads;
IndicesVector Operands;
for (Use &U : Arg->uses()) {
User *UR = U.getUser();
Operands.clear();
if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
// Don't hack volatile/atomic loads
- if (!LI->isSimple()) return false;
+ if (!LI->isSimple())
+ return false;
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
@@ -547,30 +630,31 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
}
// Ensure that all of the indices are constants.
- for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
- i != e; ++i)
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e;
+ ++i)
if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
Operands.push_back(C->getSExtValue());
else
- return false; // Not a constant operand GEP!
+ return false; // Not a constant operand GEP!
// Ensure that the only users of the GEP are load instructions.
for (User *GEPU : GEP->users())
if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
// Don't hack volatile/atomic loads
- if (!LI->isSimple()) return false;
+ if (!LI->isSimple())
+ return false;
Loads.push_back(LI);
} else {
// Other uses than load?
return false;
}
} else {
- return false; // Not a load or a GEP.
+ return false; // Not a load or a GEP.
}
// Now, see if it is safe to promote this load / loads of this GEP. Loading
// is safe if Operands, or a prefix of Operands, is marked as safe.
- if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ if (!prefixIn(Operands, SafeToUnconditionallyLoad))
return false;
// See if we are already promoting a load with these indices. If not, check
@@ -579,8 +663,10 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
if (ToPromote.find(Operands) == ToPromote.end()) {
if (MaxElements > 0 && ToPromote.size() == MaxElements) {
DEBUG(dbgs() << "argpromotion not promoting argument '"
- << Arg->getName() << "' because it would require adding more "
- << "than " << MaxElements << " arguments to the function.\n");
+ << Arg->getName()
+ << "' because it would require adding more "
+ << "than " << MaxElements
+ << " arguments to the function.\n");
// We limit aggregate promotion to only promoting up to a fixed number
// of elements of the aggregate.
return false;
@@ -589,7 +675,8 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
}
}
- if (Loads.empty()) return true; // No users, this is a dead argument.
+ if (Loads.empty())
+ return true; // No users, this is a dead argument.
// Okay, now we know that the argument is only used by load instructions and
// it is safe to unconditionally perform all of them. Use alias analysis to
@@ -598,7 +685,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// Because there could be several/many load instructions, remember which
// blocks we know to be transparent to the load.
- df_iterator_default_set<BasicBlock*, 16> TranspBlocks;
+ df_iterator_default_set<BasicBlock *, 16> TranspBlocks;
for (LoadInst *Load : Loads) {
// Check to see if the load is invalidated from the start of the block to
@@ -607,7 +694,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
MemoryLocation Loc = MemoryLocation::get(Load);
if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
- return false; // Pointer is invalidated!
+ return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
// To do this, we perform a depth first search on the inverse CFG from the
@@ -625,416 +712,347 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
return true;
}
-/// DoPromotion - This method actually performs the promotion of the specified
-/// arguments, and returns the new function. At this point, we know that it's
-/// safe to do so.
-static CallGraphNode *
-DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) {
+/// \brief Checks if a type could have padding bytes.
+static bool isDenselyPacked(Type *type, const DataLayout &DL) {
- // Start by computing a new prototype for the function, which is the same as
- // the old function, but has modified arguments.
- FunctionType *FTy = F->getFunctionType();
- std::vector<Type*> Params;
+ // There is no size information, so be conservative.
+ if (!type->isSized())
+ return false;
- typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
+ // If the alloc size is not equal to the storage size, then there are padding
+ // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
+ if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
+ return false;
- // ScalarizedElements - If we are promoting a pointer that has elements
- // accessed out of it, keep track of which elements are accessed so that we
- // can add one argument for each.
- //
- // Arguments that are directly loaded will have a zero element value here, to
- // handle cases where there are both a direct load and GEP accesses.
- //
- std::map<Argument*, ScalarizeTable> ScalarizedElements;
+ if (!isa<CompositeType>(type))
+ return true;
- // OriginalLoads - Keep track of a representative load instruction from the
- // original function so that we can tell the alias analysis implementation
- // what the new GEP/Load instructions we are inserting look like.
- // We need to keep the original loads for each argument and the elements
- // of the argument that are accessed.
- std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
+ // For homogenous sequential types, check for padding within members.
+ if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+ return isDenselyPacked(seqTy->getElementType(), DL);
- // Attribute - Keep track of the parameter attributes for the arguments
- // that we are *not* promoting. For the ones that we do promote, the parameter
- // attributes are lost
- SmallVector<AttributeSet, 8> AttributesVec;
- const AttributeSet &PAL = F->getAttributes();
+ // Check for padding within and between elements of a struct.
+ StructType *StructTy = cast<StructType>(type);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
+ uint64_t StartPos = 0;
+ for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
+ Type *ElTy = StructTy->getElementType(i);
+ if (!isDenselyPacked(ElTy, DL))
+ return false;
+ if (StartPos != Layout->getElementOffsetInBits(i))
+ return false;
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
+ }
- // Add any return attributes.
- if (PAL.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- PAL.getRetAttributes()));
+ return true;
+}
- // First, determine the new argument list
- unsigned ArgIndex = 1;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++ArgIndex) {
- if (ByValArgsToTransform.count(&*I)) {
- // Simple byval argument? Just add all the struct element types.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- StructType *STy = cast<StructType>(AgTy);
- Params.insert(Params.end(), STy->element_begin(), STy->element_end());
- ++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(&*I)) {
- // Unchanged argument
- Params.push_back(I->getType());
- AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
- if (attrs.hasAttributes(ArgIndex)) {
- AttrBuilder B(attrs, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Params.size(), B));
- }
- } else if (I->use_empty()) {
- // Dead argument (which are always marked as promotable)
- ++NumArgumentsDead;
- } else {
- // Okay, this is being promoted. This means that the only uses are loads
- // or GEPs which are only used by loads
+/// \brief Checks if the padding bytes of an argument could be accessed.
+static bool canPaddingBeAccessed(Argument *arg) {
- // In this table, we will track which indices are loaded from the argument
- // (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- for (User *U : I->users()) {
- Instruction *UI = cast<Instruction>(U);
- Type *SrcTy;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- SrcTy = L->getType();
- else
- SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
- IndicesVector Indices;
- Indices.reserve(UI->getNumOperands() - 1);
- // Since loads will only have a single operand, and GEPs only a single
- // non-index operand, this will record direct loads without any indices,
- // and gep+loads with the GEP indices.
- for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
- II != IE; ++II)
- Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
- // GEPs with a single 0 index can be merged with direct loads
- if (Indices.size() == 1 && Indices.front() == 0)
- Indices.clear();
- ArgIndices.insert(std::make_pair(SrcTy, Indices));
- LoadInst *OrigLoad;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- OrigLoad = L;
- else
- // Take any load, we will use it only to update Alias Analysis
- OrigLoad = cast<LoadInst>(UI->user_back());
- OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
- }
+ assert(arg->hasByValAttr());
- // Add a parameter to the function for each element passed in.
- for (const auto &ArgIndex : ArgIndices) {
- // not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(
- cast<PointerType>(I->getType()->getScalarType())->getElementType(),
- ArgIndex.second));
- assert(Params.back());
- }
+ // Track all the pointers to the argument to make sure they are not captured.
+ SmallPtrSet<Value *, 16> PtrValues;
+ PtrValues.insert(arg);
- if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
- ++NumArgumentsPromoted;
- else
- ++NumAggregatesPromoted;
+ // Track all of the stores.
+ SmallVector<StoreInst *, 16> Stores;
+
+ // Scan through the uses recursively to make sure the pointer is always used
+ // sanely.
+ SmallVector<Value *, 16> WorkList;
+ WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+ if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
+ if (PtrValues.insert(V).second)
+ WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+ } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+ Stores.push_back(Store);
+ } else if (!isa<LoadInst>(V)) {
+ return true;
}
}
- // Add any function attributes.
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
- PAL.getFnAttributes()));
+ // Check to make sure the pointers aren't captured
+ for (StoreInst *Store : Stores)
+ if (PtrValues.count(Store->getValueOperand()))
+ return true;
- Type *RetTy = FTy->getReturnType();
+ return false;
+}
- // Construct the new function type using the new arguments.
- FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+/// PromoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the DoPromotion method.
+///
+static Function *
+promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
+ unsigned MaxElements,
+ Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
+ ReplaceCallSite) {
+ // Make sure that it is local to this module.
+ if (!F->hasLocalLinkage())
+ return nullptr;
- // Create the new function body and insert it into the module.
- Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
- NF->copyAttributesFrom(F);
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg())
+ return nullptr;
- // Patch the pointer to LLVM function in debug info descriptor.
- NF->setSubprogram(F->getSubprogram());
- F->setSubprogram(nullptr);
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<Argument *, 16> PointerArgs;
+ for (Argument &I : F->args())
+ if (I.getType()->isPointerTy())
+ PointerArgs.push_back(&I);
+ if (PointerArgs.empty())
+ return nullptr;
- DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
- << "From: " << *F);
-
- // Recompute the parameter attributes list based on the new arguments for
- // the function.
- NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
- AttributesVec.clear();
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ // Must be a direct call.
+ if (CS.getInstruction() == nullptr || !CS.isCallee(&U))
+ return nullptr;
- F->getParent()->getFunctionList().insert(F->getIterator(), NF);
- NF->takeName(F);
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
- // Get a new callgraph node for NF.
- CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+ const DataLayout &DL = F->getParent()->getDataLayout();
- // Loop over all of the callers of the function, transforming the call sites
- // to pass in the loaded pointers.
- //
- SmallVector<Value*, 16> Args;
- while (!F->use_empty()) {
- CallSite CS(F->user_back());
- assert(CS.getCalledFunction() == F);
- Instruction *Call = CS.getInstruction();
- const AttributeSet &CallPAL = CS.getAttributes();
+ AAResults &AAR = AARGetter(*F);
- // Add any return attributes.
- if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- CallPAL.getRetAttributes()));
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument *, 8> ArgsToPromote;
+ SmallPtrSet<Argument *, 8> ByValArgsToTransform;
+ for (Argument *PtrArg : PointerArgs) {
+ Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
- // Loop over the operands, inserting GEP and loads in the caller as
- // appropriate.
- CallSite::arg_iterator AI = CS.arg_begin();
- ArgIndex = 1;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++AI, ++ArgIndex)
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
- Args.push_back(*AI); // Unmodified argument
+ // Replace sret attribute with noalias. This reduces register pressure by
+ // avoiding a register copy.
+ if (PtrArg->hasStructRetAttr()) {
+ unsigned ArgNo = PtrArg->getArgNo();
+ F->removeParamAttr(ArgNo, Attribute::StructRet);
+ F->addParamAttr(ArgNo, Attribute::NoAlias);
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ CS.removeParamAttr(ArgNo, Attribute::StructRet);
+ CS.addParamAttr(ArgNo, Attribute::NoAlias);
+ }
+ }
- if (CallPAL.hasAttributes(ArgIndex)) {
- AttrBuilder B(CallPAL, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
- }
- } else if (ByValArgsToTransform.count(&*I)) {
- // Emit a GEP and load for each element of the struct.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
- // TODO: Tell AA about the new values?
- Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ // If this is a byval argument, and if the aggregate type is small, just
+ // pass the elements, which is always safe, if the passed value is densely
+ // packed or if we can prove the padding bytes are never accessed. This does
+ // not apply to inalloca.
+ bool isSafeToPromote =
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+ if (isSafeToPromote) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
+ << PtrArg->getName()
+ << "' because it would require adding more"
+ << " than " << MaxElements
+ << " arguments to the function.\n");
+ continue;
}
- } else if (!I->use_empty()) {
- // Non-dead argument: insert GEPs and loads as appropriate.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- // Store the Value* version of the indices in here, but declare it now
- // for reuse.
- std::vector<Value*> Ops;
- for (const auto &ArgIndex : ArgIndices) {
- Value *V = *AI;
- LoadInst *OrigLoad =
- OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
- if (!ArgIndex.second.empty()) {
- Ops.reserve(ArgIndex.second.size());
- Type *ElTy = V->getType();
- for (unsigned long II : ArgIndex.second) {
- // Use i32 to index structs, and i64 for others (pointers/arrays).
- // This satisfies GEP constraints.
- Type *IdxTy = (ElTy->isStructTy() ?
- Type::getInt32Ty(F->getContext()) :
- Type::getInt64Ty(F->getContext()));
- Ops.push_back(ConstantInt::get(IdxTy, II));
- // Keep track of the type we're currently indexing.
- if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
- ElTy = ElPTy->getElementType();
- else
- ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
- }
- // And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
- V->getName() + ".idx", Call);
- Ops.clear();
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (const auto *EltTy : STy->elements()) {
+ if (!EltTy->isSingleValueType()) {
+ AllSimple = false;
+ break;
}
- // Since we're replacing a load make sure we take the alignment
- // of the previous load.
- LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
- newLoad->setAlignment(OrigLoad->getAlignment());
- // Transfer the AA info too.
- AAMDNodes AAInfo;
- OrigLoad->getAAMetadata(AAInfo);
- newLoad->setAAMetadata(AAInfo);
+ }
- Args.push_back(newLoad);
+ // Safe to transform, don't even bother trying to "promote" it.
+ // Passing the elements as a scalar will allow sroa to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
}
}
+ }
- // Push any varargs arguments on the list.
- for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
- Args.push_back(*AI);
- if (CallPAL.hasAttributes(ArgIndex)) {
- AttrBuilder B(CallPAL, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (const auto *EltTy : STy->elements()) {
+ if (EltTy == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
}
}
- // Add any function attributes.
- if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Call->getContext(),
- CallPAL.getFnAttributes()));
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
+ MaxElements))
+ ArgsToPromote.insert(PtrArg);
+ }
- SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return nullptr;
- Instruction *New;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call);
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
- AttributesVec));
- } else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
- AttributesVec));
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
- }
- New->setDebugLoc(Call->getDebugLoc());
- Args.clear();
- AttributesVec.clear();
+ return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
+}
- // Update the callgraph to know that the callsite has been transformed.
- CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
- CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
+PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ bool Changed = false, LocalChange;
- if (!Call->use_empty()) {
- Call->replaceAllUsesWith(New);
- New->takeName(Call);
+ // Iterate until we stop promoting from this SCC.
+ do {
+ LocalChange = false;
+
+ for (LazyCallGraph::Node &N : C) {
+ Function &OldF = N.getFunction();
+
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ // FIXME: This lambda must only be used with this function. We should
+ // skip the lambda and just get the AA results directly.
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ assert(&F == &OldF && "Called with an unexpected function!");
+ return FAM.getResult<AAManager>(F);
+ };
+
+ Function *NewF = promoteArguments(&OldF, AARGetter, 3u, None);
+ if (!NewF)
+ continue;
+ LocalChange = true;
+
+ // Directly substitute the functions in the call graph. Note that this
+ // requires the old function to be completely dead and completely
+ // replaced by the new function. It does no call graph updates, it merely
+ // swaps out the particular function mapped to a particular node in the
+ // graph.
+ C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
+ OldF.eraseFromParent();
}
- // Finally, remove the old call from the program, reducing the use-count of
- // F.
- Call->eraseFromParent();
- }
+ Changed |= LocalChange;
+ } while (LocalChange);
- // Since we have now created the new function, splice the body of the old
- // function right into the new function, leaving the old rotting hulk of the
- // function empty.
- NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+ if (!Changed)
+ return PreservedAnalyses::all();
- // Loop over the argument list, transferring uses of the old arguments over to
- // the new arguments, also transferring over the names as well.
- //
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
- I2 = NF->arg_begin(); I != E; ++I) {
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
- // If this is an unmodified argument, move the name and users over to the
- // new version.
- I->replaceAllUsesWith(&*I2);
- I2->takeName(&*I);
- ++I2;
- continue;
- }
-
- if (ByValArgsToTransform.count(&*I)) {
- // In the callee, we create an alloca, and store each of the new incoming
- // arguments into the alloca.
- Instruction *InsertPt = &NF->begin()->front();
+ return PreservedAnalyses::none();
+}
- // Just add all the struct element types.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
+namespace {
+/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+///
+struct ArgPromotion : public CallGraphSCCPass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getAAResultsAnalysisUsage(AU);
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
- InsertPt);
- I2->setName(I->getName()+"."+Twine(i));
- new StoreInst(&*I2++, Idx, InsertPt);
- }
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned MaxElements = 3)
+ : CallGraphSCCPass(ID), MaxElements(MaxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
- // Anything that used the arg should now use the alloca.
- I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&*I);
+private:
+ using llvm::Pass::doInitialization;
+ bool doInitialization(CallGraph &CG) override;
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned MaxElements;
+};
+}
- // If the alloca is used in a call, we must clear the tail flag since
- // the callee now uses an alloca from the caller.
- for (User *U : TheAlloca->users()) {
- CallInst *Call = dyn_cast<CallInst>(U);
- if (!Call)
- continue;
- Call->setTailCall(false);
- }
- continue;
- }
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
- if (I->use_empty())
- continue;
+Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
+ return new ArgPromotion(MaxElements);
+}
- // Otherwise, if we promoted this argument, then all users are load
- // instructions (or GEPs with only load users), and all loads should be
- // using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ if (skipSCC(SCC))
+ return false;
- while (!I->use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
- assert(ArgIndices.begin()->second.empty() &&
- "Load element should sort to front!");
- I2->setName(I->getName()+".val");
- LI->replaceAllUsesWith(&*I2);
- LI->eraseFromParent();
- DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
- << "' in function '" << F->getName() << "'\n");
- } else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
- IndicesVector Operands;
- Operands.reserve(GEP->getNumIndices());
- for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
- II != IE; ++II)
- Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- // GEPs with a single 0 index can be merged with direct loads
- if (Operands.size() == 1 && Operands.front() == 0)
- Operands.clear();
+ LegacyAARGetter AARGetter(*this);
- Function::arg_iterator TheArg = I2;
- for (ScalarizeTable::iterator It = ArgIndices.begin();
- It->second != Operands; ++It, ++TheArg) {
- assert(It != ArgIndices.end() && "GEP not handled??");
- }
+ bool Changed = false, LocalChange;
- std::string NewName = I->getName();
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- NewName += "." + utostr(Operands[i]);
- }
- NewName += ".val";
- TheArg->setName(NewName);
+ // Iterate until we stop promoting from this SCC.
+ do {
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (CallGraphNode *OldNode : SCC) {
+ Function *OldF = OldNode->getFunction();
+ if (!OldF)
+ continue;
+
+ auto ReplaceCallSite = [&](CallSite OldCS, CallSite NewCS) {
+ Function *Caller = OldCS.getInstruction()->getParent()->getParent();
+ CallGraphNode *NewCalleeNode =
+ CG.getOrInsertFunction(NewCS.getCalledFunction());
+ CallGraphNode *CallerNode = CG[Caller];
+ CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
+ };
+
+ if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
+ {ReplaceCallSite})) {
+ LocalChange = true;
- DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
- << "' of function '" << NF->getName() << "'\n");
+ // Update the call graph for the newly promoted function.
+ CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
+ NewNode->stealCalledFunctionsFrom(OldNode);
+ if (OldNode->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(OldNode);
+ else
+ OldF->setLinkage(Function::ExternalLinkage);
- // All of the uses must be load instructions. Replace them all with
- // the argument specified by ArgNo.
- while (!GEP->use_empty()) {
- LoadInst *L = cast<LoadInst>(GEP->user_back());
- L->replaceAllUsesWith(&*TheArg);
- L->eraseFromParent();
- }
- GEP->eraseFromParent();
+ // And update the SCC we're iterating as well.
+ SCC.ReplaceNode(OldNode, NewNode);
}
}
+ // Remember that we changed something.
+ Changed |= LocalChange;
+ } while (LocalChange);
- // Increment I2 past all of the arguments added for this promoted pointer.
- std::advance(I2, ArgIndices.size());
- }
-
- NF_CGN->stealCalledFunctionsFrom(CG[F]);
-
- // Now that the old function is dead, delete it. If there is a dangling
- // reference to the CallgraphNode, just leave the dead function around for
- // someone else to nuke.
- CallGraphNode *CGN = CG[F];
- if (CGN->getNumReferences() == 0)
- delete CG.removeFunctionFromModule(CGN);
- else
- F->setLinkage(Function::ExternalLinkage);
-
- return NF_CGN;
+ return Changed;
}
bool ArgPromotion::doInitialization(CallGraph &CG) {
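With the promotion machinery factored into promoteArguments(), the file now has two entry points: the new-pass-manager run() at the top of this hunk and the legacy runOnSCC() below it. As a rough sketch of how the new-PM entry point is scheduled (the analysis-registration boilerplate here is illustrative and may vary between LLVM versions):

    #include "llvm/Analysis/CGSCCPassManager.h"
    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/IPO/ArgumentPromotion.h"
    using namespace llvm;

    // Run ArgumentPromotionPass over each SCC in post order; PassBuilder wires
    // up the proxies so the pass can query per-function AA results as above.
    void runArgPromotion(Module &M) {
      PassBuilder PB;
      LoopAnalysisManager LAM;
      FunctionAnalysisManager FAM;
      CGSCCAnalysisManager CGAM;
      ModuleAnalysisManager MAM;
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

      CGSCCPassManager CGPM;
      CGPM.addPass(ArgumentPromotionPass());

      ModulePassManager MPM;
      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
      MPM.run(M, MAM);
    }
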
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index d75ed20..62b5a9c 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -60,6 +60,23 @@ static bool IsBetterCanonical(const GlobalVariable &A,
return A.hasGlobalUnnamedAddr();
}
+static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ GV->getAllMetadata(MDs);
+ for (const auto &V : MDs)
+ if (V.first != LLVMContext::MD_dbg)
+ return true;
+ return false;
+}
+
+static void copyDebugLocMetadata(const GlobalVariable *From,
+ GlobalVariable *To) {
+ SmallVector<DIGlobalVariableExpression *, 1> MDs;
+ From->getDebugInfo(MDs);
+ for (auto MD : MDs)
+ To->addDebugInfo(MD);
+}
+
static unsigned getAlignment(GlobalVariable *GV) {
unsigned Align = GV->getAlignment();
if (Align)
@@ -113,6 +130,10 @@ static bool mergeConstants(Module &M) {
if (GV->isWeakForLinker())
continue;
+ // Don't touch globals with metadata other than !dbg.
+ if (hasMetadataOtherThanDebugLoc(GV))
+ continue;
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
@@ -155,6 +176,9 @@ static bool mergeConstants(Module &M) {
if (!Slot->hasGlobalUnnamedAddr() && !GV->hasGlobalUnnamedAddr())
continue;
+ if (hasMetadataOtherThanDebugLoc(GV))
+ continue;
+
if (!GV->hasGlobalUnnamedAddr())
Slot->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
@@ -178,6 +202,8 @@ static bool mergeConstants(Module &M) {
getAlignment(Replacements[i].second)));
}
+ copyDebugLocMetadata(Replacements[i].first, Replacements[i].second);
+
// Eliminate any uses of the dead global.
Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
index ba2e60d..d94aa5d 100644
--- a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -95,11 +95,25 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
}
}
+ NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
+ if (CfiFunctionsMD) {
+ for (auto Func : CfiFunctionsMD->operands()) {
+ assert(Func->getNumOperands() >= 2);
+ for (unsigned I = 2; I < Func->getNumOperands(); ++I)
+ if (ConstantInt *TypeId =
+ extractNumericTypeId(cast<MDNode>(Func->getOperand(I).get())))
+ TypeIds.insert(TypeId->getZExtValue());
+ }
+ }
+
LLVMContext &Ctx = M.getContext();
Constant *C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
- Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), nullptr);
+ Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
Function *F = dyn_cast<Function>(C);
+ // Take over the existing function. The frontend emits a weak stub so that the
+ // linker knows about the symbol; this pass replaces the function body.
+ F->deleteBody();
F->setAlignment(4096);
auto args = F->arg_begin();
Value &CallSiteTypeId = *(args++);
@@ -117,7 +131,7 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
IRBuilder<> IRBFail(TrapBB);
Constant *CFICheckFailFn = M.getOrInsertFunction(
"__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx),
- Type::getInt8PtrTy(Ctx), nullptr);
+ Type::getInt8PtrTy(Ctx));
IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
IRBFail.CreateBr(ExitBB);
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1a5ed46..8e26849 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -166,41 +166,40 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
// Drop any attributes that were on the vararg arguments.
- AttributeSet PAL = CS.getAttributes();
- if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
- SmallVector<AttributeSet, 8> AttributesVec;
- for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
- AttributesVec.push_back(PAL.getSlotAttributes(i));
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
- PAL.getFnAttributes()));
- PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
+ AttributeList PAL = CS.getAttributes();
+ if (!PAL.isEmpty()) {
+ SmallVector<AttributeSet, 8> ArgAttrs;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
+ ArgAttrs.push_back(PAL.getParamAttributes(ArgNo));
+ PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(),
+ PAL.getRetAttributes(), ArgAttrs);
}
SmallVector<OperandBundleDef, 1> OpBundles;
CS.getOperandBundlesAsDefs(OpBundles);
- Instruction *New;
+ CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call);
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(PAL);
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call);
} else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(PAL);
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
+ NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+ cast<CallInst>(NewCS.getInstruction())
+ ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
}
- New->setDebugLoc(Call->getDebugLoc());
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(PAL);
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
Args.clear();
if (!Call->use_empty())
- Call->replaceAllUsesWith(New);
+ Call->replaceAllUsesWith(NewCS.getInstruction());
- New->takeName(Call);
+ NewCS->takeName(Call);
// Finally, remove the old call from the program, reducing the use-count of
// F.
@@ -681,8 +680,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
bool HasLiveReturnedArg = false;
// Set up to build a new list of parameter attributes.
- SmallVector<AttributeSet, 8> AttributesVec;
- const AttributeSet &PAL = F->getAttributes();
+ SmallVector<AttributeSet, 8> ArgAttrVec;
+ const AttributeList &PAL = F->getAttributes();
// Remember which arguments are still alive.
SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -696,16 +695,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
ArgAlive[i] = true;
-
- // Get the original parameter attributes (skipping the first one, that is
- // for the return value.
- if (PAL.hasAttributes(i + 1)) {
- AttrBuilder B(PAL, i + 1);
- if (B.contains(Attribute::Returned))
- HasLiveReturnedArg = true;
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Params.size(), B));
- }
+ ArgAttrVec.push_back(PAL.getParamAttributes(i));
+ HasLiveReturnedArg |= PAL.hasParamAttribute(i, Attribute::Returned);
} else {
++NumArgumentsEliminated;
DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument " << i
@@ -779,30 +770,24 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
assert(NRetTy && "No new return type found?");
// The existing function return attributes.
- AttributeSet RAttrs = PAL.getRetAttributes();
+ AttrBuilder RAttrs(PAL.getRetAttributes());
// Remove any incompatible attributes, but only if we removed all return
// values. Otherwise, ensure that we don't have any conflicting attributes
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NRetTy));
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
else
- assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
+ assert(!RAttrs.overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
"Return attributes no longer compatible?");
- if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
-
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- PAL.getFnAttributes()));
+ AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
+ assert(ArgAttrVec.size() == Params.size());
+ AttributeList NewPAL = AttributeList::get(
+ F->getContext(), PAL.getFnAttributes(), RetAttrs, ArgAttrVec);
// Create the new function type based on the recomputed parameters.
FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -829,18 +814,14 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
CallSite CS(F->user_back());
Instruction *Call = CS.getInstruction();
- AttributesVec.clear();
- const AttributeSet &CallPAL = CS.getAttributes();
-
- // The call return attributes.
- AttributeSet RAttrs = CallPAL.getRetAttributes();
+ ArgAttrVec.clear();
+ const AttributeList &CallPAL = CS.getAttributes();
- // Adjust in case the function was changed to return void.
- RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NF->getReturnType()));
- if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
+ // Adjust the call return attributes in case the function was changed to
+ // return void.
+ AttrBuilder RAttrs(CallPAL.getRetAttributes());
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
+ AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
@@ -852,57 +833,55 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- if (CallPAL.hasAttributes(i + 1)) {
- AttrBuilder B(CallPAL, i + 1);
+ AttributeSet Attrs = CallPAL.getParamAttributes(i);
+ if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
// call sites keep the return value alive just like 'returned'
- // attributes on function declaration but it's less clearly a win
- // and this is not an expected case anyway
- if (NRetTy != RetTy && B.contains(Attribute::Returned))
- B.removeAttribute(Attribute::Returned);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ // attributes on function declaration but it's less clearly a win and
+ // this is not an expected case anyway
+ ArgAttrVec.push_back(AttributeSet::get(
+ F->getContext(),
+ AttrBuilder(Attrs).removeAttribute(Attribute::Returned)));
+ } else {
+ // Otherwise, use the original attributes.
+ ArgAttrVec.push_back(Attrs);
}
}
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
- if (CallPAL.hasAttributes(i + 1)) {
- AttrBuilder B(CallPAL, i + 1);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
- }
+ ArgAttrVec.push_back(CallPAL.getParamAttributes(i));
}
- if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Call->getContext(),
- CallPAL.getFnAttributes()));
-
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
+ assert(ArgAttrVec.size() == Args.size());
+ AttributeList NewCallPAL = AttributeList::get(
+ F->getContext(), CallPAL.getFnAttributes(), RetAttrs, ArgAttrVec);
SmallVector<OperandBundleDef, 1> OpBundles;
CS.getOperandBundlesAsDefs(OpBundles);
- Instruction *New;
+ CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call->getParent());
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call->getParent());
} else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(NewCallPAL);
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
+ NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+ cast<CallInst>(NewCS.getInstruction())
+ ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
}
- New->setDebugLoc(Call->getDebugLoc());
-
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(NewCallPAL);
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
Args.clear();
+ ArgAttrVec.clear();
+ Instruction *New = NewCS.getInstruction();
if (!Call->use_empty()) {
if (New->getType() == Call->getType()) {
// Return type not changed? Just replace users then.
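The recurring pattern in this file's hunks — rebuilding a call site's attributes as an AttributeList assembled from separate function, return, and per-argument AttributeSets — looks roughly like this in isolation (the wrapper name is illustrative):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Keep only the attributes of the first NumArgs parameters; function- and
    // return-level attributes carry over unchanged.
    static AttributeList dropTrailingArgAttrs(LLVMContext &Ctx,
                                              const AttributeList &OldPAL,
                                              unsigned NumArgs) {
      SmallVector<AttributeSet, 8> ArgAttrs;
      for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
        ArgAttrs.push_back(OldPAL.getParamAttributes(ArgNo));
      return AttributeList::get(Ctx, OldPAL.getFnAttributes(),
                                OldPAL.getRetAttributes(), ArgAttrs);
    }
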
diff --git a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 98c4b17..ecff88c 100644
--- a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -17,9 +17,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Pass.h"
using namespace llvm;
#define DEBUG_TYPE "elim-avail-extern"
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 479fd18..d1147f7 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
#include <algorithm>
using namespace llvm;
@@ -53,18 +53,18 @@ static void makeVisible(GlobalValue &GV, bool Delete) {
}
namespace {
- /// @brief A pass to extract specific functions and their dependencies.
+ /// @brief A pass to extract specific global values and their dependencies.
class GVExtractorPass : public ModulePass {
SetVector<GlobalValue *> Named;
bool deleteStuff;
public:
static char ID; // Pass identification, replacement for typeid
- /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the
- /// specified function. Otherwise, it deletes as much of the module as
- /// possible, except for the function specified.
- ///
- explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ /// If deleteS is true, this pass deletes the specified global values.
+ /// Otherwise, it deletes as much of the module as possible, except for the
+ /// global values specified.
+ explicit GVExtractorPass(std::vector<GlobalValue*> &GVs,
+ bool deleteS = true)
: ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
bool runOnModule(Module &M) override {
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 402a665..813a4b6 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/FunctionAttrs.h"
-#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
@@ -34,7 +33,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/IPO.h"
using namespace llvm;
#define DEBUG_TYPE "functionattrs"
@@ -49,31 +48,35 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
-namespace {
-typedef SmallSetVector<Function *, 8> SCCNodeSet;
-}
+// FIXME: This is disabled by default to avoid exposing security vulnerabilities
+// in C/C++ code compiled by clang:
+// http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html
+static cl::opt<bool> EnableNonnullArgPropagation(
+ "enable-nonnull-arg-prop", cl::Hidden,
+ cl::desc("Try to propagate nonnull argument attributes from callsites to "
+ "caller functions."));
namespace {
-/// The three kinds of memory access relevant to 'readonly' and
-/// 'readnone' attributes.
-enum MemoryAccessKind {
- MAK_ReadNone = 0,
- MAK_ReadOnly = 1,
- MAK_MayWrite = 2
-};
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
}
-static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
+/// Returns the memory access attribute for function F using AAR for AA results,
+/// where SCCNodes is the current SCC.
+///
+/// If ThisBody is true, this function may examine the function body and will
+/// return a result pertaining to this copy of the function. If it is false, the
+/// result will be based only on AA results for the function declaration; it
+/// will be assumed that some other (perhaps less optimized) version of the
+/// function may be selected at link time.
+static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
+ AAResults &AAR,
const SCCNodeSet &SCCNodes) {
FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
if (MRB == FMRB_DoesNotAccessMemory)
// Already perfect!
return MAK_ReadNone;
- // Non-exact function definitions may not be selected at link time, and an
- // alternative version that writes to memory may be selected. See the comment
- // on GlobalValue::isDefinitionExact for more details.
- if (!F.hasExactDefinition()) {
+ if (!ThisBody) {
if (AliasAnalysis::onlyReadsMemory(MRB))
return MAK_ReadOnly;
@@ -172,9 +175,14 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
+MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
+ AAResults &AAR) {
+ return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
+}
+
/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
-static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
@@ -182,7 +190,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
- switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+ // Non-exact function definitions may not be selected at link time, and an
+ // alternative version that writes to memory may be selected. See the
+ // comment on GlobalValue::isDefinitionExact for more details.
+ switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
+ AAR, SCCNodes)) {
case MAK_MayWrite:
return false;
case MAK_ReadOnly:
@@ -209,15 +221,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
MadeChange = true;
// Clear out any existing attributes.
- AttrBuilder B;
- B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
- F->removeAttributes(
- AttributeSet::FunctionIndex,
- AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
+ F->removeFnAttr(Attribute::ReadOnly);
+ F->removeFnAttr(Attribute::ReadNone);
// Add in the new attribute.
- F->addAttribute(AttributeSet::FunctionIndex,
- ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
+ F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
if (ReadsMemory)
++NumReadOnly;
@@ -482,9 +490,6 @@ determinePointerReadAttrs(Argument *A,
static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
- AttrBuilder B;
- B.addAttribute(Attribute::Returned);
-
// Check each function in turn, determining if an argument is always returned.
for (Function *F : SCCNodes) {
// We can infer and propagate function attributes only when we know that the
@@ -522,7 +527,7 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
if (Value *RetArg = FindRetArg()) {
auto *A = cast<Argument>(RetArg);
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::Returned);
++NumReturned;
Changed = true;
}
@@ -531,15 +536,55 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
return Changed;
}
+/// If a callsite has arguments that are also arguments to the parent function,
+/// try to propagate attributes from the callsite's arguments to the parent's
+/// arguments. This may be important because inlining can cause information loss
+/// when attribute knowledge disappears with the inlined call.
+static bool addArgumentAttrsFromCallsites(Function &F) {
+ if (!EnableNonnullArgPropagation)
+ return false;
+
+ bool Changed = false;
+
+ // For an argument attribute to transfer from a callsite to the parent, the
+ // call must be guaranteed to execute every time the parent is called.
+ // Conservatively, just check for calls in the entry block that are guaranteed
+ // to execute.
+ // TODO: This could be enhanced by testing if the callsite post-dominates the
+ // entry block or by doing simple forward walks or backward walks to the
+ // callsite.
+ BasicBlock &Entry = F.getEntryBlock();
+ for (Instruction &I : Entry) {
+ if (auto CS = CallSite(&I)) {
+ if (auto *CalledFunc = CS.getCalledFunction()) {
+ for (auto &CSArg : CalledFunc->args()) {
+ if (!CSArg.hasNonNullAttr())
+ continue;
+
+ // If the non-null callsite argument operand is an argument to 'F'
+ // (the caller) and the call is guaranteed to execute, then the value
+ // must be non-null throughout 'F'.
+ auto *FArg = dyn_cast<Argument>(CS.getArgOperand(CSArg.getArgNo()));
+ if (FArg && !FArg->hasNonNullAttr()) {
+ FArg->addAttr(Attribute::NonNull);
+ Changed = true;
+ }
+ }
+ }
+ }
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ }
+
+ return Changed;
+}
+
/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
ArgumentGraph AG;
- AttrBuilder B;
- B.addAttribute(Attribute::NoCapture);
-
// Check each function in turn, determining which pointer arguments are not
// captured.
for (Function *F : SCCNodes) {
@@ -549,6 +594,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!F->hasExactDefinition())
continue;
+ Changed |= addArgumentAttrsFromCallsites(*F);
+
// Functions that are readonly (or readnone) and nounwind and don't return
// a value can't capture arguments. Don't analyze them.
if (F->onlyReadsMemory() && F->doesNotThrow() &&
@@ -556,7 +603,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -575,8 +622,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(
- AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
} else {
@@ -602,9 +648,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Self.insert(&*A);
Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None) {
- AttrBuilder B;
- B.addAttribute(R);
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(R);
Changed = true;
R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
}
@@ -629,7 +673,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
Argument *A = ArgumentSCC[0]->Definition;
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -671,7 +715,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
@@ -702,14 +746,12 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
}
if (ReadAttr != Attribute::None) {
- AttrBuilder B, R;
- B.addAttribute(ReadAttr);
- R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
// Clear out existing readonly/readnone attributes
- A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R));
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->removeAttr(Attribute::ReadOnly);
+ A->removeAttr(Attribute::ReadNone);
+ A->addAttr(ReadAttr);
ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
Changed = true;
}
@@ -769,7 +811,7 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(RVI);
- if (CS.paramHasAttr(0, Attribute::NoAlias))
+ if (CS.hasRetAttr(Attribute::NoAlias))
break;
if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
break;
@@ -792,7 +834,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
// pointers.
for (Function *F : SCCNodes) {
// Already noalias.
- if (F->doesNotAlias(0))
+ if (F->returnDoesNotAlias())
continue;
// We can infer and propagate function attributes only when we know that the
@@ -812,10 +854,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
bool MadeChange = false;
for (Function *F : SCCNodes) {
- if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
+ if (F->returnDoesNotAlias() ||
+ !F->getReturnType()->isPointerTy())
continue;
- F->setDoesNotAlias(0);
+ F->setReturnDoesNotAlias();
++NumNoAlias;
MadeChange = true;
}
@@ -905,7 +948,7 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// pointers.
for (Function *F : SCCNodes) {
// Already nonnull.
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::NonNull))
continue;
@@ -926,7 +969,7 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// Mark the function eagerly since we may discover a function
// which prevents us from speculating about the entire SCC
DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
++NumNonNullReturn;
MadeChange = true;
}
@@ -939,13 +982,13 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
if (SCCReturnsNonNull) {
for (Function *F : SCCNodes) {
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::NonNull) ||
!F->getReturnType()->isPointerTy())
continue;
DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
++NumNonNullReturn;
MadeChange = true;
}
@@ -1144,6 +1187,10 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
SCCNodes.insert(F);
}
+ // Skip it if the SCC only contains optnone functions.
+ if (SCCNodes.empty())
+ return Changed;
+
Changed |= addArgumentReturnedAttrs(SCCNodes);
Changed |= addReadAttrs(SCCNodes, AARGetter);
Changed |= addArgumentAttrs(SCCNodes);
@@ -1163,19 +1210,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
if (skipSCC(SCC))
return false;
-
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
-
- return runImpl(SCC, AARGetter);
+ return runImpl(SCC, LegacyAARGetter(*this));
}
namespace {
@@ -1275,16 +1310,9 @@ PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &CG = AM.getResult<CallGraphAnalysis>(M);
- bool Changed = deduceFunctionAttributeInRPO(M, CG);
-
- // CallGraphAnalysis holds AssertingVH and must be invalidated eagerly so
- // that other passes don't delete stuff from under it.
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<CallGraphAnalysis>(M);
-
- if (!Changed)
+ if (!deduceFunctionAttributeInRPO(M, CG))
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
PA.preserve<CallGraphAnalysis>();
return PA;
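The addArgumentAttrsFromCallsites() addition above (guarded by the off-by-default -enable-nonnull-arg-prop flag) is easiest to see on a small source-level example; a sketch, assuming the frontend lowers the attribute to a nonnull parameter attribute in IR:

    // The callee's first parameter is known to be nonnull.
    void callee(int *q) __attribute__((nonnull(1)));

    void caller(int *p) {
      callee(p);  // Sits in caller's entry block and always executes, so p must
                  // be nonnull on every call to caller(); the pass can mark
                  // caller's argument p nonnull for later passes to use.
    }
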
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 6b32f6c..233a36d 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -25,7 +26,6 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Object/IRObjectFile.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/SourceMgr.h"
@@ -64,6 +64,12 @@ static cl::opt<float> ImportHotMultiplier(
"import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"),
cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
+static cl::opt<float> ImportCriticalMultiplier(
+ "import-critical-multiplier", cl::init(100.0), cl::Hidden,
+ cl::value_desc("x"),
+ cl::desc(
+ "Multiply the `import-instr-limit` threshold for critical callsites"));
+
// FIXME: This multiplier was not really tuned up.
static cl::opt<float> ImportColdMultiplier(
"import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
@@ -75,12 +81,6 @@ static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
cl::desc("Compute dead symbols"));
-// Temporary allows the function import pass to disable always linking
-// referenced discardable symbols.
-static cl::opt<bool>
- DontForceImportReferencedDiscardableSymbols("disable-force-link-odr",
- cl::init(false), cl::Hidden);
-
static cl::opt<bool> EnableImportMetadata(
"enable-import-metadata", cl::init(
#if !defined(NDEBUG)
@@ -123,8 +123,8 @@ namespace {
/// - [insert you fancy metric here]
static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex &Index,
- const GlobalValueSummaryList &CalleeSummaryList,
- unsigned Threshold) {
+ ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
+ unsigned Threshold, StringRef CallerModulePath) {
auto It = llvm::find_if(
CalleeSummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
@@ -145,6 +145,21 @@ selectCallee(const ModuleSummaryIndex &Index,
auto *Summary = cast<FunctionSummary>(GVSummary);
+ // If this is a local function, make sure we import the copy
+ // in the caller's module. The only time a local function can
+ // share an entry in the index is if there is a local with the same name
+ // in another module that had the same source file name (in a different
+ // directory), where each was compiled in its own directory so there
+ // was no distinguishing path.
+ // However, do the import from another module if there is only one
+ // entry in the list - in that case this must be a reference due
+ // to indirect call profile data, since a function pointer can point to
+ // a local in another module.
+ if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
+ CalleeSummaryList.size() > 1 &&
+ Summary->modulePath() != CallerModulePath)
+ return false;
+
if (Summary->instCount() > Threshold)
return false;
@@ -159,17 +174,6 @@ selectCallee(const ModuleSummaryIndex &Index,
return cast<GlobalValueSummary>(It->get());
}
-/// Return the summary for the function \p GUID that fits the \p Threshold, or
-/// null if there's no match.
-static const GlobalValueSummary *selectCallee(GlobalValue::GUID GUID,
- unsigned Threshold,
- const ModuleSummaryIndex &Index) {
- auto CalleeSummaryList = Index.findGlobalValueSummaryList(GUID);
- if (CalleeSummaryList == Index.end())
- return nullptr; // This function does not have a summary
- return selectCallee(Index, CalleeSummaryList->second, Threshold);
-}
-
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */,
GlobalValue::GUID>;
@@ -183,10 +187,23 @@ static void computeImportForFunction(
FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
for (auto &Edge : Summary.calls()) {
- auto GUID = Edge.first.getGUID();
- DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n");
+ ValueInfo VI = Edge.first;
+ DEBUG(dbgs() << " edge -> " << VI.getGUID() << " Threshold:" << Threshold
+ << "\n");
+
+ if (VI.getSummaryList().empty()) {
+ // For SamplePGO, the indirect call targets for local functions will
+ // have their original names annotated in the profile. We try to find the
+ // corresponding PGOFuncName as the GUID.
+ auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
+ if (GUID == 0)
+ continue;
+ VI = Index.getValueInfo(GUID);
+ if (!VI)
+ continue;
+ }
- if (DefinedGVSummaries.count(GUID)) {
+ if (DefinedGVSummaries.count(VI.getGUID())) {
DEBUG(dbgs() << "ignored! Target already in destination module.\n");
continue;
}
@@ -196,13 +213,16 @@ static void computeImportForFunction(
return ImportHotMultiplier;
if (Hotness == CalleeInfo::HotnessType::Cold)
return ImportColdMultiplier;
+ if (Hotness == CalleeInfo::HotnessType::Critical)
+ return ImportCriticalMultiplier;
return 1.0;
};
const auto NewThreshold =
Threshold * GetBonusMultiplier(Edge.second.Hotness);
- auto *CalleeSummary = selectCallee(GUID, NewThreshold, Index);
+ auto *CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
+ Summary.modulePath());
if (!CalleeSummary) {
DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n");
continue;
@@ -234,7 +254,7 @@ static void computeImportForFunction(
const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
- auto &ProcessedThreshold = ImportList[ExportModulePath][GUID];
+ auto &ProcessedThreshold = ImportList[ExportModulePath][VI.getGUID()];
/// Since the traversal of the call graph is DFS, we can revisit a function
/// a second time with a higher threshold. In this case, it is added back to
/// the worklist with the new threshold.
@@ -250,7 +270,7 @@ static void computeImportForFunction(
// Make exports in the source module.
if (ExportLists) {
auto &ExportList = (*ExportLists)[ExportModulePath];
- ExportList.insert(GUID);
+ ExportList.insert(VI.getGUID());
if (!PreviouslyImported) {
// This is the first time this function was exported from its source
// module, so mark all functions and globals it references as exported
@@ -270,7 +290,7 @@ static void computeImportForFunction(
}
// Insert the newly imported function to the worklist.
- Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, GUID);
+ Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, VI.getGUID());
}
}
@@ -280,8 +300,7 @@ static void computeImportForFunction(
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
- const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
@@ -289,7 +308,7 @@ static void ComputeImportForModule(
// Populate the worklist with the import for the functions in the current
// module
for (auto &GVSummary : DefinedGVSummaries) {
- if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+ if (!Index.isGlobalValueLive(GVSummary.second)) {
DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
continue;
}
@@ -332,15 +351,14 @@ void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists,
- const DenseSet<GlobalValue::GUID> *DeadSymbols) {
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
// For each module that has function defined, compute the import/export lists.
for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
- &ExportLists, DeadSymbols);
+ &ExportLists);
}
// When computing imports we added all GUIDs referenced by anything
@@ -402,84 +420,71 @@ void llvm::ComputeCrossModuleImportForModule(
#endif
}
-DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
- const ModuleSummaryIndex &Index,
+void llvm::computeDeadSymbols(
+ ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ assert(!Index.withGlobalValueDeadStripping());
if (!ComputeDead)
- return DenseSet<GlobalValue::GUID>();
+ return;
if (GUIDPreservedSymbols.empty())
// Don't do anything when nothing is live, this is friendly with tests.
- return DenseSet<GlobalValue::GUID>();
- DenseSet<GlobalValue::GUID> LiveSymbols = GUIDPreservedSymbols;
- SmallVector<GlobalValue::GUID, 128> Worklist;
- Worklist.reserve(LiveSymbols.size() * 2);
- for (auto GUID : LiveSymbols) {
- DEBUG(dbgs() << "Live root: " << GUID << "\n");
- Worklist.push_back(GUID);
- }
- // Add values flagged in the index as live roots to the worklist.
- for (const auto &Entry : Index) {
- bool IsLiveRoot = llvm::any_of(
- Entry.second,
- [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
- return Summary->liveRoot();
- });
- if (!IsLiveRoot)
+ return;
+ unsigned LiveSymbols = 0;
+ SmallVector<ValueInfo, 128> Worklist;
+ Worklist.reserve(GUIDPreservedSymbols.size() * 2);
+ for (auto GUID : GUIDPreservedSymbols) {
+ ValueInfo VI = Index.getValueInfo(GUID);
+ if (!VI)
continue;
- DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
- Worklist.push_back(Entry.first);
+ for (auto &S : VI.getSummaryList())
+ S->setLive(true);
}
- while (!Worklist.empty()) {
- auto GUID = Worklist.pop_back_val();
- auto It = Index.findGlobalValueSummaryList(GUID);
- if (It == Index.end()) {
- DEBUG(dbgs() << "Not in index: " << GUID << "\n");
- continue;
- }
-
- // FIXME: we should only make the prevailing copy live here
- for (auto &Summary : It->second) {
- for (auto Ref : Summary->refs()) {
- auto RefGUID = Ref.getGUID();
- if (LiveSymbols.insert(RefGUID).second) {
- DEBUG(dbgs() << "Marking live (ref): " << RefGUID << "\n");
- Worklist.push_back(RefGUID);
- }
- }
- if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
- for (auto Call : FS->calls()) {
- auto CallGUID = Call.first.getGUID();
- if (LiveSymbols.insert(CallGUID).second) {
- DEBUG(dbgs() << "Marking live (call): " << CallGUID << "\n");
- Worklist.push_back(CallGUID);
- }
- }
+ // Add values flagged in the index as live roots to the worklist.
+ for (const auto &Entry : Index)
+ for (auto &S : Entry.second.SummaryList)
+ if (S->isLive()) {
+ DEBUG(dbgs() << "Live root: " << Entry.first << "\n");
+ Worklist.push_back(ValueInfo(&Entry));
+ ++LiveSymbols;
+ break;
}
+
+ // Make value live and add it to the worklist if it was not live before.
+ // FIXME: we should only make the prevailing copy live here
+ auto visit = [&](ValueInfo VI) {
+ for (auto &S : VI.getSummaryList())
+ if (S->isLive())
+ return;
+ for (auto &S : VI.getSummaryList())
+ S->setLive(true);
+ ++LiveSymbols;
+ Worklist.push_back(VI);
+ };
+
+ while (!Worklist.empty()) {
+ auto VI = Worklist.pop_back_val();
+ for (auto &Summary : VI.getSummaryList()) {
+ for (auto Ref : Summary->refs())
+ visit(Ref);
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
+ for (auto Call : FS->calls())
+ visit(Call.first);
if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
auto AliaseeGUID = AS->getAliasee().getOriginalName();
- if (LiveSymbols.insert(AliaseeGUID).second) {
- DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
- Worklist.push_back(AliaseeGUID);
- }
+ ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID);
+ if (AliaseeVI)
+ visit(AliaseeVI);
}
}
}
- DenseSet<GlobalValue::GUID> DeadSymbols;
- DeadSymbols.reserve(
- std::min(Index.size(), Index.size() - LiveSymbols.size()));
- for (auto &Entry : Index) {
- auto GUID = Entry.first;
- if (!LiveSymbols.count(GUID)) {
- DEBUG(dbgs() << "Marking dead: " << GUID << "\n");
- DeadSymbols.insert(GUID);
- }
- }
- DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and "
- << DeadSymbols.size() << " symbols Dead \n");
- NumDeadSymbols += DeadSymbols.size();
- NumLiveSymbols += LiveSymbols.size();
- return DeadSymbols;
+ Index.setWithGlobalValueDeadStripping();
+
+ unsigned DeadSymbols = Index.size() - LiveSymbols;
+ DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
+ << " symbols Dead \n");
+ NumDeadSymbols += DeadSymbols;
+ NumLiveSymbols += LiveSymbols;
}
/// Compute the set of summaries needed for a ThinLTO backend compilation of
@@ -522,9 +527,24 @@ llvm::EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename,
/// Fixup WeakForLinker linkages in \p TheModule based on summary analysis.
void llvm::thinLTOResolveWeakForLinkerModule(
Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
+ auto ConvertToDeclaration = [](GlobalValue &GV) {
+ DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName() << "\n");
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ F->deleteBody();
+ F->clearMetadata();
+ } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
+ V->setInitializer(nullptr);
+ V->setLinkage(GlobalValue::ExternalLinkage);
+ V->clearMetadata();
+ } else
+ // For now we don't resolve or drop aliases. Once we do we'll
+ // need to add support here for creating either a function or
+ // variable declaration, and return the new GlobalValue* for
+ // the caller to use.
+ llvm_unreachable("Expected function or variable");
+ };
+
auto updateLinkage = [&](GlobalValue &GV) {
- if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
- return;
// See if the global summary analysis computed a new resolved linkage.
const auto &GS = DefinedGlobals.find(GV.getGUID());
if (GS == DefinedGlobals.end())
@@ -532,18 +552,40 @@ void llvm::thinLTOResolveWeakForLinkerModule(
auto NewLinkage = GS->second->linkage();
if (NewLinkage == GV.getLinkage())
return;
- DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
- << GV.getLinkage() << " to " << NewLinkage << "\n");
- GV.setLinkage(NewLinkage);
- // Remove functions converted to available_externally from comdats,
+
+ // Switch the linkage to weakany if asked for, e.g. we do this for
+ // linker redefined symbols (via --wrap or --defsym).
+ // We record that the visibility should be changed here in `addThinLTO`
+ // as we need access to the resolution vectors for each input file in
+ // order to find which symbols have been redefined.
+ // We may consider reorganizing this code and moving the linkage recording
+ // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex.
+ if (NewLinkage == GlobalValue::WeakAnyLinkage) {
+ GV.setLinkage(NewLinkage);
+ return;
+ }
+
+ if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
+ return;
+ // Check for a non-prevailing def that has interposable linkage
+ // (e.g. non-odr weak or linkonce). In that case we can't simply
+ // convert to available_externally, since it would lose the
+ // interposable property and possibly get inlined. Simply drop
+ // the definition in that case.
+ if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
+ GlobalValue::isInterposableLinkage(GV.getLinkage()))
+ ConvertToDeclaration(GV);
+ else {
+ DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
+ << GV.getLinkage() << " to " << NewLinkage << "\n");
+ GV.setLinkage(NewLinkage);
+ }
+ // Remove declarations from comdats, including available_externally
// as this is a declaration for the linker, and will be dropped eventually.
// It is illegal for comdats to contain declarations.
auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
- if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
- assert(GO->hasAvailableExternallyLinkage() &&
- "Expected comdat on definition (possibly available external)");
+ if (GO && GO->isDeclarationForLinker() && GO->hasComdat())
GO->setComdat(nullptr);
- }
};
// Process functions and global now
@@ -562,7 +604,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// the current module.
StringSet<> AsmUndefinedRefs;
ModuleSymbolTable::CollectAsmSymbols(
- Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
+ TheModule,
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
if (Flags & object::BasicSymbolRef::SF_Undefined)
AsmUndefinedRefs.insert(Name);
@@ -576,8 +618,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
return true;
// Lookup the linkage recorded in the summaries during global analysis.
- const auto &GS = DefinedGlobals.find(GV.getGUID());
- GlobalValue::LinkageTypes Linkage;
+ auto GS = DefinedGlobals.find(GV.getGUID());
if (GS == DefinedGlobals.end()) {
// Must have been promoted (possibly conservatively). Find original
// name so that we can access the correct summary and see if it can
@@ -589,7 +630,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
std::string OrigId = GlobalValue::getGlobalIdentifier(
OrigName, GlobalValue::InternalLinkage,
TheModule.getSourceFileName());
- const auto &GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
+ GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
if (GS == DefinedGlobals.end()) {
// Also check the original non-promoted non-globalized name. In some
// cases a preempted weak value is linked in as a local copy because
@@ -597,15 +638,11 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// In that case, since it was originally not a local value, it was
// recorded in the index using the original name.
// FIXME: This may not be needed once PR27866 is fixed.
- const auto &GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
+ GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
assert(GS != DefinedGlobals.end());
- Linkage = GS->second->linkage();
- } else {
- Linkage = GS->second->linkage();
}
- } else
- Linkage = GS->second->linkage();
- return !GlobalValue::isLocalLinkage(Linkage);
+ }
+ return !GlobalValue::isLocalLinkage(GS->second->linkage());
};
// FIXME: See if we can just internalize directly here via linkage changes
@@ -617,14 +654,12 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// index.
//
Expected<bool> FunctionImporter::importFunctions(
- Module &DestModule, const FunctionImporter::ImportMapTy &ImportList,
- bool ForceImportReferencedDiscardableSymbols) {
+ Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
DEBUG(dbgs() << "Starting import for Module "
<< DestModule.getModuleIdentifier() << "\n");
unsigned ImportedCount = 0;
- // Linker that will be used for importing function
- Linker TheLinker(DestModule);
+ IRMover Mover(DestModule);
// Do the actual import of functions now, one Module at a time
std::set<StringRef> ModuleNameOrderedList;
for (auto &FunctionsToImportPerModule : ImportList) {
@@ -648,7 +683,7 @@ Expected<bool> FunctionImporter::importFunctions(
auto &ImportGUIDs = FunctionsToImportPerModule->second;
// Find the globals to import
- DenseSet<const GlobalValue *> GlobalsToImport;
+ SetVector<GlobalValue *> GlobalsToImport;
for (Function &F : *SrcModule) {
if (!F.hasName())
continue;
@@ -687,6 +722,13 @@ Expected<bool> FunctionImporter::importFunctions(
}
}
for (GlobalAlias &GA : SrcModule->aliases()) {
+ // FIXME: This should eventually be controlled entirely by the summary.
+ if (FunctionImportGlobalProcessing::doImportAsDefinition(
+ &GA, &GlobalsToImport)) {
+ GlobalsToImport.insert(&GA);
+ continue;
+ }
+
if (!GA.hasName())
continue;
auto GUID = GA.getGUID();
@@ -731,12 +773,9 @@ Expected<bool> FunctionImporter::importFunctions(
<< " from " << SrcModule->getSourceFileName() << "\n";
}
- // Instruct the linker that the client will take care of linkonce resolution
- unsigned Flags = Linker::Flags::None;
- if (!ForceImportReferencedDiscardableSymbols)
- Flags |= Linker::Flags::DontForceLinkLinkonceODR;
-
- if (TheLinker.linkInModule(std::move(SrcModule), Flags, &GlobalsToImport))
+ if (Mover.move(std::move(SrcModule), GlobalsToImport.getArrayRef(),
+ [](GlobalValue &, IRMover::ValueAdder) {},
+ /*IsPerformingImport=*/true))
report_fatal_error("Function Import: link error");
ImportedCount += GlobalsToImport.size();
@@ -778,7 +817,7 @@ static bool doImportingForModule(Module &M) {
// is only enabled when testing importing via the 'opt' tool, which does
// not do the ThinLink that would normally determine what values to promote.
for (auto &I : *Index) {
- for (auto &S : I.second) {
+ for (auto &S : I.second.SummaryList) {
if (GlobalValue::isLocalLinkage(S->linkage()))
S->setLinkage(GlobalValue::ExternalLinkage);
}
@@ -796,8 +835,7 @@ static bool doImportingForModule(Module &M) {
return loadFile(Identifier, M.getContext());
};
FunctionImporter Importer(*Index, ModuleLoader);
- Expected<bool> Result = Importer.importFunctions(
- M, ImportList, !DontForceImportReferencedDiscardableSymbols);
+ Expected<bool> Result = Importer.importFunctions(M, ImportList);
// FIXME: Probably need to propagate Errors through the pass manager.
if (!Result) {
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 7a04de3..c91e8b4 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -25,7 +25,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include <unordered_map>
+
using namespace llvm;
#define DEBUG_TYPE "globaldce"
@@ -50,7 +50,14 @@ namespace {
if (skipModule(M))
return false;
+ // We need a minimally functional dummy module analysis manager. It needs
+ // to at least know about the possibility of proxying a function analysis
+ // manager.
+ FunctionAnalysisManager DummyFAM;
ModuleAnalysisManager DummyMAM;
+ DummyMAM.registerPass(
+ [&] { return FunctionAnalysisManagerModuleProxy(DummyFAM); });
+
auto PA = Impl.run(M, DummyMAM);
return !PA.areAllPreserved();
}
@@ -78,9 +85,67 @@ static bool isEmptyFunction(Function *F) {
return RI.getReturnValue() == nullptr;
}
-PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
+/// Compute the set of GlobalValues that depend on V.
+/// The recursion stops as soon as a GlobalValue is met.
+void GlobalDCEPass::ComputeDependencies(Value *V,
+ SmallPtrSetImpl<GlobalValue *> &Deps) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Function *Parent = I->getParent()->getParent();
+ Deps.insert(Parent);
+ } else if (auto *GV = dyn_cast<GlobalValue>(V)) {
+ Deps.insert(GV);
+ } else if (auto *CE = dyn_cast<Constant>(V)) {
+ // Avoid walking the whole tree of a big ConstantExpr multiple times.
+ auto Where = ConstantDependenciesCache.find(CE);
+ if (Where != ConstantDependenciesCache.end()) {
+ auto const &K = Where->second;
+ Deps.insert(K.begin(), K.end());
+ } else {
+ SmallPtrSetImpl<GlobalValue *> &LocalDeps = ConstantDependenciesCache[CE];
+ for (User *CEUser : CE->users())
+ ComputeDependencies(CEUser, LocalDeps);
+ Deps.insert(LocalDeps.begin(), LocalDeps.end());
+ }
+ }
+}
+
+void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) {
+ SmallPtrSet<GlobalValue *, 8> Deps;
+ for (User *User : GV.users())
+ ComputeDependencies(User, Deps);
+ Deps.erase(&GV); // Remove self-reference.
+ for (GlobalValue *GVU : Deps) {
+ GVDependencies.insert(std::make_pair(GVU, &GV));
+ }
+}
+
+/// Mark the given GlobalValue as live.
+void GlobalDCEPass::MarkLive(GlobalValue &GV,
+ SmallVectorImpl<GlobalValue *> *Updates) {
+ auto const Ret = AliveGlobals.insert(&GV);
+ if (!Ret.second)
+ return;
+
+ if (Updates)
+ Updates->push_back(&GV);
+ if (Comdat *C = GV.getComdat()) {
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ MarkLive(*CM.second, Updates); // Recursion depth is only two because only
+ // globals in the same comdat are visited.
+ }
+}
+
+PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
bool Changed = false;
+ // The algorithm first computes the set L of global variables that are
+ // trivially live. Then it walks the initialization of these variables to
+ // compute the globals used to initialize them, which effectively builds a
+ // directed graph where nodes are global variables, and an edge from A to B
+ // means B is used to initialize A. Finally, it propagates the liveness
+ // information through the graph starting from the nodes in L. Nodes not
+ // marked as alive are discarded. (A standalone sketch of this worklist
+ // propagation follows this hunk.)
+
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
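The comment above summarizes the reworked GlobalDCE algorithm: collect the trivially live roots, record for every global which other globals use it (memoizing the walk over large ConstantExprs), then propagate liveness along those edges with a worklist. The following is a minimal standalone sketch of that propagation step, using plain std containers and made-up symbol names rather than the LLVM data structures:

    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Edges mirror GVDependencies: an entry (User, Used) records that if
      // User is live, Used must be kept live as well.
      std::multimap<std::string, std::string> GVDependencies = {
          {"main", "helper"}, {"helper", "table"}, {"orphan", "other_table"}};

      // Trivially live roots, analogous to AliveGlobals after the initial
      // scan over externally visible / non-discardable globals.
      std::set<std::string> AliveGlobals = {"main"};

      // Worklist propagation, analogous to the NewLiveGVs loop in the pass.
      std::vector<std::string> NewLiveGVs(AliveGlobals.begin(),
                                          AliveGlobals.end());
      while (!NewLiveGVs.empty()) {
        std::string LGV = NewLiveGVs.back();
        NewLiveGVs.pop_back();
        auto Range = GVDependencies.equal_range(LGV);
        for (auto It = Range.first; It != Range.second; ++It)
          if (AliveGlobals.insert(It->second).second) // enqueue only new ones
            NewLiveGVs.push_back(It->second);
      }

      for (const char *Name : {"main", "helper", "table", "orphan", "other_table"})
        std::cout << Name << (AliveGlobals.count(Name) ? ": live\n" : ": dead\n");
    }

The real pass additionally chases comdat members inside MarkLive, so marking one member of a comdat keeps every member of that comdat alive.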
@@ -103,21 +168,39 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
// initializer.
if (!GO.isDeclaration() && !GO.hasAvailableExternallyLinkage())
if (!GO.isDiscardableIfUnused())
- GlobalIsNeeded(&GO);
+ MarkLive(GO);
+
+ UpdateGVDependencies(GO);
}
+ // Compute direct dependencies of aliases.
for (GlobalAlias &GA : M.aliases()) {
Changed |= RemoveUnusedGlobalValue(GA);
// Externally visible aliases are needed.
if (!GA.isDiscardableIfUnused())
- GlobalIsNeeded(&GA);
+ MarkLive(GA);
+
+ UpdateGVDependencies(GA);
}
+ // Compute direct dependencies of ifuncs.
for (GlobalIFunc &GIF : M.ifuncs()) {
Changed |= RemoveUnusedGlobalValue(GIF);
// Externally visible ifuncs are needed.
if (!GIF.isDiscardableIfUnused())
- GlobalIsNeeded(&GIF);
+ MarkLive(GIF);
+
+ UpdateGVDependencies(GIF);
+ }
+
+ // Propagate liveness from collected Global Values through the computed
+ // dependencies.
+ SmallVector<GlobalValue *, 8> NewLiveGVs{AliveGlobals.begin(),
+ AliveGlobals.end()};
+ while (!NewLiveGVs.empty()) {
+ GlobalValue *LGV = NewLiveGVs.pop_back_val();
+ for (auto &&GVD : make_range(GVDependencies.equal_range(LGV)))
+ MarkLive(*GVD.second, &NewLiveGVs);
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
@@ -154,7 +237,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
GA.setAliasee(nullptr);
}
- // The third pass drops targets of ifuncs which are dead...
+ // The fourth pass drops targets of ifuncs which are dead...
std::vector<GlobalIFunc*> DeadIFuncs;
for (GlobalIFunc &GIF : M.ifuncs())
if (!AliveGlobals.count(&GIF)) {
@@ -188,7 +271,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
// Make sure that all memory is released
AliveGlobals.clear();
- SeenConstants.clear();
+ ConstantDependenciesCache.clear();
+ GVDependencies.clear();
ComdatMembers.clear();
if (Changed)
@@ -196,60 +280,6 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
-/// GlobalIsNeeded - the specific global value as needed, and
-/// recursively mark anything that it uses as also needed.
-void GlobalDCEPass::GlobalIsNeeded(GlobalValue *G) {
- // If the global is already in the set, no need to reprocess it.
- if (!AliveGlobals.insert(G).second)
- return;
-
- if (Comdat *C = G->getComdat()) {
- for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
- GlobalIsNeeded(CM.second);
- }
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
- // If this is a global variable, we must make sure to add any global values
- // referenced by the initializer to the alive set.
- if (GV->hasInitializer())
- MarkUsedGlobalsAsNeeded(GV->getInitializer());
- } else if (GlobalIndirectSymbol *GIS = dyn_cast<GlobalIndirectSymbol>(G)) {
- // The target of a global alias or ifunc is needed.
- MarkUsedGlobalsAsNeeded(GIS->getIndirectSymbol());
- } else {
- // Otherwise this must be a function object. We have to scan the body of
- // the function looking for constants and global values which are used as
- // operands. Any operands of these types must be processed to ensure that
- // any globals used will be marked as needed.
- Function *F = cast<Function>(G);
-
- for (Use &U : F->operands())
- MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
-
- for (BasicBlock &BB : *F)
- for (Instruction &I : BB)
- for (Use &U : I.operands())
- if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
- GlobalIsNeeded(GV);
- else if (Constant *C = dyn_cast<Constant>(U))
- MarkUsedGlobalsAsNeeded(C);
- }
-}
-
-void GlobalDCEPass::MarkUsedGlobalsAsNeeded(Constant *C) {
- if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return GlobalIsNeeded(GV);
-
- // Loop over all of the operands of the constant, adding any globals they
- // use to the list of needed globals.
- for (Use &U : C->operands()) {
- // If we've already processed this constant there's no need to do it again.
- Constant *Op = dyn_cast<Constant>(U);
- if (Op && SeenConstants.insert(Op).second)
- MarkUsedGlobalsAsNeeded(Op);
- }
-}
-
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
// GlobalValue, looking for the constant pointer ref that may be pointing to it.
// If found, check to see if the constant pointer ref is safe to destroy, and if
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 5b0d5e3..93eab68 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -239,7 +239,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// we delete a constant array, we may also be holding pointer to one of its
// elements (or an element of one of its elements if we're dealing with an
// array of arrays) in the worklist.
- SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end());
+ SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());
while (!WorkList.empty()) {
Value *UV = WorkList.pop_back_val();
if (!UV)
@@ -837,7 +837,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
// The global is initialized when the store to it occurs.
new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
- SI->getOrdering(), SI->getSynchScope(), SI);
+ SI->getOrdering(), SI->getSyncScopeID(), SI);
SI->eraseFromParent();
continue;
}
@@ -854,7 +854,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Replace the cmp X, 0 with a use of the bool value.
// Sink the load to where the compare was, if atomic rules allow us to.
Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
- LI->getOrdering(), LI->getSynchScope(),
+ LI->getOrdering(), LI->getSyncScopeID(),
LI->isUnordered() ? (Instruction*)ICI : LI);
InitBoolUsed = true;
switch (ICI->getPredicate()) {
@@ -1605,7 +1605,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
- LI->getOrdering(), LI->getSynchScope(), LI);
+ LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
"This is not a form that we understand!");
@@ -1614,12 +1614,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
}
new StoreInst(StoreVal, NewGV, false, 0,
- SI->getOrdering(), SI->getSynchScope(), SI);
+ SI->getOrdering(), SI->getSyncScopeID(), SI);
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
- LI->getOrdering(), LI->getSynchScope(), LI);
+ LI->getOrdering(), LI->getSyncScopeID(), LI);
Value *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
@@ -1792,7 +1792,9 @@ static void makeAllConstantUsesInstructions(Constant *C) {
NewU->insertBefore(UI);
UI->replaceUsesOfWith(U, NewU);
}
- U->dropAllReferences();
+ // We've replaced all the uses, so destroy the constant. (destroyConstant
+ // will update value handles and metadata.)
+ U->destroyConstant();
}
}
@@ -1819,12 +1821,14 @@ static bool processInternalGlobal(
GS.AccessingFunction->doesNotRecurse() &&
isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
LookupDomTree)) {
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getValueType();
// FIXME: Pass Global's alignment when globals have alignment
- AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
+ AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
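For context, this hunk sits in processInternalGlobal's localization path: a global accessed from exactly one non-recursive function, and dead on entry to it, is turned into an alloca in that function's entry block; the new lines simply fetch the DataLayout so the alloca is created in the correct alloca address space. A hypothetical source-level view of the effect (illustrative only, not the pass's own test case):

    // Before: an internal global touched only from run(), which does not
    // recurse and stores to the global before ever reading it (dead on entry).
    static int Counter;
    int run(int N) {
      Counter = 0;
      for (int I = 0; I < N; ++I)
        Counter += I;
      return Counter;
    }

    // After localization the effect is as if the global had been a stack slot
    // in run()'s entry block; the global itself can then be dropped.
    int run_localized(int N) {
      int LocalCounter = 0;
      for (int I = 0; I < N; ++I)
        LocalCounter += I;
      return LocalCounter;
    }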
@@ -1977,16 +1981,11 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
- for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- unsigned Index = Attrs.getSlotIndex(i);
- if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
- continue;
-
- // There can be only one.
- return Attrs.removeAttribute(C, Index, Attribute::Nest);
- }
-
+static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
+ // There can be at most one attribute set with a nest attribute.
+ unsigned NestIndex;
+ if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
+ return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
return Attrs;
}
@@ -2027,6 +2026,24 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
continue;
}
+ // LLVM's definition of dominance allows instructions that are cyclic
+ // in unreachable blocks, e.g.:
+ // %pat = select i1 %condition, @global, i16* %pat
+ // because any instruction dominates an instruction in a block that's
+ // not reachable from entry.
+ // So, remove unreachable blocks from the function, because a) there's
+ // no point in analyzing them and b) GlobalOpt should otherwise grow
+ // some more complicated logic to break these cycles.
+ // Removing unreachable blocks might invalidate the dominator so we
+ // recalculate it.
+ if (!F->isDeclaration()) {
+ if (removeUnreachableBlocks(*F)) {
+ auto &DT = LookupDomTree(*F);
+ DT.recalculate(*F);
+ Changed = true;
+ }
+ }
+
Changed |= processGlobal(*F, TLI, LookupDomTree);
if (!F->hasLocalLinkage())
@@ -2387,7 +2404,7 @@ OptimizeGlobalAliases(Module &M,
}
static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
- LibFunc::Func F = LibFunc::cxa_atexit;
+ LibFunc F = LibFunc_cxa_atexit;
if (!TLI->has(F))
return nullptr;
@@ -2396,7 +2413,7 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
return nullptr;
// Make sure that the function has the correct prototype.
- if (!TLI->getLibFunc(*Fn, F) || F != LibFunc::cxa_atexit)
+ if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
return nullptr;
return Fn;
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index bbbd096..e47d881 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constants.h"
@@ -23,6 +22,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
#include <set>
@@ -85,7 +85,16 @@ bool splitGlobal(GlobalVariable &GV) {
uint64_t ByteOffset = cast<ConstantInt>(
cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
->getZExtValue();
- if (ByteOffset < SplitBegin || ByteOffset >= SplitEnd)
+ // Type metadata may be attached one byte after the end of the vtable, for
+ // classes without virtual methods in the Itanium ABI. AFAIK, it is never
+ // attached to the first byte of a vtable. Subtract one to get the right
+ // slice.
+ // This is making an assumption that vtable groups are the only kinds of
+ // global variables that !type metadata can be attached to, and that they
+ // are either Itanium ABI vtable groups or contain a single vtable (i.e.
+ // Microsoft ABI vtables).
+ uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
+ if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd)
continue;
SplitGV->addMetadata(
LLVMContext::MD_type,
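The new check above moves the association point of a !type annotation back by one byte, because in the Itanium ABI the metadata for a vtable with no virtual methods can sit one past the end of that vtable. A self-contained sketch of just the adjusted range test, with invented offsets (not the pass's own data structures):

    #include <cstdint>
    #include <iostream>

    // Decide whether a !type annotation at ByteOffset belongs to the slice
    // [SplitBegin, SplitEnd) of a split global, mirroring the adjusted check.
    bool belongsToSlice(uint64_t ByteOffset, uint64_t SplitBegin,
                        uint64_t SplitEnd) {
      // An annotation exactly at offset 0 cannot be "one past the end" of a
      // preceding vtable, so only non-zero offsets are shifted back by one.
      uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
      return AttachedTo >= SplitBegin && AttachedTo < SplitEnd;
    }

    int main() {
      // A 16-byte global split into [0, 8) and [8, 16): an annotation at
      // offset 8 (one past the end of the first vtable) stays with the first
      // slice rather than moving to the second.
      std::cout << belongsToSlice(8, 0, 8) << ' '
                << belongsToSlice(8, 8, 16) << '\n';
    }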
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index 916135e..f79b610 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -24,6 +23,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
using namespace llvm;
#define DEBUG_TYPE "ipconstprop"
@@ -136,7 +136,13 @@ static bool PropagateConstantReturn(Function &F) {
// For more details, see GlobalValue::mayBeDerefined.
if (!F.isDefinitionExact())
return false;
-
+
+ // Don't touch naked functions. They may contain asm returning a
+ // value we don't see, so we may end up interprocedurally propagating
+ // the return value incorrectly.
+ if (F.hasFnAttribute(Attribute::Naked))
+ return false;
+
// Check to see if this function returns a constant.
SmallVector<Value *,4> RetVals;
StructType *STy = dyn_cast<StructType>(F.getReturnType());
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 89518f3..5bb305c 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -13,10 +13,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/IPO.h"
-#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 2ef299d..15d7515 100644
--- a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 1770445..50e7cc8 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -48,7 +48,7 @@ public:
}
explicit SimpleInliner(InlineParams Params)
- : LegacyInlinerBase(ID), Params(Params) {
+ : LegacyInlinerBase(ID), Params(std::move(Params)) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -61,7 +61,8 @@ public:
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, PSI);
+ return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
+ /*GetBFI=*/None, PSI);
}
bool runOnSCC(CallGraphSCC &SCC) override;
@@ -92,8 +93,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
}
Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
- unsigned SizeOptLevel) {
- return new SimpleInliner(llvm::getInlineParams(OptLevel, SizeOptLevel));
+ unsigned SizeOptLevel,
+ bool DisableInlineHotCallSite) {
+ auto Param = llvm::getInlineParams(OptLevel, SizeOptLevel);
+ if (DisableInlineHotCallSite)
+ Param.HotCallSiteThreshold = 0;
+ return new SimpleInliner(Param);
}
Pass *llvm::createFunctionInliningPass(InlineParams &Params) {
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 3f4731c..317770d 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
@@ -260,8 +261,8 @@ static bool InlineCallIfPossible(
/// Return true if inlining of CS can block the caller from being
/// inlined which is proved to be more beneficial. \p IC is the
/// estimated inline cost associated with callsite \p CS.
-/// \p TotalAltCost will be set to the estimated cost of inlining the caller
-/// if \p CS is suppressed for inlining.
+/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
+/// caller if \p CS is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
int &TotalSecondaryCost,
@@ -288,7 +289,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// treating them as truly abstract units etc.
TotalSecondaryCost = 0;
// The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ int CandidateCost = IC.getCost() - 1;
// This bool tracks what happens if we do NOT inline C into B.
bool callerWillBeRemoved = Caller->hasLocalLinkage();
// This bool tracks what happens if we DO inline C into B.
@@ -325,7 +326,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (callerWillBeRemoved && !Caller->use_empty())
+ if (callerWillBeRemoved && !Caller->hasOneUse())
TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
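The doc-comment fix above refers to shouldBeDeferred's bookkeeping: before inlining a call site, the pass estimates TotalSecondaryCost, the price of re-inlining the caller at its own call sites, and defers the candidate when that is cheaper than inlining the candidate itself. A toy rendering of that comparison, with invented costs and a fixed threshold standing in for the real InlineCost machinery:

    #include <iostream>
    #include <vector>

    int main() {
      // Made-up numbers standing in for values the pass gets from InlineCost.
      int CalleeCost = 120;  // cost of inlining the candidate call site
      int Threshold = 200;   // toy stand-in for the per-site threshold
      std::vector<int> OuterCallSiteCosts = {90, 110, 140}; // callers of the caller

      // Extra cost the caller's body would impose on each outer call site if
      // the candidate is inlined into it (one call instruction goes away).
      int CandidateCost = CalleeCost - 1;

      int TotalSecondaryCost = 0;
      bool InliningPreventsSomeOuterInline = false;
      for (int OuterCost : OuterCallSiteCosts)
        if (OuterCost + CandidateCost > Threshold) {
          // Inlining the candidate would make this outer site too expensive
          // to inline; account for the opportunity that would be lost.
          InliningPreventsSomeOuterInline = true;
          TotalSecondaryCost += OuterCost;
        }

      // Defer the candidate only when the lost outer inlining is cheaper than
      // inlining the candidate itself.
      bool Defer = InliningPreventsSomeOuterInline &&
                   TotalSecondaryCost < CalleeCost;
      std::cout << "TotalSecondaryCost=" << TotalSecondaryCost
                << " defer=" << Defer << '\n';
    }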
@@ -342,6 +343,7 @@ static bool shouldInline(CallSite CS,
InlineCost IC = GetInlineCost(CS);
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
@@ -355,19 +357,20 @@ static bool shouldInline(CallSite CS,
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", Callee)
- << " should never be inlined (cost=never)");
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ << NV("Callee", Callee) << " not inlined into "
+ << NV("Caller", Caller)
+ << " because it should never be inlined (cost=never)");
return false;
}
- Function *Caller = CS.getCaller();
if (!IC) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", Callee) << " too costly to inline (cost="
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
+ << NV("Callee", Callee) << " not inlined into "
+ << NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return false;
@@ -378,8 +381,8 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
<< " Cost = " << IC.getCost()
<< ", outer Cost = " << TotalSecondaryCost << '\n');
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
- "IncreaseCostInOtherContexts", Call)
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
+ Call)
<< "Not inlining. Cost of inlining " << NV("Callee", Callee)
<< " increases the cost of inlining " << NV("Caller", Caller)
<< " in other contexts");
@@ -499,7 +502,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache);
+ InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -516,52 +519,54 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
Function *Caller = CS.getCaller();
Function *Callee = CS.getCalledFunction();
- // If this call site is dead and it is to a readonly function, we should
- // just delete the call instead of trying to inline it, regardless of
- // size. This happens because IPSCCP propagates the result out of the
- // call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
- DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction()
- << "\n");
- // Update the call graph by deleting the edge from Callee to Caller.
- CG[Caller]->removeCallEdgeFor(CS);
- CS.getInstruction()->eraseFromParent();
- ++NumCallsDeleted;
- } else {
- // We can only inline direct calls to non-declarations.
- if (!Callee || Callee->isDeclaration())
- continue;
+ // We can only inline direct calls to non-declarations.
+ if (!Callee || Callee->isDeclaration())
+ continue;
+ Instruction *Instr = CS.getInstruction();
+
+ bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);
+
+ int InlineHistoryID;
+ if (!IsTriviallyDead) {
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
// itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
- int InlineHistoryID = CallSites[CSi].second;
+ InlineHistoryID = CallSites[CSi].second;
if (InlineHistoryID != -1 &&
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue;
+ }
+ // FIXME for new PM: because of the old PM we currently generate ORE and
+ // in turn BFI on demand. With the new PM, the ORE dependency should
+ // just become a regular analysis dependency.
+ OptimizationRemarkEmitter ORE(Caller);
+
+ // If the policy determines that we should not inline this call site,
+ // skip it; otherwise, either delete the dead call or inline it below.
+ if (!shouldInline(CS, GetInlineCost, ORE))
+ continue;
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (IsTriviallyDead) {
+ DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ Instr->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
// Get DebugLoc to report. CS will be invalid after Inliner.
- DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ DebugLoc DLoc = Instr->getDebugLoc();
BasicBlock *Block = CS.getParent();
- // FIXME for new PM: because of the old PM we currently generate ORE and
- // in turn BFI on demand. With the new PM, the ORE dependency should
- // just become a regular analysis dependency.
- OptimizationRemarkEmitter ORE(Caller);
-
- // If the policy determines that we should inline this function,
- // try to do so.
- using namespace ore;
- if (!shouldInline(CS, GetInlineCost, ORE)) {
- ORE.emit(
- OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller));
- continue;
- }
// Attempt to inline the function.
+ using namespace ore;
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime, AARGetter,
ImportedFunctionsStats)) {
@@ -638,22 +643,12 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
[this](CallSite CS) { return getInlineCost(CS); },
- AARGetter, ImportedFunctionsStats);
+ LegacyAARGetter(*this), ImportedFunctionsStats);
}
/// Remove now-dead linkonce functions at the end of
@@ -750,9 +745,6 @@ bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
- .getManager();
const ModuleAnalysisManager &MAM =
AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
bool Changed = false;
@@ -761,35 +753,52 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Module &M = *InitialC.begin()->getFunction().getParent();
ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&](Function &F) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(F);
- };
-
- // Setup the data structure used to plumb customization into the
- // `InlineFunction` routine.
- InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+ // We use a single common worklist for calls across the entire SCC. We
+ // process these in-order and append new calls introduced during inlining to
+ // the end.
+ //
+ // Note that this particular order of processing is actually critical to
+ // avoid very bad behaviors. Consider *highly connected* call graphs where
+ // each function contains a small amount of code and a couple of calls to
+ // other functions. Because the LLVM inliner is fundamentally a bottom-up
+ // inliner, it can handle gracefully the fact that these all appear to be
+ // reasonable inlining candidates as it will flatten things until they become
+ // too big to inline, and then move on and flatten another batch.
+ //
+ // However, when processing call edges *within* an SCC we cannot rely on this
+ // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
+ // functions we can end up incrementally inlining N calls into each of
+ // N functions because each incremental inlining decision looks good and we
+ // don't have a topological ordering to prevent explosions.
+ //
+ // To compensate for this, we don't process transitive edges made immediate
+ // by inlining until we've done one pass of inlining across the entire SCC.
+ // Large, highly connected SCCs still lead to some amount of code bloat in
+ // this model, but it is uniformly spread across all the functions in the SCC
+ // and eventually they all become too large to inline, rather than
+ // incrementally making a single function grow in a superlinear fashion.
+ // (A simplified sketch of this worklist scheme follows this hunk.)
+ SmallVector<std::pair<CallSite, int>, 16> Calls;
- auto GetInlineCost = [&](CallSite CS) {
- Function &Callee = *CS.getCalledFunction();
- auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
- return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
- };
+ // Populate the initial list of calls in this SCC.
+ for (auto &N : InitialC) {
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions.
+ // FIXME: Using the instruction sequence is a really bad way to do this.
+ // Instead we should do an actual RPO walk of the function body.
+ for (Instruction &I : instructions(N.getFunction()))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (!Callee->isDeclaration())
+ Calls.push_back({CS, -1});
+ }
+ if (Calls.empty())
+ return PreservedAnalyses::all();
- // We use a worklist of nodes to process so that we can handle if the SCC
- // structure changes and some nodes are no longer part of the current SCC. We
- // also need to use an updatable pointer for the SCC as a consequence.
- SmallVector<LazyCallGraph::Node *, 16> Nodes;
- for (auto &N : InitialC)
- Nodes.push_back(&N);
+ // Capture updatable variables for the current SCC and RefSCC.
auto *C = &InitialC;
auto *RC = &C->getOuterRefSCC();
- // We also use a secondary worklist of call sites within a particular node to
- // allow quickly continuing to inline through newly inlined call sites where
- // possible.
- SmallVector<std::pair<CallSite, int>, 16> Calls;
-
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
// infinite inlining in some obscure cases. To represent this, we use an
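The comment above is the heart of the new-PM inliner rework: one SCC-wide worklist of call sites, processed in order, with calls exposed by inlining appended to the end so they are only revisited after the current pass over the SCC. Below is a stripped-down sketch of just that loop shape (plain strings instead of CallSites, no cost model), showing the per-caller batching and the index-based iteration that tolerates the vector growing:

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      // Each entry is (caller, callee); new entries may be appended while we
      // iterate, so the loop is indexed rather than iterator-based.
      std::vector<std::pair<std::string, std::string>> Calls = {
          {"a", "b"}, {"a", "c"}, {"b", "c"}, {"c", "d"}};

      for (int I = 0; I < (int)Calls.size(); ++I) {
        // Set up per-caller state once for a run of entries with the same
        // caller, mirroring how the pass fetches analyses for that function.
        std::string Caller = Calls[I].first;
        std::cout << "processing calls in " << Caller << "\n";

        // Process consecutive calls from this caller; stop when it changes.
        for (; I < (int)Calls.size() && Calls[I].first == Caller; ++I) {
          std::cout << "  inline " << Calls[I].second << " into " << Caller << "\n";
          // Calls that become direct inside the inlined body would be
          // appended here, e.g. Calls.push_back({Caller, NewCallee}), and get
          // processed only after the rest of the current pass over the list.
        }

        // Step back one so the outer loop's ++I lands on the first entry of
        // the next caller (the same trick as the "--i" in the pass).
        --I;
      }
    }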
@@ -805,34 +814,58 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// defer deleting these to make it easier to handle the call graph updates.
SmallVector<Function *, 4> DeadFunctions;
- do {
- auto &N = *Nodes.pop_back_val();
+ // Loop forward over all of the calls. Note that we cannot cache the size as
+ // inlining can introduce new calls that need to be processed.
+ for (int i = 0; i < (int)Calls.size(); ++i) {
+ // We expect the calls to typically be batched with sequences of calls that
+ // have the same caller, so we first set up some shared infrastructure for
+ // this caller. We also do any pruning we can at this layer on the caller
+ // alone.
+ Function &F = *Calls[i].first.getCaller();
+ LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- Function &F = N.getFunction();
if (F.hasFnAttribute(Attribute::OptimizeNone))
continue;
+ DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
+
+ // Get a FunctionAnalysisManager via a proxy for this particular node. We
+ // do this each time we visit a node as the SCC may have changed and as
+ // we're going to mutate this particular function we want to make sure the
+ // proxy is in place to forward any invalidation events. We can use the
+ // manager we get here for looking up results for functions other than this
+ // node however because those functions aren't going to be mutated by this
+ // pass.
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
+ .getManager();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ auto GetInlineCost = [&](CallSite CS) {
+ Function &Callee = *CS.getCalledFunction();
+ auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
+ return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
+ PSI);
+ };
+
// Get the remarks emission analysis for the caller.
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- // We want to generally process call sites top-down in order for
- // simplifications stemming from replacing the call with the returned value
- // after inlining to be visible to subsequent inlining decisions. So we
- // walk the function backwards and then process the back of the vector.
- // FIXME: Using reverse is a really bad way to do this. Instead we should
- // do an actual PO walk of the function body.
- for (Instruction &I : reverse(instructions(F)))
- if (auto CS = CallSite(&I))
- if (Function *Callee = CS.getCalledFunction())
- if (!Callee->isDeclaration())
- Calls.push_back({CS, -1});
-
+ // Now process as many calls as we have within this caller in the sequence.
+ // We bail out as soon as the caller has to change so we can update the
+ // call graph and prepare the context of that new caller.
bool DidInline = false;
- while (!Calls.empty()) {
+ for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
int InlineHistoryID;
CallSite CS;
- std::tie(CS, InlineHistoryID) = Calls.pop_back_val();
+ std::tie(CS, InlineHistoryID) = Calls[i];
Function &Callee = *CS.getCalledFunction();
if (InlineHistoryID != -1 &&
@@ -843,6 +876,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!shouldInline(CS, GetInlineCost, ORE))
continue;
+ // Setup the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, &GetAssumptionCache, PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+
if (!InlineFunction(CS, IFI))
continue;
DidInline = true;
@@ -869,7 +909,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// To check this we also need to nuke any dead constant uses (perhaps
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
- if (Callee.use_empty()) {
+ if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
+ Calls.erase(
+ std::remove_if(Calls.begin() + i + 1, Calls.end(),
+ [&Callee](const std::pair<CallSite, int> &Call) {
+ return Call.first.getCaller() == &Callee;
+ }),
+ Calls.end());
// Clear the body and queue the function itself for deletion when we
// finish inlining and call graph updates.
// Note that after this point, it is an error to do anything other
@@ -882,6 +928,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
+ // Back the call index up by one to put us in a good position to go around
+ // the outer loop.
+ --i;
+
if (!DidInline)
continue;
Changed = true;
@@ -896,8 +946,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// below.
for (Function *InlinedCallee : InlinedCallees) {
LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
- for (LazyCallGraph::Edge &E : CalleeN)
- RC->insertTrivialRefEdge(N, *E.getNode());
+ for (LazyCallGraph::Edge &E : *CalleeN)
+ RC->insertTrivialRefEdge(N, E.getNode());
}
InlinedCallees.clear();
@@ -908,8 +958,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// re-use the exact same logic for updating the call graph to reflect the
// change..
C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+ DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
RC = &C->getOuterRefSCC();
- } while (!Nodes.empty());
+ }
// Now that we've finished inlining all of the calls across this SCC, delete
// all of the trivially dead functions, updating the call graph and the CGSCC
@@ -920,8 +971,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// sets.
for (Function *DeadF : DeadFunctions) {
// Get the necessary information out of the call graph and nuke the
- // function there.
+ // function there. Also, clear out any cached analyses.
auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
+ .getManager();
+ FAM.clear(*DeadF);
+ AM.clear(DeadC);
auto &DeadRC = DeadC.getOuterRefSCC();
CG.removeDeadFunction(*DeadF);
@@ -933,5 +989,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// And delete the actual function from the module.
M.getFunctionList().erase(DeadF);
}
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ // Even if we change the IR, we update the core CGSCC data structures and so
+ // can preserve the proxy to the function analysis manager.
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ return PA;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index f898c3b..c74b0a3 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/Dominators.h"
@@ -22,6 +21,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index deb7e81..693df5e 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -42,8 +43,6 @@
using namespace llvm;
using namespace lowertypetests;
-using SummaryAction = LowerTypeTestsSummaryAction;
-
#define DEBUG_TYPE "lowertypetests"
STATISTIC(ByteArraySizeBits, "Byte array size in bits");
@@ -57,13 +56,13 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));
-static cl::opt<SummaryAction> ClSummaryAction(
+static cl::opt<PassSummaryAction> ClSummaryAction(
"lowertypetests-summary-action",
cl::desc("What to do with the summary when running this pass"),
- cl::values(clEnumValN(SummaryAction::None, "none", "Do nothing"),
- clEnumValN(SummaryAction::Import, "import",
+ cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
+ clEnumValN(PassSummaryAction::Import, "import",
"Import typeid resolutions from summary and globals"),
- clEnumValN(SummaryAction::Export, "export",
+ clEnumValN(PassSummaryAction::Export, "export",
"Export typeid resolutions to summary and globals")),
cl::Hidden);
@@ -208,17 +207,26 @@ struct ByteArrayInfo {
class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
GlobalObject *GO;
size_t NTypes;
+ // For functions: true if this is a definition (either in the merged module or
+ // in one of the ThinLTO modules).
+ bool IsDefinition;
+ // For functions: true if this function is either defined or used in a ThinLTO
+ // module and its jump table entry needs to be exported to ThinLTO backends.
+ bool IsExported;
friend TrailingObjects;
size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }
public:
static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
+ bool IsDefinition, bool IsExported,
ArrayRef<MDNode *> Types) {
auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
GTM->GO = GO;
GTM->NTypes = Types.size();
+ GTM->IsDefinition = IsDefinition;
+ GTM->IsExported = IsExported;
std::uninitialized_copy(Types.begin(), Types.end(),
GTM->getTrailingObjects<MDNode *>());
return GTM;
@@ -226,6 +234,12 @@ public:
GlobalObject *getGlobal() const {
return GO;
}
+ bool isDefinition() const {
+ return IsDefinition;
+ }
+ bool isExported() const {
+ return IsExported;
+ }
ArrayRef<MDNode *> types() const {
return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes);
}
@@ -234,10 +248,9 @@ public:
class LowerTypeTestsModule {
Module &M;
- SummaryAction Action;
- ModuleSummaryIndex *Summary;
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
- bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
Triple::OSType OS;
Triple::ObjectFormatType ObjectFormat;
@@ -253,15 +266,21 @@ class LowerTypeTestsModule {
// Indirect function call index assignment counter for WebAssembly
uint64_t IndirectIndex = 1;
- // Mapping from type identifiers to the call sites that test them.
- DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
+ // Mapping from type identifiers to the call sites that test them, as well as
+ // whether the type identifier needs to be exported to ThinLTO backends as
+ // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
+ struct TypeIdUserInfo {
+ std::vector<CallInst *> CallSites;
+ bool IsExported = false;
+ };
+ DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;
/// This structure describes how to lower type tests for a particular type
/// identifier. It is either built directly from the global analysis (during
/// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
/// identifier summaries and external symbol references (in ThinLTO backends).
struct TypeIdLowering {
- TypeTestResolution::Kind TheKind;
+ TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;
/// All except Unsat: the start address within the combined global.
Constant *OffsetedGlobal;
@@ -274,9 +293,6 @@ class LowerTypeTestsModule {
/// covering members of this type identifier as a multiple of 2^AlignLog2.
Constant *SizeM1;
- /// ByteArray, Inline, AllOnes: range of SizeM1 expressed as a bit width.
- unsigned SizeM1BitWidth;
-
/// ByteArray: the byte array to test the address against.
Constant *TheByteArray;
@@ -291,6 +307,11 @@ class LowerTypeTestsModule {
Function *WeakInitializerFn = nullptr;
+ void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
+ TypeIdLowering importTypeId(StringRef TypeId);
+ void importTypeTest(CallInst *CI);
+ void importFunction(Function *F, bool isDefinition);
+
BitSetInfo
buildBitSet(Metadata *TypeId,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
@@ -327,8 +348,8 @@ class LowerTypeTestsModule {
void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);
public:
- LowerTypeTestsModule(Module &M, SummaryAction Action,
- ModuleSummaryIndex *Summary);
+ LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary);
bool lower();
// Lower the module using the action and summary passed as command line
@@ -341,15 +362,17 @@ struct LowerTypeTests : public ModulePass {
bool UseCommandLine = false;
- SummaryAction Action;
- ModuleSummaryIndex *Summary;
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
- LowerTypeTests(SummaryAction Action, ModuleSummaryIndex *Summary)
- : ModulePass(ID), Action(Action), Summary(Summary) {
+ LowerTypeTests(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : ModulePass(ID), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
@@ -358,7 +381,7 @@ struct LowerTypeTests : public ModulePass {
return false;
if (UseCommandLine)
return LowerTypeTestsModule::runForTesting(M);
- return LowerTypeTestsModule(M, Action, Summary).lower();
+ return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
}
};
@@ -368,9 +391,10 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
false)
char LowerTypeTests::ID = 0;
-ModulePass *llvm::createLowerTypeTestsPass(SummaryAction Action,
- ModuleSummaryIndex *Summary) {
- return new LowerTypeTests(Action, Summary);
+ModulePass *
+llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary) {
+ return new LowerTypeTests(ExportSummary, ImportSummary);
}
/// Build a bit set for TypeId using the object layouts in
@@ -467,13 +491,9 @@ void LowerTypeTestsModule::allocateByteArrays() {
// Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
// that the pc-relative displacement is folded into the lea instead of the
// test instruction getting another displacement.
- if (LinkerSubsectionsViaSymbols) {
- BAI->ByteArray->replaceAllUsesWith(GEP);
- } else {
- GlobalAlias *Alias = GlobalAlias::create(
- Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
- BAI->ByteArray->replaceAllUsesWith(Alias);
- }
+ GlobalAlias *Alias = GlobalAlias::create(
+ Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
+ BAI->ByteArray->replaceAllUsesWith(Alias);
BAI->ByteArray->eraseFromParent();
}
@@ -494,10 +514,11 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
} else {
Constant *ByteArray = TIL.TheByteArray;
- if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
+ if (AvoidReuse && !ImportSummary) {
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
+ // This won't work when importing because TheByteArray is external.
ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
"bits_use", ByteArray, &M);
}
@@ -593,15 +614,31 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
IntPtrTy));
Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
- Constant *BitSizeConst = ConstantExpr::getZExt(TIL.SizeM1, IntPtrTy);
- Value *OffsetInRange = B.CreateICmpULE(BitOffset, BitSizeConst);
+ Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1);
// If the bit set is all ones, testing against it is unnecessary.
if (TIL.TheKind == TypeTestResolution::AllOnes)
return OffsetInRange;
- TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
- IRBuilder<> ThenB(Term);
+ // See if the intrinsic is used in the following common pattern:
+ // br(llvm.type.test(...), thenbb, elsebb)
+ // where nothing happens between the type test and the br.
+ // If so, create slightly simpler IR.
+ if (CI->hasOneUse())
+ if (auto *Br = dyn_cast<BranchInst>(*CI->user_begin()))
+ if (CI->getNextNode() == Br) {
+ BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator());
+ BasicBlock *Else = Br->getSuccessor(1);
+ BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange);
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ Br->getMetadata(LLVMContext::MD_prof));
+ ReplaceInstWithInst(InitialBB->getTerminator(), NewBr);
+
+ IRBuilder<> ThenB(CI);
+ return createBitSetTest(ThenB, TIL, BitOffset);
+ }
+
+ IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
// Now that we know that the offset is in range and aligned, load the
// appropriate bit from the bitset.
@@ -672,21 +709,174 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
ConstantInt::get(Int32Ty, I * 2)};
Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
- if (LinkerSubsectionsViaSymbols) {
- GV->replaceAllUsesWith(CombinedGlobalElemPtr);
- } else {
- assert(GV->getType()->getAddressSpace() == 0);
- GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
- GV->getLinkage(), "",
- CombinedGlobalElemPtr, &M);
- GAlias->setVisibility(GV->getVisibility());
- GAlias->takeName(GV);
- GV->replaceAllUsesWith(GAlias);
- }
+ assert(GV->getType()->getAddressSpace() == 0);
+ GlobalAlias *GAlias =
+ GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(),
+ "", CombinedGlobalElemPtr, &M);
+ GAlias->setVisibility(GV->getVisibility());
+ GAlias->takeName(GV);
+ GV->replaceAllUsesWith(GAlias);
GV->eraseFromParent();
}
}
+/// Export the given type identifier so that ThinLTO backends may import it.
+/// Type identifiers are exported by adding coarse-grained information about how
+/// to test the type identifier to the summary, and creating symbols in the
+/// object file (aliases and absolute symbols) containing fine-grained
+/// information about the type identifier.
+void LowerTypeTestsModule::exportTypeId(StringRef TypeId,
+ const TypeIdLowering &TIL) {
+ TypeTestResolution &TTRes =
+ ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
+ TTRes.TheKind = TIL.TheKind;
+
+ auto ExportGlobal = [&](StringRef Name, Constant *C) {
+ GlobalAlias *GA =
+ GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
+ "__typeid_" + TypeId + "_" + Name, C, &M);
+ GA->setVisibility(GlobalValue::HiddenVisibility);
+ };
+
+ if (TIL.TheKind != TypeTestResolution::Unsat)
+ ExportGlobal("global_addr", TIL.OffsetedGlobal);
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray ||
+ TIL.TheKind == TypeTestResolution::Inline ||
+ TIL.TheKind == TypeTestResolution::AllOnes) {
+ ExportGlobal("align", ConstantExpr::getIntToPtr(TIL.AlignLog2, Int8PtrTy));
+ ExportGlobal("size_m1", ConstantExpr::getIntToPtr(TIL.SizeM1, Int8PtrTy));
+
+ uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1;
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6;
+ else
+ TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32;
+ }
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray) {
+ ExportGlobal("byte_array", TIL.TheByteArray);
+ ExportGlobal("bit_mask", TIL.BitMask);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ ExportGlobal("inline_bits",
+ ConstantExpr::getIntToPtr(TIL.InlineBits, Int8PtrTy));
+}
+
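exportTypeId above hands fine-grained type-test information to ThinLTO backends entirely through specially named symbols of the form __typeid_<TypeId>_<field>, with only the coarse resolution kind going into the summary. A small illustration of the naming scheme alone, using a made-up type identifier (this is not LLVM API):

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Hypothetical type identifier; real ones are typically mangled names.
      std::string TypeId = "_ZTS1A";

      // Fields exported for a ByteArray-kind resolution, per the ExportGlobal
      // calls in exportTypeId above.
      std::vector<std::string> Fields = {"global_addr", "align", "size_m1",
                                         "byte_array", "bit_mask"};

      for (const std::string &Field : Fields)
        std::cout << "__typeid_" + TypeId + "_" + Field << "\n";
    }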
+LowerTypeTestsModule::TypeIdLowering
+LowerTypeTestsModule::importTypeId(StringRef TypeId) {
+ const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId);
+ if (!TidSummary)
+ return {}; // Unsat: no globals match this type id.
+ const TypeTestResolution &TTRes = TidSummary->TTRes;
+
+ TypeIdLowering TIL;
+ TIL.TheKind = TTRes.TheKind;
+
+ auto ImportGlobal = [&](StringRef Name, unsigned AbsWidth) {
+ Constant *C =
+ M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), Int8Ty);
+ auto *GV = dyn_cast<GlobalVariable>(C);
+ // We only need to set metadata if the global is newly created, in which
+ // case it would not have hidden visibility.
+ if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
+ return C;
+
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(M.getContext(), {MinC, MaxC}));
+ };
+ if (AbsWidth == IntPtrTy->getBitWidth())
+ SetAbsRange(~0ull, ~0ull); // Full set.
+ else if (AbsWidth)
+ SetAbsRange(0, 1ull << AbsWidth);
+ return C;
+ };
+
+ if (TIL.TheKind != TypeTestResolution::Unsat)
+ TIL.OffsetedGlobal = ImportGlobal("global_addr", 0);
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray ||
+ TIL.TheKind == TypeTestResolution::Inline ||
+ TIL.TheKind == TypeTestResolution::AllOnes) {
+ TIL.AlignLog2 = ConstantExpr::getPtrToInt(ImportGlobal("align", 8), Int8Ty);
+ TIL.SizeM1 = ConstantExpr::getPtrToInt(
+ ImportGlobal("size_m1", TTRes.SizeM1BitWidth), IntPtrTy);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray) {
+ TIL.TheByteArray = ImportGlobal("byte_array", 0);
+ TIL.BitMask = ImportGlobal("bit_mask", 8);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ TIL.InlineBits = ConstantExpr::getPtrToInt(
+ ImportGlobal("inline_bits", 1 << TTRes.SizeM1BitWidth),
+ TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);
+
+ return TIL;
+}
+
+void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
+ auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeIdMDVal)
+ report_fatal_error("Second argument of llvm.type.test must be metadata");
+
+ auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
+ if (!TypeIdStr)
+ report_fatal_error(
+ "Second argument of llvm.type.test must be a metadata string");
+
+ TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
+ Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+}
+
+// ThinLTO backend: the function F has a jump table entry; update this module
+// accordingly. isDefinition describes the type of the jump table entry.
+void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
+ assert(F->getType()->getAddressSpace() == 0);
+
+ // Declaration of a local function - nothing to do.
+ if (F->isDeclarationForLinker() && isDefinition)
+ return;
+
+ GlobalValue::VisibilityTypes Visibility = F->getVisibility();
+ std::string Name = F->getName();
+ Function *FDecl;
+
+ if (F->isDeclarationForLinker() && !isDefinition) {
+ // Declaration of an external function.
+ FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
+ Name + ".cfi_jt", &M);
+ FDecl->setVisibility(GlobalValue::HiddenVisibility);
+ } else if (isDefinition) {
+ F->setName(Name + ".cfi");
+ F->setLinkage(GlobalValue::ExternalLinkage);
+ F->setVisibility(GlobalValue::HiddenVisibility);
+ FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
+ Name, &M);
+ FDecl->setVisibility(Visibility);
+ } else {
+ // Function definition without type metadata, where some other translation
+ // unit contained a declaration with type metadata. This normally happens
+ // during mixed CFI + non-CFI compilation. We do nothing with the function
+ // so that it is treated the same way as a function defined outside of the
+ // LTO unit.
+ return;
+ }
+
+ if (F->isWeakForLinker())
+ replaceWeakDeclarationWithJumpTablePtr(F, FDecl);
+ else
+ F->replaceAllUsesWith(FDecl);
+}
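For a hypothetical exported function f, the effect is roughly the following (a sketch, not literal pass output): in the module that owns the definition, @f is renamed to the hidden definition @f.cfi and a fresh declaration named f is created to take over its uses; in a module that merely declares f, uses of @f are redirected to a hidden external declaration @f.cfi_jt, the jump table entry created by the exporting module.

    f  ->  f.cfi     (definition case: renamed, hidden; new declaration f replaces it)
    f  ->  f.cfi_jt  (declaration case: uses retargeted to the jump table entry)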
+
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
@@ -708,16 +898,12 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
+ TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
if (BSI.isAllOnes()) {
TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
: TypeTestResolution::AllOnes;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 128) ? 7 : 32;
- TIL.SizeM1 = ConstantInt::get((BSI.BitSize <= 128) ? Int8Ty : Int32Ty,
- BSI.BitSize - 1);
} else if (BSI.BitSize <= 64) {
TIL.TheKind = TypeTestResolution::Inline;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 32) ? 5 : 6;
- TIL.SizeM1 = ConstantInt::get(Int8Ty, BSI.BitSize - 1);
uint64_t InlineBits = 0;
for (auto Bit : BSI.Bits)
InlineBits |= uint64_t(1) << Bit;
@@ -728,17 +914,19 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
(BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
} else {
TIL.TheKind = TypeTestResolution::ByteArray;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 128) ? 7 : 32;
- TIL.SizeM1 = ConstantInt::get((BSI.BitSize <= 128) ? Int8Ty : Int32Ty,
- BSI.BitSize - 1);
++NumByteArraysCreated;
ByteArrayInfo *BAI = createByteArray(BSI);
TIL.TheByteArray = BAI->ByteArray;
TIL.BitMask = BAI->MaskGlobal;
}
+ TypeIdUserInfo &TIUI = TypeIdUsers[TypeId];
+
+ if (TIUI.IsExported)
+ exportTypeId(cast<MDString>(TypeId)->getString(), TIL);
+
// Lower each call to llvm.type.test for this type identifier.
- for (CallInst *CI : TypeTestCallSites[TypeId]) {
+ for (CallInst *CI : TIUI.CallSites) {
++NumTypeTestCallsLowered;
Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
CI->replaceAllUsesWith(Lowered);
@@ -757,9 +945,9 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
report_fatal_error(
"A member of a type identifier may not have an explicit section");
- if (isa<GlobalVariable>(GO) && GO->isDeclarationForLinker())
- report_fatal_error(
- "A global var member of a type identifier must be a definition");
+  // FIXME: We previously checked that a global variable member of a type
+  // identifier must be a definition, but the IR linker may leave type metadata
+  // on declarations. We should restore this check after fixing PR31759.
auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0));
if (!OffsetConstMD)
@@ -1012,7 +1200,6 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
// arithmetic that we normally use for globals.
// FIXME: find a better way to represent the jumptable in the IR.
-
assert(!Functions.empty());
// Build a simple layout based on the regular layout of jump tables.
@@ -1036,6 +1223,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
// references to the original functions with references to the aliases.
for (unsigned I = 0; I != Functions.size(); ++I) {
Function *F = cast<Function>(Functions[I]->getGlobal());
+ bool IsDefinition = Functions[I]->isDefinition();
Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
ConstantExpr::getInBoundsGetElementPtr(
@@ -1043,8 +1231,18 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, I)}),
F->getType());
- if (LinkerSubsectionsViaSymbols || F->isDeclarationForLinker()) {
-
+ if (Functions[I]->isExported()) {
+ if (IsDefinition) {
+ ExportSummary->cfiFunctionDefs().insert(F->getName());
+ } else {
+ GlobalAlias *JtAlias = GlobalAlias::create(
+ F->getValueType(), 0, GlobalValue::ExternalLinkage,
+ F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
+ JtAlias->setVisibility(GlobalValue::HiddenVisibility);
+ ExportSummary->cfiFunctionDecls().insert(F->getName());
+ }
+ }
+ if (!IsDefinition) {
if (F->isWeakForLinker())
replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr);
else
@@ -1052,9 +1250,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
} else {
assert(F->getType()->getAddressSpace() == 0);
- GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0,
- F->getLinkage(), "",
- CombinedGlobalElemPtr, &M);
+ GlobalAlias *FAlias = GlobalAlias::create(
+ F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M);
FAlias->setVisibility(F->getVisibility());
FAlias->takeName(F);
if (FAlias->hasName())
@@ -1173,15 +1370,12 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
}
/// Lower all type tests in this module.
-LowerTypeTestsModule::LowerTypeTestsModule(Module &M, SummaryAction Action,
- ModuleSummaryIndex *Summary)
- : M(M), Action(Action), Summary(Summary) {
- // FIXME: Use these fields.
- (void)this->Action;
- (void)this->Summary;
-
+LowerTypeTestsModule::LowerTypeTestsModule(
+ Module &M, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+ assert(!(ExportSummary && ImportSummary));
Triple TargetTriple(M.getTargetTriple());
- LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
Arch = TargetTriple.getArch();
OS = TargetTriple.getOS();
ObjectFormat = TargetTriple.getObjectFormat();
@@ -1203,7 +1397,11 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
ExitOnErr(errorCodeToError(In.error()));
}
- bool Changed = LowerTypeTestsModule(M, ClSummaryAction, &Summary).lower();
+ bool Changed =
+ LowerTypeTestsModule(
+ M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+ ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
+ .lower();
if (!ClWriteSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
@@ -1222,9 +1420,40 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
- if (!TypeTestFunc || TypeTestFunc->use_empty())
+ if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary &&
+ !ImportSummary)
return false;
+ if (ImportSummary) {
+ if (TypeTestFunc) {
+ for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
+ UI != UE;) {
+ auto *CI = cast<CallInst>((*UI++).getUser());
+ importTypeTest(CI);
+ }
+ }
+
+ SmallVector<Function *, 8> Defs;
+ SmallVector<Function *, 8> Decls;
+ for (auto &F : M) {
+ // CFI functions are either external, or promoted. A local function may
+ // have the same name, but it's not the one we are looking for.
+ if (F.hasLocalLinkage())
+ continue;
+ if (ImportSummary->cfiFunctionDefs().count(F.getName()))
+ Defs.push_back(&F);
+ else if (ImportSummary->cfiFunctionDecls().count(F.getName()))
+ Decls.push_back(&F);
+ }
+
+ for (auto F : Defs)
+ importFunction(F, /*isDefinition*/ true);
+ for (auto F : Decls)
+ importFunction(F, /*isDefinition*/ false);
+
+ return true;
+ }
+
// Equivalence class set containing type identifiers and the globals that
// reference them. This is used to partition the set of type identifiers in
// the module into disjoint sets.
@@ -1247,13 +1476,76 @@ bool LowerTypeTestsModule::lower() {
llvm::DenseMap<Metadata *, TIInfo> TypeIdInfo;
unsigned I = 0;
SmallVector<MDNode *, 2> Types;
+
+ struct ExportedFunctionInfo {
+ CfiFunctionLinkage Linkage;
+ MDNode *FuncMD; // {name, linkage, type[, type...]}
+ };
+ DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions;
+ if (ExportSummary) {
+ NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
+ if (CfiFunctionsMD) {
+ for (auto FuncMD : CfiFunctionsMD->operands()) {
+ assert(FuncMD->getNumOperands() >= 2);
+ StringRef FunctionName =
+ cast<MDString>(FuncMD->getOperand(0))->getString();
+ if (!ExportSummary->isGUIDLive(GlobalValue::getGUID(
+ GlobalValue::dropLLVMManglingEscape(FunctionName))))
+ continue;
+ CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
+ cast<ConstantAsMetadata>(FuncMD->getOperand(1))
+ ->getValue()
+ ->getUniqueInteger()
+ .getZExtValue());
+ auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
+ if (!P.second && P.first->second.Linkage != CFL_Definition)
+ P.first->second = {Linkage, FuncMD};
+ }
+
+ for (const auto &P : ExportedFunctions) {
+ StringRef FunctionName = P.first;
+ CfiFunctionLinkage Linkage = P.second.Linkage;
+ MDNode *FuncMD = P.second.FuncMD;
+ Function *F = M.getFunction(FunctionName);
+ if (!F)
+ F = Function::Create(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalVariable::ExternalLinkage, FunctionName, &M);
+
+ if (Linkage == CFL_Definition)
+ F->eraseMetadata(LLVMContext::MD_type);
+
+ if (F->isDeclaration()) {
+ if (Linkage == CFL_WeakDeclaration)
+ F->setLinkage(GlobalValue::ExternalWeakLinkage);
+
+ SmallVector<MDNode *, 2> Types;
+ for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
+ F->addMetadata(LLVMContext::MD_type,
+ *cast<MDNode>(FuncMD->getOperand(I).get()));
+ }
+ }
+ }
+ }
+
for (GlobalObject &GO : M.global_objects()) {
+ if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
+ continue;
+
Types.clear();
GO.getMetadata(LLVMContext::MD_type, Types);
if (Types.empty())
continue;
- auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types);
+ bool IsDefinition = !GO.isDeclarationForLinker();
+ bool IsExported = false;
+ if (isa<Function>(GO) && ExportedFunctions.count(GO.getName())) {
+ IsDefinition |= ExportedFunctions[GO.getName()].Linkage == CFL_Definition;
+ IsExported = true;
+ }
+
+ auto *GTM =
+ GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types);
for (MDNode *Type : Types) {
verifyTypeMDNode(&GO, Type);
auto &Info = TypeIdInfo[cast<MDNode>(Type)->getOperand(1)];
@@ -1262,33 +1554,56 @@ bool LowerTypeTestsModule::lower() {
}
}
- for (const Use &U : TypeTestFunc->uses()) {
- auto CI = cast<CallInst>(U.getUser());
+ auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
+ // Add the call site to the list of call sites for this type identifier. We
+ // also use TypeIdUsers to keep track of whether we have seen this type
+ // identifier before. If we have, we don't need to re-add the referenced
+ // globals to the equivalence class.
+ auto Ins = TypeIdUsers.insert({TypeId, {}});
+ if (Ins.second) {
+ // Add the type identifier to the equivalence class.
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId);
+ GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+ // Add the referenced globals to the type identifier's equivalence class.
+ for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
+ }
+
+ return Ins.first->second;
+ };
- auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
- if (!BitSetMDVal)
- report_fatal_error("Second argument of llvm.type.test must be metadata");
- auto BitSet = BitSetMDVal->getMetadata();
+ if (TypeTestFunc) {
+ for (const Use &U : TypeTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
- // Add the call site to the list of call sites for this type identifier. We
- // also use TypeTestCallSites to keep track of whether we have seen this
- // type identifier before. If we have, we don't need to re-add the
- // referenced globals to the equivalence class.
- std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, bool>
- Ins = TypeTestCallSites.insert(
- std::make_pair(BitSet, std::vector<CallInst *>()));
- Ins.first->second.push_back(CI);
- if (!Ins.second)
- continue;
+ auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeIdMDVal)
+ report_fatal_error("Second argument of llvm.type.test must be metadata");
+ auto TypeId = TypeIdMDVal->getMetadata();
+ AddTypeIdUse(TypeId).CallSites.push_back(CI);
+ }
+ }
- // Add the type identifier to the equivalence class.
- GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
- GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+ if (ExportSummary) {
+ DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
+ for (auto &P : TypeIdInfo) {
+ if (auto *TypeId = dyn_cast<MDString>(P.first))
+ MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
+ TypeId);
+ }
- // Add the referenced globals to the type identifier's equivalence class.
- for (GlobalTypeMember *GTM : TypeIdInfo[BitSet].RefGlobals)
- CurSet = GlobalClasses.unionSets(
- CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
+ for (auto &P : *ExportSummary) {
+ for (auto &S : P.second.SummaryList) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS || !ExportSummary->isGlobalValueLive(FS))
+ continue;
+ for (GlobalValue::GUID G : FS->type_tests())
+ for (Metadata *MD : MetadataByGUID[G])
+ AddTypeIdUse(MD).IsExported = true;
+ }
+ }
}
if (GlobalClasses.empty())
@@ -1349,8 +1664,9 @@ bool LowerTypeTestsModule::lower() {
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
- bool Changed =
- LowerTypeTestsModule(M, SummaryAction::None, /*Summary=*/nullptr).lower();
+ bool Changed = LowerTypeTestsModule(M, /*ExportSummary=*/nullptr,
+ /*ImportSummary=*/nullptr)
+ .lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index e0bb0eb..0e478ba 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -96,8 +96,10 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
@@ -127,6 +129,26 @@ static cl::opt<unsigned> NumFunctionsForSanityCheck(
"'0' disables this check. Works only with '-debug' key."),
cl::init(0), cl::Hidden);
+// Under option -mergefunc-preserve-debug-info we:
+// - Do not create a new function for a thunk.
+// - Retain the debug info for a thunk's parameters (and associated
+// instructions for the debug info) from the entry block.
+// Note: -debug will display the algorithm at work.
+// - Create debug-info for the call (to the shared implementation) made by
+// a thunk and its return value.
+// - Erase the rest of the function, retaining the (minimally sized) entry
+// block to create a thunk.
+// - Preserve a thunk's call site to point to the thunk even when both occur
+//     within the same translation unit, to aid debuggability. Note that this
+//     behavior differs from the underlying -mergefunc implementation, which
+// modifies the thunk's call site to point to the shared implementation
+// when both occur within the same translation unit.
+static cl::opt<bool>
+ MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden,
+ cl::init(false),
+ cl::desc("Preserve debug info in thunk when mergefunc "
+ "transformations are made."));
+
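A usage sketch (assuming the pass keeps its usual legacy registration name "mergefunc"; pair it with -debug in an assertions build to see the algorithm at work, as noted above):

    opt -S -mergefunc -mergefunc-preserve-debug-info input.ll -o merged.ll
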
namespace {
class FunctionNode {
@@ -185,11 +207,13 @@ private:
/// A work queue of functions that may have been modified and should be
/// analyzed again.
- std::vector<WeakVH> Deferred;
+ std::vector<WeakTrackingVH> Deferred;
/// Checks the rules of order relation introduced among functions set.
/// Returns true, if sanity check has been passed, and false if failed.
- bool doSanityCheck(std::vector<WeakVH> &Worklist);
+#ifndef NDEBUG
+ bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist);
+#endif
/// Insert a ComparableFunction into the FnTree, or merge it away if it's
/// equal to one that's already present.
@@ -215,8 +239,21 @@ private:
/// Replace G with a thunk or an alias to F. Deletes G.
void writeThunkOrAlias(Function *F, Function *G);
- /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
- /// of G with bitcast(F). Deletes G.
+ /// Fill PDIUnrelatedWL with instructions from the entry block that are
+ /// unrelated to parameter related debug info.
+ void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock,
+ std::vector<Instruction *> &PDIUnrelatedWL);
+
+ /// Erase the rest of the CFG (i.e. barring the entry block).
+ void eraseTail(Function *G);
+
+ /// Erase the instructions in PDIUnrelatedWL as they are unrelated to the
+ /// parameter debug info, from the entry block.
+ void eraseInstsUnrelatedToPDI(std::vector<Instruction *> &PDIUnrelatedWL);
+
+ /// Replace G with a simple tail call to bitcast(F). Also (unless
+  /// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), and
+  /// delete G.
void writeThunk(Function *F, Function *G);
/// Replace G with an alias to F. Deletes G.
@@ -248,7 +285,8 @@ ModulePass *llvm::createMergeFunctionsPass() {
return new MergeFunctions();
}
-bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
+#ifndef NDEBUG
+bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
if (const unsigned Max = NumFunctionsForSanityCheck) {
unsigned TripleNumber = 0;
bool Valid = true;
@@ -256,10 +294,12 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
unsigned i = 0;
- for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end();
+ for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(),
+ E = Worklist.end();
I != E && i < Max; ++I, ++i) {
unsigned j = i;
- for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
+ for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max;
+ ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
@@ -269,8 +309,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
if (Res1 != -Res2) {
dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
<< "\n";
- F1->dump();
- F2->dump();
+ dbgs() << *F1 << '\n' << *F2 << '\n';
Valid = false;
}
@@ -278,7 +317,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
unsigned k = j;
- for (std::vector<WeakVH>::iterator K = J; K != E && k < Max;
+ for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max;
++k, ++K, ++TripleNumber) {
if (K == J)
continue;
@@ -305,9 +344,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
<< TripleNumber << "\n";
dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
<< Res4 << "\n";
- F1->dump();
- F2->dump();
- F3->dump();
+ dbgs() << *F1 << '\n' << *F2 << '\n' << *F3 << '\n';
Valid = false;
}
}
@@ -319,6 +356,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
}
return true;
}
+#endif
bool MergeFunctions::runOnModule(Module &M) {
if (skipModule(M))
@@ -349,12 +387,12 @@ bool MergeFunctions::runOnModule(Module &M) {
// consider merging it. Otherwise it is dropped and never considered again.
if ((I != S && std::prev(I)->first == I->first) ||
(std::next(I) != IE && std::next(I)->first == I->first) ) {
- Deferred.push_back(WeakVH(I->second));
+ Deferred.push_back(WeakTrackingVH(I->second));
}
}
do {
- std::vector<WeakVH> Worklist;
+ std::vector<WeakTrackingVH> Worklist;
Deferred.swap(Worklist);
DEBUG(doSanityCheck(Worklist));
@@ -363,7 +401,7 @@ bool MergeFunctions::runOnModule(Module &M) {
DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
// Insert functions and merge them.
- for (WeakVH &I : Worklist) {
+ for (WeakTrackingVH &I : Worklist) {
if (!I)
continue;
Function *F = cast<Function>(I);
@@ -400,19 +438,15 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
// Transferring other attributes may help other optimizations, but that
// should be done uniformly and not in this ad-hoc way.
auto &Context = New->getContext();
- auto NewFuncAttrs = New->getAttributes();
- auto CallSiteAttrs = CS.getAttributes();
-
- CallSiteAttrs = CallSiteAttrs.addAttributes(
- Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
-
- for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
- AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
- if (Attrs.getNumSlots())
- CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
- }
-
- CS.setAttributes(CallSiteAttrs);
+ auto NewPAL = New->getAttributes();
+ SmallVector<AttributeSet, 4> NewArgAttrs;
+ for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++)
+ NewArgAttrs.push_back(NewPAL.getParamAttributes(argIdx));
+  // Don't transfer attributes from the function to the call site. Function
+ // attributes typically aren't relevant to the calling convention or ABI.
+ CS.setAttributes(AttributeList::get(Context, /*FnAttrs=*/AttributeSet(),
+ NewPAL.getRetAttributes(),
+ NewArgAttrs));
remove(CS.getInstruction()->getParent()->getParent());
U->set(BitcastNew);
@@ -461,51 +495,242 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
return Builder.CreateBitCast(V, DestTy);
}
-// Replace G with a simple tail call to bitcast(F). Also replace direct uses
-// of G with bitcast(F). Deletes G.
+// Erase the instructions in PDIUnrelatedWL as they are unrelated to the
+// parameter debug info, from the entry block.
+void MergeFunctions::eraseInstsUnrelatedToPDI(
+ std::vector<Instruction *> &PDIUnrelatedWL) {
+
+ DEBUG(dbgs() << " Erasing instructions (in reverse order of appearance in "
+ "entry block) unrelated to parameter debug info from entry "
+ "block: {\n");
+ while (!PDIUnrelatedWL.empty()) {
+ Instruction *I = PDIUnrelatedWL.back();
+ DEBUG(dbgs() << " Deleting Instruction: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ I->eraseFromParent();
+ PDIUnrelatedWL.pop_back();
+ }
+ DEBUG(dbgs() << " } // Done erasing instructions unrelated to parameter "
+ "debug info from entry block. \n");
+}
+
+// Reduce G to its entry block.
+void MergeFunctions::eraseTail(Function *G) {
+
+ std::vector<BasicBlock *> WorklistBB;
+ for (Function::iterator BBI = std::next(G->begin()), BBE = G->end();
+ BBI != BBE; ++BBI) {
+ BBI->dropAllReferences();
+ WorklistBB.push_back(&*BBI);
+ }
+ while (!WorklistBB.empty()) {
+ BasicBlock *BB = WorklistBB.back();
+ BB->eraseFromParent();
+ WorklistBB.pop_back();
+ }
+}
+
+// We are interested in the following instructions from the entry block as being
+// related to parameter debug info:
+// - @llvm.dbg.declare
+// - stores from the incoming parameters to locations on the stack-frame
+// - allocas that create these locations on the stack-frame
+// - @llvm.dbg.value
+// - the entry block's terminator
+// The rest are unrelated to debug info for the parameters; fill up
+// PDIUnrelatedWL with such instructions.
+void MergeFunctions::filterInstsUnrelatedToPDI(
+ BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) {
+
+ std::set<Instruction *> PDIRelated;
+ for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end();
+ BI != BIE; ++BI) {
+ if (auto *DVI = dyn_cast<DbgValueInst>(&*BI)) {
+ DEBUG(dbgs() << " Deciding: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ DILocalVariable *DILocVar = DVI->getVariable();
+ if (DILocVar->isParameter()) {
+ DEBUG(dbgs() << " Include (parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else if (auto *DDI = dyn_cast<DbgDeclareInst>(&*BI)) {
+ DEBUG(dbgs() << " Deciding: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ DILocalVariable *DILocVar = DDI->getVariable();
+ if (DILocVar->isParameter()) {
+ DEBUG(dbgs() << " Parameter: ");
+ DEBUG(DILocVar->print(dbgs()));
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ if (AI) {
+ DEBUG(dbgs() << " Processing alloca users: ");
+ DEBUG(dbgs() << "\n");
+ for (User *U : AI->users()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (Value *Arg = SI->getValueOperand()) {
+ if (dyn_cast<Argument>(Arg)) {
+ DEBUG(dbgs() << " Include: ");
+ DEBUG(AI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(AI);
+ DEBUG(dbgs() << " Include (parameter): ");
+ DEBUG(SI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(SI);
+ DEBUG(dbgs() << " Include: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(SI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ } else {
+ DEBUG(dbgs() << " Defer: ");
+ DEBUG(U->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ } else {
+ DEBUG(dbgs() << " Delete (alloca NULL): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else if (dyn_cast<TerminatorInst>(BI) == GEntryBlock->getTerminator()) {
+ DEBUG(dbgs() << " Will Include Terminator: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Defer: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ DEBUG(dbgs()
+ << " Report parameter debug info related/related instructions: {\n");
+ for (BasicBlock::iterator BI = GEntryBlock->begin(), BE = GEntryBlock->end();
+ BI != BE; ++BI) {
+
+ Instruction *I = &*BI;
+ if (PDIRelated.find(I) == PDIRelated.end()) {
+ DEBUG(dbgs() << " !PDIRelated: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIUnrelatedWL.push_back(I);
+ } else {
+ DEBUG(dbgs() << " PDIRelated: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ DEBUG(dbgs() << " }\n");
+}
+
+// Replace G with a simple tail call to bitcast(F). Also (unless
+// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), and
+// delete G. Under MergeFunctionsPDI, we use G itself for creating
+// the thunk as we preserve the debug info (and associated instructions)
+// from G's entry block pertaining to G's incoming arguments which are
+// passed on as corresponding arguments in the call that G makes to F.
+// For better debuggability, under MergeFunctionsPDI, we do not modify G's
+// call sites to point to F even when within the same translation unit.
void MergeFunctions::writeThunk(Function *F, Function *G) {
- if (!G->isInterposable()) {
- // Redirect direct callers of G to F.
+ if (!G->isInterposable() && !MergeFunctionsPDI) {
+ // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
+ // above).
replaceDirectCallers(G, F);
}
// If G was internal then we may have replaced all uses of G with F. If so,
- // stop here and delete G. There's no need for a thunk.
- if (G->hasLocalLinkage() && G->use_empty()) {
+ // stop here and delete G. There's no need for a thunk. (See note on
+ // MergeFunctionsPDI above).
+ if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
G->eraseFromParent();
return;
}
- Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
- G->getParent());
- BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
- IRBuilder<> Builder(BB);
+ BasicBlock *GEntryBlock = nullptr;
+ std::vector<Instruction *> PDIUnrelatedWL;
+ BasicBlock *BB = nullptr;
+ Function *NewG = nullptr;
+ if (MergeFunctionsPDI) {
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) Do not create a new "
+ "function as thunk; retain original: "
+ << G->getName() << "()\n");
+ GEntryBlock = &G->getEntryBlock();
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) filter parameter related "
+ "debug info for "
+ << G->getName() << "() {\n");
+ filterInstsUnrelatedToPDI(GEntryBlock, PDIUnrelatedWL);
+ GEntryBlock->getTerminator()->eraseFromParent();
+ BB = GEntryBlock;
+ } else {
+ NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+ G->getParent());
+ BB = BasicBlock::Create(F->getContext(), "", NewG);
+ }
+ IRBuilder<> Builder(BB);
+ Function *H = MergeFunctionsPDI ? G : NewG;
SmallVector<Value *, 16> Args;
unsigned i = 0;
FunctionType *FFTy = F->getFunctionType();
- for (Argument & AI : NewG->args()) {
+ for (Argument & AI : H->args()) {
Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
++i;
}
CallInst *CI = Builder.CreateCall(F, Args);
+ ReturnInst *RI = nullptr;
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
CI->setAttributes(F->getAttributes());
- if (NewG->getReturnType()->isVoidTy()) {
- Builder.CreateRetVoid();
+ if (H->getReturnType()->isVoidTy()) {
+ RI = Builder.CreateRetVoid();
} else {
- Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
+ RI = Builder.CreateRet(createCast(Builder, CI, H->getReturnType()));
}
- NewG->copyAttributesFrom(G);
- NewG->takeName(G);
- removeUsers(G);
- G->replaceAllUsesWith(NewG);
- G->eraseFromParent();
+ if (MergeFunctionsPDI) {
+ DISubprogram *DIS = G->getSubprogram();
+ if (DIS) {
+ DebugLoc CIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS);
+ DebugLoc RIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS);
+ CI->setDebugLoc(CIDbgLoc);
+ RI->setDebugLoc(RIDbgLoc);
+ } else {
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) No DISubprogram for "
+ << G->getName() << "()\n");
+ }
+ eraseTail(G);
+ eraseInstsUnrelatedToPDI(PDIUnrelatedWL);
+ DEBUG(dbgs() << "} // End of parameter related debug info filtering for: "
+ << G->getName() << "()\n");
+ } else {
+ NewG->copyAttributesFrom(G);
+ NewG->takeName(G);
+ removeUsers(G);
+ G->replaceAllUsesWith(NewG);
+ G->eraseFromParent();
+ }
- DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ DEBUG(dbgs() << "writeThunk: " << H->getName() << '\n');
++NumThunksWritten;
}
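Conceptually, once G has been merged into F, the thunk that remains for G is just a forwarding call. A source-level sketch (the pass operates on IR and inserts bitcasts where types differ):

    // G survives only as a thunk; under -mergefunc-preserve-debug-info it
    // reuses G's original entry block so the parameter debug info is kept.
    int G(int X) { return F(X); }   // tail call to the shared implementation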
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 7ef3fc1..8840435 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -16,8 +16,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -27,19 +34,177 @@
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
-#define DEBUG_TYPE "partialinlining"
-
-STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+#define DEBUG_TYPE "partial-inlining"
+
+STATISTIC(NumPartialInlined,
+ "Number of callsites functions partially inlined into.");
+
+// Command line option to disable partial-inlining. The default is false:
+static cl::opt<bool>
+ DisablePartialInlining("disable-partial-inlining", cl::init(false),
+                           cl::Hidden, cl::desc("Disable partial inlining"));
+// This is an option used for testing:
+static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
+ cl::init(false), cl::ZeroOrMore,
+ cl::ReallyHidden,
+ cl::desc("Skip Cost Analysis"));
+
+static cl::opt<unsigned> MaxNumInlineBlocks(
+ "max-num-inline-blocks", cl::init(5), cl::Hidden,
+ cl::desc("Max Number of Blocks To be Partially Inlined"));
+
+// Command line option to set the maximum number of partial inlining allowed
+// for the module. The default value of -1 means no limit.
+static cl::opt<int> MaxNumPartialInlining(
+ "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Max number of partial inlining. The default is unlimited"));
+
+// Used only when PGO or user-annotated branch data is absent. It is
+// the minimum relative frequency used to weight the outline region. If BFI
+// produces a larger value, the BFI value will be used.
+static cl::opt<int>
+ OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Relative frequency of outline region to "
+ "the entry block"));
+
+static cl::opt<unsigned> ExtraOutliningPenalty(
+ "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
+ cl::desc("A debug option to add additional penalty to the computed one."));
namespace {
+
+struct FunctionOutliningInfo {
+ FunctionOutliningInfo()
+ : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
+ ReturnBlockPreds() {}
+ // Returns the number of blocks to be inlined including all blocks
+ // in Entries and one return block.
+ unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
+
+ // A set of blocks including the function entry that guard
+ // the region to be outlined.
+ SmallVector<BasicBlock *, 4> Entries;
+ // The return block that is not included in the outlined region.
+ BasicBlock *ReturnBlock;
+ // The dominating block of the region to be outlined.
+ BasicBlock *NonReturnBlock;
+  // The set of blocks in Entries that are predecessors of ReturnBlock.
+ SmallVector<BasicBlock *, 4> ReturnBlockPreds;
+};
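For intuition, a hypothetical C++ function with the shape this analysis looks for: a cheap guard in the entry block whose taken path returns immediately, with the cold, expensive body behind it. Partial inlining inlines the guard (the Entries plus the ReturnBlock) into callers and outlines the rest (the NonReturnBlock onward):

    static int Cache[256];            // illustrative only
    int lookupOrCompute(int Key) {
      int Slot = Key & 255;
      if (Cache[Slot] != 0)           // entry block: the guard, inlined into callers
        return Cache[Slot];           // return path: stays with the inlined portion
      int V = Key * Key + 1;          // cold body: extracted into an outlined function
      Cache[Slot] = V;
      return V;
    }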
+
struct PartialInlinerImpl {
- PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(IFI) {}
+ PartialInlinerImpl(
+ std::function<AssumptionCache &(Function &)> *GetAC,
+ std::function<TargetTransformInfo &(Function &)> *GTTI,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
+ ProfileSummaryInfo *ProfSI)
+ : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
bool run(Module &M);
Function *unswitchFunction(Function *F);
+  // This class speculatively clones the function to be partially inlined.
+ // At the end of partial inlining, the remaining callsites to the cloned
+ // function that are not partially inlined will be fixed up to reference
+ // the original function, and the cloned function will be erased.
+ struct FunctionCloner {
+ FunctionCloner(Function *F, FunctionOutliningInfo *OI);
+ ~FunctionCloner();
+
+ // Prepare for function outlining: making sure there is only
+ // one incoming edge from the extracted/outlined region to
+ // the return block.
+ void NormalizeReturnBlock();
+
+ // Do function outlining:
+ Function *doFunctionOutlining();
+
+ Function *OrigFunc = nullptr;
+ Function *ClonedFunc = nullptr;
+ Function *OutlinedFunc = nullptr;
+ BasicBlock *OutliningCallBB = nullptr;
+ // ClonedFunc is inlined in one of its callers after function
+ // outlining.
+ bool IsFunctionInlined = false;
+ // The cost of the region to be outlined.
+ int OutlinedRegionCost = 0;
+ std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
+ std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
+ };
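A sketch of how these pieces fit together (inferred from the declarations above; the driver is unswitchFunction below):

    FunctionCloner Cloner(F, OI.get());     // clone F and remap OI onto the clone
    Cloner.NormalizeReturnBlock();          // ensure one incoming edge into ReturnBlock
    Function *Outlined = Cloner.doFunctionOutlining();   // CodeExtractor does the work
    if (Outlined)
      tryPartialInline(Cloner);             // inline the slimmed clone into callers
    // ~FunctionCloner(): remaining users are rewritten back to OrigFunc, the clone
    // is erased, and so is the outlined function if nothing was inlined.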
+
private:
- InlineFunctionInfo IFI;
+ int NumPartialInlining = 0;
+ std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
+ std::function<TargetTransformInfo &(Function &)> *GetTTI;
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
+ ProfileSummaryInfo *PSI;
+
+  // Return the frequency of the OutliningCallBB relative to F's entry point.
+  // The result is no larger than 1 and is represented as a BranchProbability.
+ // (Note that the outlined region's 'head' block can only have incoming
+ // edges from the guarding entry blocks).
+ BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
+
+  // Return true if partially inlining the callee of CS is expected to be
+  // profitable.
+ bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
+ BlockFrequency WeightedOutliningRcost,
+ OptimizationRemarkEmitter &ORE);
+
+  // Try to inline DuplicateFunction (cloned from F, with a call to
+  // the OutlinedFunction) into its callers. Return true
+ // if there is any successful inlining.
+ bool tryPartialInline(FunctionCloner &Cloner);
+
+  // Compute the mapping from each use site of DuplicateFunction to the
+  // enclosing BB's profile count.
+ void computeCallsiteToProfCountMap(Function *DuplicateFunction,
+ DenseMap<User *, uint64_t> &SiteCountMap);
+
+ bool IsLimitReached() {
+ return (MaxNumPartialInlining != -1 &&
+ NumPartialInlining >= MaxNumPartialInlining);
+ }
+
+ static CallSite getCallSite(User *U) {
+ CallSite CS;
+ if (CallInst *CI = dyn_cast<CallInst>(U))
+ CS = CallSite(CI);
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
+ CS = CallSite(II);
+ else
+ llvm_unreachable("All uses must be calls");
+ return CS;
+ }
+
+ static CallSite getOneCallSiteTo(Function *F) {
+ User *User = *F->user_begin();
+ return getCallSite(User);
+ }
+
+ std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
+ CallSite CS = getOneCallSiteTo(F);
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ BasicBlock *Block = CS.getParent();
+ return std::make_tuple(DLoc, Block);
+ }
+
+ // Returns the costs associated with function outlining:
+ // - The first value is the non-weighted runtime cost for making the call
+  //   to the outlined function, including the additional setup cost in the
+ // outlined function itself;
+ // - The second value is the estimated size of the new call sequence in
+ // basic block Cloner.OutliningCallBB;
+ std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
+ // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
+  // approximate both the size and runtime cost (note that the current
+  // inline cost analysis does not make a clear distinction there either).
+ static int computeBBInlineCost(BasicBlock *BB);
+
+ std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
+
};
+
struct PartialInlinerLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
PartialInlinerLegacyPass() : ModulePass(ID) {
@@ -48,124 +213,713 @@ struct PartialInlinerLegacyPass : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
+ TargetTransformInfoWrapperPass *TTIWP =
+ &getAnalysis<TargetTransformInfoWrapperPass>();
+ ProfileSummaryInfo *PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&ACT](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
- return PartialInlinerImpl(IFI).run(M);
+
+ std::function<TargetTransformInfo &(Function &)> GetTTI =
+ [&TTIWP](Function &F) -> TargetTransformInfo & {
+ return TTIWP->getTTI(F);
+ };
+
+ return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
}
};
}
-Function *PartialInlinerImpl::unswitchFunction(Function *F) {
- // First, verify that this function is an unswitching candidate...
+std::unique_ptr<FunctionOutliningInfo>
+PartialInlinerImpl::computeOutliningInfo(Function *F) {
BasicBlock *EntryBlock = &F->front();
BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
if (!BR || BR->isUnconditional())
- return nullptr;
+ return std::unique_ptr<FunctionOutliningInfo>();
+
+ // Returns true if Succ is BB's successor
+ auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
+ return is_contained(successors(BB), Succ);
+ };
+
+ auto SuccSize = [](BasicBlock *BB) {
+ return std::distance(succ_begin(BB), succ_end(BB));
+ };
+
+ auto IsReturnBlock = [](BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ return isa<ReturnInst>(TI);
+ };
+
+ auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
+ if (IsReturnBlock(Succ1))
+ return std::make_tuple(Succ1, Succ2);
+ if (IsReturnBlock(Succ2))
+ return std::make_tuple(Succ2, Succ1);
+
+ return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
+ };
+
+ // Detect a triangular shape:
+ auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
+ if (IsSuccessor(Succ1, Succ2))
+ return std::make_tuple(Succ1, Succ2);
+ if (IsSuccessor(Succ2, Succ1))
+ return std::make_tuple(Succ2, Succ1);
+
+ return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
+ };
+
+ std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
+ llvm::make_unique<FunctionOutliningInfo>();
+
+ BasicBlock *CurrEntry = EntryBlock;
+ bool CandidateFound = false;
+ do {
+ // The number of blocks to be inlined has already reached
+ // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
+ // disables partial inlining for the function.
+ if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
+ break;
+
+ if (SuccSize(CurrEntry) != 2)
+ break;
+
+ BasicBlock *Succ1 = *succ_begin(CurrEntry);
+ BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
+
+ BasicBlock *ReturnBlock, *NonReturnBlock;
+ std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
+
+ if (ReturnBlock) {
+ OutliningInfo->Entries.push_back(CurrEntry);
+ OutliningInfo->ReturnBlock = ReturnBlock;
+ OutliningInfo->NonReturnBlock = NonReturnBlock;
+ CandidateFound = true;
+ break;
+ }
+
+ BasicBlock *CommSucc;
+ BasicBlock *OtherSucc;
+ std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
+
+ if (!CommSucc)
+ break;
- BasicBlock *ReturnBlock = nullptr;
- BasicBlock *NonReturnBlock = nullptr;
- unsigned ReturnCount = 0;
- for (BasicBlock *BB : successors(EntryBlock)) {
- if (isa<ReturnInst>(BB->getTerminator())) {
- ReturnBlock = BB;
- ReturnCount++;
- } else
- NonReturnBlock = BB;
+ OutliningInfo->Entries.push_back(CurrEntry);
+ CurrEntry = OtherSucc;
+
+ } while (true);
+
+ if (!CandidateFound)
+ return std::unique_ptr<FunctionOutliningInfo>();
+
+  // Do a sanity check of the entries: there should not
+ // be any successors (not in the entry set) other than
+ // {ReturnBlock, NonReturnBlock}
+ assert(OutliningInfo->Entries[0] == &F->front() &&
+ "Function Entry must be the first in Entries vector");
+ DenseSet<BasicBlock *> Entries;
+ for (BasicBlock *E : OutliningInfo->Entries)
+ Entries.insert(E);
+
+  // Returns true if BB has a predecessor that is not
+  // in the Entries set.
+ auto HasNonEntryPred = [Entries](BasicBlock *BB) {
+ for (auto Pred : predecessors(BB)) {
+ if (!Entries.count(Pred))
+ return true;
+ }
+ return false;
+ };
+ auto CheckAndNormalizeCandidate =
+ [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
+ for (BasicBlock *E : OutliningInfo->Entries) {
+ for (auto Succ : successors(E)) {
+ if (Entries.count(Succ))
+ continue;
+ if (Succ == OutliningInfo->ReturnBlock)
+ OutliningInfo->ReturnBlockPreds.push_back(E);
+ else if (Succ != OutliningInfo->NonReturnBlock)
+ return false;
+ }
+ // There should not be any outside incoming edges either:
+ if (HasNonEntryPred(E))
+ return false;
+ }
+ return true;
+ };
+
+ if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
+ return std::unique_ptr<FunctionOutliningInfo>();
+
+  // Now further grow the candidate's inlining region by
+ // peeling off dominating blocks from the outlining region:
+ while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
+ BasicBlock *Cand = OutliningInfo->NonReturnBlock;
+ if (SuccSize(Cand) != 2)
+ break;
+
+ if (HasNonEntryPred(Cand))
+ break;
+
+ BasicBlock *Succ1 = *succ_begin(Cand);
+ BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
+
+ BasicBlock *ReturnBlock, *NonReturnBlock;
+ std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
+ if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
+ break;
+
+ if (NonReturnBlock->getSinglePredecessor() != Cand)
+ break;
+
+    // Now grow and update OutliningInfo:
+ OutliningInfo->Entries.push_back(Cand);
+ OutliningInfo->NonReturnBlock = NonReturnBlock;
+ OutliningInfo->ReturnBlockPreds.push_back(Cand);
+ Entries.insert(Cand);
}
- if (ReturnCount != 1)
- return nullptr;
+ return OutliningInfo;
+}
+
+// Check if there is PGO data or user-annotated branch data:
+static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
+ if (F->getEntryCount())
+ return true;
+  // Now check if any of the entry blocks has MD_prof data:
+ for (auto *E : OI->Entries) {
+ BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
+ if (!BR || BR->isUnconditional())
+ continue;
+ uint64_t T, F;
+ if (BR->extractProfMetadata(T, F))
+ return true;
+ }
+ return false;
+}
+
+BranchProbability
+PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
+
+ auto EntryFreq =
+ Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
+ auto OutliningCallFreq =
+ Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
+
+ auto OutlineRegionRelFreq =
+ BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
+ EntryFreq.getFrequency());
+
+ if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
+ return OutlineRegionRelFreq;
+
+ // When profile data is not available, we need to be conservative in
+ // estimating the overall savings. Static branch prediction can usually
+ // guess the branch direction right (taken/non-taken), but the guessed
+  // branch probability is usually not biased enough. When the
+  // outlined region is predicted to be likely, its probability needs
+  // to be made higher (more biased) so as not to underestimate the cost of
+  // function outlining. On the other hand, if the outlined region
+  // is predicted to be less likely, the predicted probability is usually
+  // higher than the actual one. For instance, the actual probability of the
+  // less likely target may be only 5%, but the guessed probability can be
+  // 40%. In the latter case, there is no need for further adjustment.
+ // FIXME: add an option for this.
+ if (OutlineRegionRelFreq < BranchProbability(45, 100))
+ return OutlineRegionRelFreq;
+
+ OutlineRegionRelFreq = std::max(
+ OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
+
+ return OutlineRegionRelFreq;
+}
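For example, with no profile data and the default -outline-region-freq-percent of 75: a BFI-estimated relative frequency of 60% is raised to 75%, while an estimate of 40% is already below the 45% threshold and is returned unchanged.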
+
+bool PartialInlinerImpl::shouldPartialInline(
+ CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
+ OptimizationRemarkEmitter &ORE) {
+
+ using namespace ore;
+ if (SkipCostAnalysis)
+ return true;
+
+ Instruction *Call = CS.getInstruction();
+ Function *Callee = CS.getCalledFunction();
+ assert(Callee == Cloner.ClonedFunc);
+
+ Function *Caller = CS.getCaller();
+ auto &CalleeTTI = (*GetTTI)(*Callee);
+ InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
+ *GetAssumptionCache, GetBFI, PSI);
+
+ if (IC.isAlways()) {
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
+ << NV("Callee", Cloner.OrigFunc)
+ << " should always be fully inlined, not partially");
+ return false;
+ }
+
+ if (IC.isNever()) {
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
+ << NV("Caller", Caller)
+ << " because it should never be inlined (cost=never)");
+ return false;
+ }
+
+ if (!IC) {
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
+ << NV("Caller", Caller) << " because too costly to inline (cost="
+ << NV("Cost", IC.getCost()) << ", threshold="
+ << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
+ return false;
+ }
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+
+ // The savings of eliminating the call:
+ int NonWeightedSavings = getCallsiteCost(CS, DL);
+ BlockFrequency NormWeightedSavings(NonWeightedSavings);
+
+  // If the weighted savings is smaller than the weighted cost, return false.
+ if (NormWeightedSavings < WeightedOutliningRcost) {
+ ORE.emit(
+ OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
+ << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
+ << NV("Caller", Caller) << " runtime overhead (overhead="
+ << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
+ << ", savings="
+ << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
+ << " of making the outlined call is too high");
+
+ return false;
+ }
+
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
+ << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
+ << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
+ << " (threshold="
+ << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
+ return true;
+}
+
+// TODO: Ideally we should share Inliner's InlineCost Analysis code.
+// For now use a simplified version. The returned 'InlineCost' will be used
+// to estimate the size cost as well as runtime cost of the BB.
+int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
+ int InlineCost = 0;
+ const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::Alloca:
+ continue;
+ case Instruction::GetElementPtr:
+ if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ continue;
+ default:
+ break;
+ }
+
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
+ IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ InlineCost += getCallsiteCost(CallSite(CI), DL);
+ continue;
+ }
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ InlineCost += getCallsiteCost(CallSite(II), DL);
+ continue;
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
+ continue;
+ }
+ InlineCost += InlineConstants::InstrCost;
+ }
+ return InlineCost;
+}
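As a symbolic example of the accounting above: a block containing an all-zero-index GEP, an alloca, one add, one call with two arguments, and ending in a switch with three cases is charged getCallsiteCost(Call, DL) + 5 * InlineConstants::InstrCost in total; the GEP and the alloca are free, the add costs one InstrCost, and the switch costs (3 + 1) * InstrCost.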
+
+std::tuple<int, int>
+PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
+
+ // Now compute the cost of the call sequence to the outlined function
+ // 'OutlinedFunction' in BB 'OutliningCallBB':
+ int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);
+
+ // Now compute the cost of the extracted/outlined function itself:
+ int OutlinedFunctionCost = 0;
+ for (BasicBlock &BB : *Cloner.OutlinedFunc) {
+ OutlinedFunctionCost += computeBBInlineCost(&BB);
+ }
+
+ assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
+ "Outlined function cost should be no less than the outlined region");
+ // The code extractor introduces a new root and exit stub blocks with
+ // additional unconditional branches. Those branches will be eliminated
+ // later with bb layout. The cost should be adjusted accordingly:
+ OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
+
+ int OutliningRuntimeOverhead =
+ OutliningFuncCallCost +
+ (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
+ ExtraOutliningPenalty;
+
+ return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
+}
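With purely illustrative numbers: if the call sequence in OutliningCallBB costs 25, the outlined function costs 130 after the 2 * InstrCost adjustment, and the outlined region originally cost 100, the returned runtime overhead is 25 + (130 - 100) + ExtraOutliningPenalty, i.e. 55 with the default penalty of 0.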
+
+// Create the callsite-to-profile-count map, which is
+// used to update the original function's entry count
+// after the function is partially inlined into the callsite.
+void PartialInlinerImpl::computeCallsiteToProfCountMap(
+ Function *DuplicateFunction,
+ DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
+ std::vector<User *> Users(DuplicateFunction->user_begin(),
+ DuplicateFunction->user_end());
+ Function *CurrentCaller = nullptr;
+ std::unique_ptr<BlockFrequencyInfo> TempBFI;
+ BlockFrequencyInfo *CurrentCallerBFI = nullptr;
+
+ auto ComputeCurrBFI = [&,this](Function *Caller) {
+ // For the old pass manager:
+ if (!GetBFI) {
+ DominatorTree DT(*Caller);
+ LoopInfo LI(DT);
+ BranchProbabilityInfo BPI(*Caller, LI);
+ TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
+ CurrentCallerBFI = TempBFI.get();
+ } else {
+ // New pass manager:
+ CurrentCallerBFI = &(*GetBFI)(*Caller);
+ }
+ };
+
+ for (User *User : Users) {
+ CallSite CS = getCallSite(User);
+ Function *Caller = CS.getCaller();
+ if (CurrentCaller != Caller) {
+ CurrentCaller = Caller;
+ ComputeCurrBFI(Caller);
+ } else {
+ assert(CurrentCallerBFI && "CallerBFI is not set");
+ }
+ BasicBlock *CallBB = CS.getInstruction()->getParent();
+ auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
+ if (Count)
+ CallSiteToProfCountMap[User] = *Count;
+ else
+ CallSiteToProfCountMap[User] = 0;
+ }
+}
+
+PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
+ FunctionOutliningInfo *OI)
+ : OrigFunc(F) {
+ ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
- Function *DuplicateFunction = CloneFunction(F, VMap);
- DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
- BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
- BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
- BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);
+ ClonedFunc = CloneFunction(F, VMap);
+ ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
+ ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
+ for (BasicBlock *BB : OI->Entries) {
+ ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
+ }
+ for (BasicBlock *E : OI->ReturnBlockPreds) {
+ BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+ ClonedOI->ReturnBlockPreds.push_back(NewE);
+ }
// Go ahead and update all uses to the duplicate, so that we can just
// use the inliner functionality when we're done hacking.
- F->replaceAllUsesWith(DuplicateFunction);
+ F->replaceAllUsesWith(ClonedFunc);
+}
+
+void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
+
+ auto getFirstPHI = [](BasicBlock *BB) {
+ BasicBlock::iterator I = BB->begin();
+ PHINode *FirstPhi = nullptr;
+ while (I != BB->end()) {
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (!Phi)
+ break;
+ if (!FirstPhi) {
+ FirstPhi = Phi;
+ break;
+ }
+ }
+ return FirstPhi;
+ };
// Special hackery is needed with PHI nodes that have inputs from more than
// one extracted block. For simplicity, just split the PHIs into a two-level
// sequence of PHIs, some of which will go in the extracted region, and some
// of which will go outside.
- BasicBlock *PreReturn = NewReturnBlock;
- NewReturnBlock = NewReturnBlock->splitBasicBlock(
- NewReturnBlock->getFirstNonPHI()->getIterator());
+ BasicBlock *PreReturn = ClonedOI->ReturnBlock;
+ // only split block when necessary:
+ PHINode *FirstPhi = getFirstPHI(PreReturn);
+ unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
+
+ if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
+ return;
+
+ auto IsTrivialPhi = [](PHINode *PN) -> Value * {
+ Value *CommonValue = PN->getIncomingValue(0);
+ if (all_of(PN->incoming_values(),
+ [&](Value *V) { return V == CommonValue; }))
+ return CommonValue;
+ return nullptr;
+ };
+
+ ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
+ ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
BasicBlock::iterator I = PreReturn->begin();
- Instruction *Ins = &NewReturnBlock->front();
+ Instruction *Ins = &ClonedOI->ReturnBlock->front();
+ SmallVector<Instruction *, 4> DeadPhis;
while (I != PreReturn->end()) {
PHINode *OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi)
break;
- PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+ PHINode *RetPhi =
+ PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
OldPhi->replaceAllUsesWith(RetPhi);
- Ins = NewReturnBlock->getFirstNonPHI();
+ Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
RetPhi->addIncoming(&*I, PreReturn);
- RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
- NewEntryBlock);
- OldPhi->removeIncomingValue(NewEntryBlock);
+ for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
+ RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
+ OldPhi->removeIncomingValue(E);
+ }
+    // After splitting the incoming values, the old phi may become trivial.
+    // Keeping the trivial phi can introduce a definition inside the outlined
+    // region that is live-out, causing unnecessary overhead (loads, stores,
+    // argument passing, etc.).
+ if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
+ OldPhi->replaceAllUsesWith(OldPhiVal);
+ DeadPhis.push_back(OldPhi);
+ }
++I;
- }
- NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+ }
+ for (auto *DP : DeadPhis)
+ DP->eraseFromParent();
+
+ for (auto E : ClonedOI->ReturnBlockPreds) {
+ E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
+ }
+}
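As an aside, the IsTrivialPhi test above is simply "all remaining incoming values are identical"; a minimal standalone sketch of that predicate in plain C++ (illustrative only, not the LLVM API):

#include <algorithm>
#include <vector>

// Stand-in for a PHI's incoming values after the entry-block inputs have been
// moved to the new return-block PHI; a non-null result means the PHI is trivial.
const int *findTrivialValue(const std::vector<const int *> &Incoming) {
  if (Incoming.empty())
    return nullptr;
  const int *Common = Incoming.front();
  return std::all_of(Incoming.begin(), Incoming.end(),
                     [&](const int *V) { return V == Common; })
             ? Common
             : nullptr;
}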
+
+Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
+  // Returns true if the block is to be partially inlined into the caller
+  // (i.e. not to be extracted to the out-of-line function).
+ auto ToBeInlined = [&, this](BasicBlock *BB) {
+ return BB == ClonedOI->ReturnBlock ||
+ (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
+ ClonedOI->Entries.end());
+ };
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock *> ToExtract;
- ToExtract.push_back(NewNonReturnBlock);
- for (BasicBlock &BB : *DuplicateFunction)
- if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
- &BB != NewNonReturnBlock)
+ ToExtract.push_back(ClonedOI->NonReturnBlock);
+ OutlinedRegionCost +=
+ PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
+ for (BasicBlock &BB : *ClonedFunc)
+ if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
ToExtract.push_back(&BB);
+      // FIXME: the code extractor may hoist/sink more code
+      // into the outlined function, which may make the outlining
+      // overhead (the difference between the outlined function cost
+      // and OutlinedRegionCost) look larger.
+ OutlinedRegionCost += computeBBInlineCost(&BB);
+ }
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
- DT.recalculate(*DuplicateFunction);
+ DT.recalculate(*ClonedFunc);
// Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
LoopInfo LI(DT);
- BranchProbabilityInfo BPI(*DuplicateFunction, LI);
- BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
+ BranchProbabilityInfo BPI(*ClonedFunc, LI);
+ ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
// Extract the body of the if.
- Function *ExtractedFunction =
- CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
- .extractCodeRegion();
+ OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
+ ClonedFuncBFI.get(), &BPI)
+ .extractCodeRegion();
+
+ if (OutlinedFunc) {
+ OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
+ .getInstruction()
+ ->getParent();
+ assert(OutliningCallBB->getParent() == ClonedFunc);
+ }
- // Inline the top-level if test into all callers.
- std::vector<User *> Users(DuplicateFunction->user_begin(),
- DuplicateFunction->user_end());
- for (User *User : Users)
- if (CallInst *CI = dyn_cast<CallInst>(User))
- InlineFunction(CI, IFI);
- else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
- InlineFunction(II, IFI);
+ return OutlinedFunc;
+}
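The OutlinedRegionCost bookkeeping in doFunctionOutlining is just a running sum of per-block costs over the blocks chosen for extraction; a small standalone sketch of that accumulation (blockCost is a hypothetical stand-in for computeBBInlineCost):

#include <numeric>
#include <vector>

struct Block { int NumInsts; };

// Hypothetical per-block cost model; the pass uses computeBBInlineCost instead.
static int blockCost(const Block &B) { return B.NumInsts; }

static int outlinedRegionCost(const std::vector<Block> &ToExtract) {
  return std::accumulate(
      ToExtract.begin(), ToExtract.end(), 0,
      [](int Sum, const Block &B) { return Sum + blockCost(B); });
}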
+PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
// Ditch the duplicate, since we're done with it, and rewrite all remaining
// users (function pointers, etc.) back to the original function.
- DuplicateFunction->replaceAllUsesWith(F);
- DuplicateFunction->eraseFromParent();
+ ClonedFunc->replaceAllUsesWith(OrigFunc);
+ ClonedFunc->eraseFromParent();
+ if (!IsFunctionInlined) {
+    // Remove the speculatively created outlined function if there is no
+    // reference to it.
+ if (OutlinedFunc)
+ OutlinedFunc->eraseFromParent();
+ }
+}
- ++NumPartialInlined;
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+
+ if (F->hasAddressTaken())
+ return nullptr;
+
+ // Let inliner handle it
+ if (F->hasFnAttribute(Attribute::AlwaysInline))
+ return nullptr;
+
+ if (F->hasFnAttribute(Attribute::NoInline))
+ return nullptr;
+
+ if (PSI->isFunctionEntryCold(F))
+ return nullptr;
+
+ if (F->user_begin() == F->user_end())
+ return nullptr;
+
+ std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
- return ExtractedFunction;
+ if (!OI)
+ return nullptr;
+
+ FunctionCloner Cloner(F, OI.get());
+ Cloner.NormalizeReturnBlock();
+ Function *OutlinedFunction = Cloner.doFunctionOutlining();
+
+ bool AnyInline = tryPartialInline(Cloner);
+
+ if (AnyInline)
+ return OutlinedFunction;
+
+ return nullptr;
+}
+
+bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
+ int NonWeightedRcost;
+ int SizeCost;
+
+ if (Cloner.OutlinedFunc == nullptr)
+ return false;
+
+ std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
+
+ auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
+ auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
+
+  // If the call sequence to the outlined function is larger than the original
+  // outlined region, outlining does not increase the chances of inlining
+  // the function (the inliner uses the size increase to model the cost of
+  // inlining a callee).
+ if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
+ OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
+ DebugLoc DLoc;
+ BasicBlock *Block;
+ std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
+ DLoc, Block)
+ << ore::NV("Function", Cloner.OrigFunc)
+ << " not partially inlined into callers (Original Size = "
+ << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
+ << ", Size of call sequence to outlined function = "
+ << ore::NV("NewSize", SizeCost) << ")");
+ return false;
+ }
+
+ assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
+ "F's users should all be replaced!");
+
+ std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
+ Cloner.ClonedFunc->user_end());
+
+ DenseMap<User *, uint64_t> CallSiteToProfCountMap;
+ if (Cloner.OrigFunc->getEntryCount())
+ computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
+
+ auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
+ uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
+
+ bool AnyInline = false;
+ for (User *User : Users) {
+ CallSite CS = getCallSite(User);
+
+ if (IsLimitReached())
+ continue;
+
+ OptimizationRemarkEmitter ORE(CS.getCaller());
+
+ if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
+ continue;
+
+ ORE.emit(
+ OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
+ << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
+ << ore::NV("Caller", CS.getCaller()));
+
+ InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
+ InlineFunction(CS, IFI);
+
+ // Now update the entry count:
+ if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
+ uint64_t CallSiteCount = CallSiteToProfCountMap[User];
+ CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
+ }
+
+ AnyInline = true;
+ NumPartialInlining++;
+ // Update the stats
+ NumPartialInlined++;
+ }
+
+ if (AnyInline) {
+ Cloner.IsFunctionInlined = true;
+ if (CalleeEntryCount)
+ Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
+ }
+
+ return AnyInline;
}
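The entry-count update above subtracts each inlined call site's profile count from the callee's remaining entry count, clamping at zero so it can never underflow; a short standalone sketch of that bookkeeping:

#include <algorithm>
#include <cstdint>
#include <vector>

// Counts here are hypothetical; the pass reads them from CallSiteToProfCountMap.
uint64_t remainingEntryCount(uint64_t EntryCount,
                             const std::vector<uint64_t> &InlinedSiteCounts) {
  for (uint64_t SiteCount : InlinedSiteCounts)
    EntryCount -= std::min(EntryCount, SiteCount); // saturating subtraction
  return EntryCount;
}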
bool PartialInlinerImpl::run(Module &M) {
+ if (DisablePartialInlining)
+ return false;
+
std::vector<Function *> Worklist;
Worklist.reserve(M.size());
for (Function &F : M)
@@ -203,6 +957,8 @@ char PartialInlinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
"Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
"Partial Inliner", false, false)
@@ -213,12 +969,25 @@ ModulePass *llvm::createPartialInliningPass() {
PreservedAnalyses PartialInlinerPass::run(Module &M,
ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&FAM](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
- if (PartialInlinerImpl(IFI).run(M))
+
+ std::function<BlockFrequencyInfo &(Function &)> GetBFI =
+ [&FAM](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ std::function<TargetTransformInfo &(Function &)> GetTTI =
+ [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+ if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 941efb2..0b319f6 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -38,21 +38,22 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
static cl::opt<bool>
-RunLoopVectorization("vectorize-loops", cl::Hidden,
- cl::desc("Run the Loop vectorization passes"));
+ RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
+                       cl::ZeroOrMore, cl::desc("Run Partial inlining pass"));
static cl::opt<bool>
-RunSLPVectorization("vectorize-slp", cl::Hidden,
- cl::desc("Run the SLP vectorization passes"));
+ RunLoopVectorization("vectorize-loops", cl::Hidden,
+ cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
- cl::desc("Run the BB vectorization passes"));
+RunSLPVectorization("vectorize-slp", cl::Hidden,
+ cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
@@ -67,10 +68,6 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
-static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
- cl::Hidden,
- cl::desc("Run the load combining pass"));
-
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
cl::desc("Run the NewGVN pass"));
@@ -93,10 +90,6 @@ static cl::opt<CFLAAType>
clEnumValN(CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
-static cl::opt<bool>
-EnableMLSM("mlsm", cl::init(true), cl::Hidden,
- cl::desc("Enable motion of merged load and store"));
-
static cl::opt<bool> EnableLoopInterchange(
"enable-loopinterchange", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopInterchange Pass"));
@@ -140,15 +133,28 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
+static cl::opt<bool> EnableEarlyCSEMemSSA(
+ "enable-earlycse-memssa", cl::init(true), cl::Hidden,
+ cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)"));
+
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
- cl::desc("Enable the GVN hoisting pass"));
+ cl::desc("Enable the GVN hoisting pass (default = off)"));
static cl::opt<bool>
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
cl::Hidden,
cl::desc("Disable shrink-wrap library calls"));
+static cl::opt<bool>
+ EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable the simple loop unswitch pass."));
+
+static cl::opt<bool> EnableGVNSink(
+ "enable-gvn-sink", cl::init(false), cl::Hidden,
+ cl::desc("Enable the GVN sinking pass (default = off)"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -156,11 +162,9 @@ PassManagerBuilder::PassManagerBuilder() {
Inliner = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
- BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
- LoadCombine = RunLoadCombine;
NewGVN = RunNewGVN;
DisableGVNLoadPRE = false;
VerifyInput = false;
@@ -172,6 +176,7 @@ PassManagerBuilder::PassManagerBuilder() {
PGOInstrUse = RunPGOInstrUse;
PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = false;
+ DivergentTarget = false;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -183,6 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() {
static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+/// Check if GlobalExtensions is constructed and not empty.
+/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
+/// the construction of the object.
+static bool GlobalExtensionsNotEmpty() {
+ return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
+}
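The helper above exists because GlobalExtensions is a ManagedStatic, so calling empty() directly would force its construction; the same guard expressed with an ordinary lazily constructed container (an illustrative sketch, not PassManagerBuilder code):

#include <optional>
#include <vector>

static std::optional<std::vector<int>> LazyList; // built on first real use

static bool lazyListNotEmpty() {
  // Test has_value() first so the query itself never constructs the vector,
  // mirroring isConstructed() being checked before empty().
  return LazyList.has_value() && !LazyList->empty();
}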
+
void PassManagerBuilder::addGlobalExtension(
PassManagerBuilder::ExtensionPointTy Ty,
PassManagerBuilder::ExtensionFn Fn) {
@@ -195,9 +207,12 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
legacy::PassManagerBase &PM) const {
- for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
- if ((*GlobalExtensions)[i].first == ETy)
- (*GlobalExtensions)[i].second(*this, PM);
+ if (GlobalExtensionsNotEmpty()) {
+ for (auto &Ext : *GlobalExtensions) {
+ if (Ext.first == ETy)
+ Ext.second(*this, PM);
+ }
+ }
for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
if (Extensions[i].first == ETy)
Extensions[i].second(*this, PM);
@@ -248,18 +263,17 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createCFGSimplificationPass());
FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
- if(EnableGVNHoist)
- FPM.add(createGVNHoistPass());
FPM.add(createLowerExpectIntrinsicPass());
}
// Do PGO instrumentation generation or use pass as the option specified.
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
- if (!EnablePGOInstrGen && PGOInstrUse.empty())
+ if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
return;
// Perform the preinline and cleanup passes for O1 and above.
// And avoid doing them if optimizing for size.
- if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) {
+ if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
+ PGOSampleUse.empty()) {
// Create preinline pass. We construct an InlineParams object and specify
// the threshold here to avoid the command line options of the regular
// inliner to influence pre-inlining. The only fields of InlineParams we
@@ -283,17 +297,32 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
InstrProfOptions Options;
if (!PGOInstrGen.empty())
Options.InstrProfileOutput = PGOInstrGen;
+ Options.DoCounterPromotion = true;
+ MPM.add(createLoopRotatePass());
MPM.add(createInstrProfilingLegacyPass(Options));
}
if (!PGOInstrUse.empty())
MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
+ // Indirect call promotion that promotes intra-module targets only.
+ // For ThinLTO this is done earlier due to interactions with globalopt
+ // for imported functions. We don't run this at -O0.
+ if (OptLevel > 0)
+ MPM.add(
+ createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
}
void PassManagerBuilder::addFunctionSimplificationPasses(
legacy::PassManagerBase &MPM) {
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
MPM.add(createSROAPass());
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies
+ if (EnableGVNHoist)
+ MPM.add(createGVNHoistPass());
+ if (EnableGVNSink) {
+ MPM.add(createGVNSinkPass());
+ MPM.add(createCFGSimplificationPass());
+ }
+
// Speculative execution if the target has divergent branches; otherwise nop.
MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
MPM.add(createJumpThreadingPass()); // Thread jumps.
@@ -305,29 +334,37 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLibCallsShrinkWrapPass());
addExtensionsToPM(EP_Peephole, MPM);
+ // Optimize memory intrinsic calls based on the profiled size information.
+ if (SizeLevel == 0)
+ MPM.add(createPGOMemOPSizeOptLegacyPass());
+
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ if (EnableSimpleLoopUnswitch)
+ MPM.add(createSimpleLoopUnswitchLegacyPass());
+ else
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops
+
if (EnableLoopInterchange) {
MPM.add(createLoopInterchangePass()); // Interchange loops
MPM.add(createCFGSimplificationPass());
}
if (!DisableUnrollLoops)
- MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
if (OptLevel > 1) {
- if (EnableMLSM)
- MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
+ MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
MPM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
}
@@ -352,29 +389,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (RerollLoops)
MPM.add(createLoopRerollPass());
- if (!RunSLPAfterLoopVectorization) {
- if (SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- addInstructionCombiningPass(MPM);
- addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(NewGVN
- ? createNewGVNPass()
- : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
- }
- }
-
- if (LoadCombine)
- MPM.add(createLoadCombinePass());
+ if (!RunSLPAfterLoopVectorization && SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@@ -409,14 +425,17 @@ void PassManagerBuilder::populateModulePassManager(
// builds. The function merging pass is
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
- else if (!GlobalExtensions->empty() || !Extensions.empty())
+ else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+
+ // Rename anon globals to be able to export them in the summary.
+ // This has to be done after we add the extensions to the pass manager
+  // as there could be passes (e.g. AddressSanitizer) which introduce
+ // new unnamed globals.
if (PrepareForThinLTO)
- // Rename anon globals to be able to export them in the summary.
MPM.add(createNameAnonGlobalPass());
-
- addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -434,7 +453,16 @@ void PassManagerBuilder::populateModulePassManager(
// earlier in the pass pipeline, here before globalopt. Otherwise imported
// available_externally functions look unreferenced and are removed.
if (PerformThinLTO)
- MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true));
+ MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
+ !PGOSampleUse.empty()));
+
+  // For SamplePGO in the ThinLTO compile phase, we do not want to unroll
+  // loops, as it would change the CFG too much and make the second profile
+  // annotation in the backend more difficult.
+ bool PrepareForThinLTOUsingPGOSampleProfile =
+ PrepareForThinLTO && !PGOSampleUse.empty();
+ if (PrepareForThinLTOUsingPGOSampleProfile)
+ DisableUnrollLoops = true;
if (!DisableUnitAtATime) {
// Infer attributes about declarations if possible.
@@ -454,15 +482,13 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
- if (!PerformThinLTO) {
- /// PGO instrumentation is added during the compile phase for ThinLTO, do
- /// not run it a second time
+  // For SamplePGO in the ThinLTO compile phase, we do not want to do indirect
+  // call promotion, as it would change the CFG too much and make the second
+  // profile annotation in the backend more difficult.
+  // PGO instrumentation is added during the compile phase for ThinLTO; do
+  // not run it a second time.
+ if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
addPGOInstrPasses(MPM);
- // Indirect call promotion that promotes intra-module targets only.
- // For ThinLTO this is done earlier due to interactions with globalopt
- // for imported functions.
- MPM.add(createPGOIndirectCallPromotionLegacyPass());
- }
if (EnableNonLTOGlobalsModRef)
// We add a module alias analysis pass here. In part due to bugs in the
@@ -489,6 +515,8 @@ void PassManagerBuilder::populateModulePassManager(
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (RunPartialInlining)
+ MPM.add(createPartialInliningPass());
if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO &&
!PrepareForThinLTO)
@@ -589,42 +617,24 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCorrelatedValuePropagationPass());
addInstructionCombiningPass(MPM);
MPM.add(createLICMPass());
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
}
- if (RunSLPAfterLoopVectorization) {
- if (SLPVectorize) {
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
- if (OptLevel > 1 && ExtraVectorizerPasses) {
- MPM.add(createEarlyCSEPass());
- }
- }
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- addInstructionCombiningPass(MPM);
- addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(NewGVN
- ? createNewGVNPass()
- : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ if (RunSLPAfterLoopVectorization && SLPVectorize) {
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ MPM.add(createEarlyCSEPass());
}
}
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createLateCFGSimplificationPass()); // Switches to lookup tables
addInstructionCombiningPass(MPM);
if (!DisableUnrollLoops) {
- MPM.add(createLoopUnrollPass()); // Unroll small loops
+ MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
      // LoopUnroll may generate some redundancy to clean up.
addInstructionCombiningPass(MPM);
@@ -662,6 +672,11 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopSinkPass());
// Get rid of LCSSA nodes.
MPM.add(createInstructionSimplifierPass());
+
+ // LoopSink (and other loop passes since the last simplifyCFG) might have
+ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+ MPM.add(createCFGSimplificationPass());
+
addExtensionsToPM(EP_OptimizerLast, MPM);
}
@@ -684,7 +699,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// left by the earlier promotion pass that promotes intra-module targets.
// This two-step promotion is to save the compile time. For LTO, it should
// produce the same result as if we only do promotion here.
- PM.add(createPGOIndirectCallPromotionLegacyPass(true));
+ PM.add(
+ createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -703,7 +719,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalSplitPass());
// Apply whole-program devirtualization and virtual constant propagation.
- PM.add(createWholeProgramDevirtPass());
+ PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
// That's all we need at opt level 1.
if (OptLevel == 1)
@@ -759,8 +775,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
- if (EnableMLSM)
- PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@@ -775,11 +790,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createLoopInterchangePass());
if (!DisableUnrollLoops)
- PM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
PM.add(createLoopVectorizePass(true, LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- PM.add(createLoopUnrollPass());
+ PM.add(createLoopUnrollPass(OptLevel));
// Now that we've optimized loops (in particular loop induction variables),
// we may have exposed more scalar opportunities. Run parts of the scalar
@@ -799,9 +814,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// alignments.
PM.add(createAlignmentFromAssumptionsPass());
- if (LoadCombine)
- PM.add(createLoadCombinePass());
-
// Cleanup and simplify the code after the scalar optimizations.
addInstructionCombiningPass(PM);
addExtensionsToPM(EP_Peephole, PM);
@@ -833,6 +845,23 @@ void PassManagerBuilder::populateThinLTOPassManager(
if (VerifyInput)
PM.add(createVerifierPass());
+ if (ImportSummary) {
+ // These passes import type identifier resolutions for whole-program
+ // devirtualization and CFI. They must run early because other passes may
+ // disturb the specific instruction patterns that these passes look for,
+ // creating dependencies on resolutions that may not appear in the summary.
+ //
+ // For example, GVN may transform the pattern assume(type.test) appearing in
+ // two basic blocks into assume(phi(type.test, type.test)), which would
+ // transform a dependency on a WPD resolution into a dependency on a type
+ // identifier resolution for CFI.
+ //
+ // Also, WPD has access to more precise information than ICP and can
+ // devirtualize more effectively, so it should operate on the IR first.
+ PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary));
+ PM.add(createLowerTypeTestsPass(nullptr, ImportSummary));
+ }
+
populateModulePassManager(PM);
if (VerifyOutput)
@@ -849,6 +878,12 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (OptLevel != 0)
addLTOOptimizationPasses(PM);
+ else {
+ // The whole-program-devirt pass needs to run at -O0 because only it knows
+ // about the llvm.type.checked.load intrinsic: it needs to both lower the
+ // intrinsic itself and handle it in the summary.
+ PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
+ }
// Create a function that performs CFI checks for cross-DSO calls with targets
// in the current module.
@@ -857,8 +892,7 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
// Lower type metadata and the type.test intrinsic. This pass supports Clang's
// control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
- PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
- /*Summary=*/nullptr));
+ PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index d9acb9b..3fd5984 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -14,10 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -28,6 +26,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 6a43f8d..6baada2 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -42,7 +43,9 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -50,6 +53,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <cctype>
@@ -159,21 +163,26 @@ protected:
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
+ std::vector<const FunctionSamples *>
+ findIndirectCallFunctionSamples(const Instruction &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineHotFunctions(Function &F);
+ bool inlineHotFunctions(Function &F,
+ DenseSet<GlobalValue::GUID> &ImportGUIDs);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
bool computeBlockWeights(Function &F);
void findEquivalenceClasses(Function &F);
+ template <bool IsPostDom>
void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
- DominatorTreeBase<BasicBlock> *DomTree);
+ DominatorTreeBase<BasicBlock, IsPostDom> *DomTree);
+
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
- unsigned getOffset(unsigned L, unsigned H) const;
+ unsigned getOffset(const DILocation *DIL) const;
void clearFunctionData();
/// \brief Map basic blocks to their computed weights.
@@ -202,9 +211,15 @@ protected:
/// the same number of times.
EquivalenceClassMap EquivalenceClass;
+  /// Map from function name to Function *. Used to find the function from
+  /// the function name. If the function name contains a suffix, an additional
+  /// entry is added mapping the stripped name to the function, provided that
+  /// mapping is one-to-one.
+ StringMap<Function *> SymbolMap;
+
/// \brief Dominance, post-dominance and loop information.
std::unique_ptr<DominatorTree> DT;
- std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
+ std::unique_ptr<PostDomTreeBase<BasicBlock>> PDT;
std::unique_ptr<LoopInfo> LI;
AssumptionCacheTracker *ACT;
@@ -326,11 +341,12 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
// If there are inlined callsites in this function, count the samples found
// in the respective bodies. However, do not bother counting callees with 0
// total samples, these are callees that were never invoked at runtime.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Count += countUsedRecords(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countUsedRecords(CalleeSamples);
+ }
return Count;
}
@@ -343,11 +359,12 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
unsigned Count = FS->getBodySamples().size();
// Only count records in hot callsites.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Count += countBodyRecords(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countBodyRecords(CalleeSamples);
+ }
return Count;
}
@@ -362,11 +379,12 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
Total += I.second.getSamples();
// Only count samples in hot callsites.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Total += countBodySamples(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Total += countBodySamples(CalleeSamples);
+ }
return Total;
}
@@ -398,15 +416,11 @@ void SampleProfileLoader::clearFunctionData() {
CoverageTracker.clear();
}
-/// \brief Returns the offset of lineno \p L to head_lineno \p H
-///
-/// \param L Lineno
-/// \param H Header lineno of the function
-///
-/// \returns offset to the header lineno. 16 bits are used to represent offset.
+/// Returns the line offset to the start line of the subprogram.
/// We assume that a single function will not exceed 65535 LOC.
-unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
- return (L - H) & 0xffff;
+unsigned SampleProfileLoader::getOffset(const DILocation *DIL) const {
+ return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+ 0xffff;
}
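The rewritten getOffset reduces a debug location to a 16-bit offset from the enclosing subprogram's start line; a standalone sketch of that encoding, assuming (as the comment above does) that a single function stays under 65535 lines:

#include <cstdint>

// Offset of an instruction's line relative to its function's start line,
// truncated to 16 bits.
uint32_t lineOffset(uint32_t InstLine, uint32_t SubprogramStartLine) {
  return (InstLine - SubprogramStartLine) & 0xffff;
}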
/// \brief Print the weight of edge \p E on stream \p OS.
@@ -451,8 +465,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
/// \param Inst Instruction to query.
///
/// \returns the weight of \p Inst.
-ErrorOr<uint64_t>
-SampleProfileLoader::getInstWeight(const Instruction &Inst) {
+ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -470,19 +483,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// If a call/invoke instruction is inlined in profile, but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
- bool IsCall = isa<CallInst>(Inst) || isa<InvokeInst>(Inst);
- if (IsCall && findCalleeFunctionSamples(Inst))
+ if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
+ findCalleeFunctionSamples(Inst))
return 0;
const DILocation *DIL = DLoc;
- unsigned Lineno = DLoc.getLine();
- unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
-
- uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
- uint32_t Discriminator = DIL->getDiscriminator();
- ErrorOr<uint64_t> R = IsCall
- ? FS->findCallSamplesAt(LineOffset, Discriminator)
- : FS->findSamplesAt(LineOffset, Discriminator);
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+ ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
if (R) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
@@ -491,13 +499,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
LLVMContext &Ctx = F->getContext();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, *F, DLoc,
- Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
- Twine(LineOffset) +
+ Twine("Applied ") + Twine(*R) +
+ " samples from profile (offset: " + Twine(LineOffset) +
((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
}
- DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
- << Inst << " (line offset: " << Lineno - HeaderLineno << "."
- << DIL->getDiscriminator() << " - weight: " << R.get()
+ DEBUG(dbgs() << " " << DLoc.getLine() << "."
+ << DIL->getBaseDiscriminator() << ":" << Inst
+ << " (line offset: " << LineOffset << "."
+ << DIL->getBaseDiscriminator() << " - weight: " << R.get()
<< ")\n");
}
return R;
@@ -511,8 +520,7 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
/// \param BB The basic block to query.
///
/// \returns the weight for \p BB.
-ErrorOr<uint64_t>
-SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
+ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
uint64_t Max = 0;
bool HasWeight = false;
for (auto &I : BB->getInstList()) {
@@ -565,16 +573,49 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
if (!DIL) {
return nullptr;
}
- DISubprogram *SP = DIL->getScope()->getSubprogram();
- if (!SP)
- return nullptr;
+
+ StringRef CalleeName;
+ if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
+ if (Function *Callee = CI->getCalledFunction())
+ CalleeName = Callee->getName();
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
- return FS->findFunctionSamplesAt(LineLocation(
- getOffset(DIL->getLine(), SP->getLine()), DIL->getDiscriminator()));
+ return FS->findFunctionSamplesAt(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName);
+}
+
+/// Returns a vector of FunctionSamples that are the indirect call targets
+/// of \p Inst. The vector is sorted by the total number of samples.
+std::vector<const FunctionSamples *>
+SampleProfileLoader::findIndirectCallFunctionSamples(
+ const Instruction &Inst) const {
+ const DILocation *DIL = Inst.getDebugLoc();
+ std::vector<const FunctionSamples *> R;
+
+ if (!DIL) {
+ return R;
+ }
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (FS == nullptr)
+ return R;
+
+ if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
+ if (M->size() == 0)
+ return R;
+ for (const auto &NameFS : *M) {
+ R.push_back(&NameFS.second);
+ }
+ std::sort(R.begin(), R.end(),
+ [](const FunctionSamples *L, const FunctionSamples *R) {
+ return L->getTotalSamples() > R->getTotalSamples();
+ });
+ }
+ return R;
}
/// \brief Get the FunctionSamples for an instruction.
@@ -588,23 +629,23 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
- SmallVector<LineLocation, 10> S;
+ SmallVector<std::pair<LineLocation, StringRef>, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
- if (!DIL) {
+ if (!DIL)
return Samples;
- }
+
+ const DILocation *PrevDIL = DIL;
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
- DISubprogram *SP = DIL->getScope()->getSubprogram();
- if (!SP)
- return nullptr;
- S.push_back(LineLocation(getOffset(DIL->getLine(), SP->getLine()),
- DIL->getDiscriminator()));
+ S.push_back(std::make_pair(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()),
+ PrevDIL->getScope()->getSubprogram()->getLinkageName()));
+ PrevDIL = DIL;
}
if (S.size() == 0)
return Samples;
const FunctionSamples *FS = Samples;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
- FS = FS->findFunctionSamplesAt(S[i]);
+ FS = FS->findFunctionSamplesAt(S[i].first, S[i].second);
}
return FS;
}
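findFunctionSamples first collects the inline stack by walking the inlinedAt chain from the innermost frame outward, then resolves the nested callsite-sample maps from the outermost frame back in; a compact standalone sketch of that two-phase walk (the types here are hypothetical, not the SampleProf API):

#include <map>
#include <string>
#include <utility>
#include <vector>

struct Frame {
  unsigned LineOffset;
  std::string Callee;
  const Frame *InlinedAt; // nullptr for the outermost frame
};

struct SampleNode {
  // Child sample nodes keyed by (line offset, callee name); ownership is
  // managed elsewhere in this sketch.
  std::map<std::pair<unsigned, std::string>, SampleNode *> Callsites;
  const SampleNode *findAt(unsigned Off, const std::string &Name) const {
    auto It = Callsites.find({Off, Name});
    return It == Callsites.end() ? nullptr : It->second;
  }
};

const SampleNode *findFrameSamples(const SampleNode *Root, const Frame *F) {
  std::vector<const Frame *> Stack; // innermost first
  for (; F; F = F->InlinedAt)
    Stack.push_back(F);
  const SampleNode *S = Root;
  for (auto It = Stack.rbegin(); It != Stack.rend() && S; ++It) // outermost first
    S = S->findAt((*It)->LineOffset, (*It)->Callee);
  return S;
}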
@@ -614,14 +655,17 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
/// Iteratively traverse all callsites of the function \p F, and find if
/// the corresponding inlined instance exists and is hot in profile. If
/// it is hot enough, inline the callsites and adds new callsites of the
-/// callee into the caller.
-///
-/// TODO: investigate the possibility of not invoking InlineFunction directly.
+/// callee into the caller. If the call is an indirect call, first promote
+/// it to a direct call. Each indirect call is limited to a single target.
///
/// \param F function to perform iterative inlining.
+/// \param ImportGUIDs a set to be updated to include all GUIDs that come
+/// from a different module but inlined in the profiled binary.
///
/// \returns True if there is any inline happened.
-bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+bool SampleProfileLoader::inlineHotFunctions(
+ Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
+ DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
LLVMContext &Ctx = F.getContext();
std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
@@ -635,7 +679,7 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
- (FS = findCalleeFunctionSamples(I))) {
+ !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
Candidates.push_back(&I);
if (callsiteIsHot(Samples, FS))
Hot = true;
@@ -647,18 +691,55 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
}
for (auto I : CIS) {
InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
- CallSite CS(I);
- Function *CalledFunction = CS.getCalledFunction();
- if (!CalledFunction || !CalledFunction->getSubprogram())
+ Function *CalledFunction = CallSite(I).getCalledFunction();
+ // Do not inline recursive calls.
+ if (CalledFunction == &F)
continue;
+ Instruction *DI = I;
+ if (!CalledFunction && !PromotedInsns.count(I) &&
+ CallSite(I).isIndirectCall())
+ for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
+ auto CalleeFunctionName = FS->getName();
+        // If it is a recursive call, we do not inline it as it could bloat
+        // the code exponentially. There are ways to handle this better, e.g.
+        // cloning the caller first and inlining the cloned caller if it is
+        // recursive. Since LLVM does not inline recursive calls, we simply
+        // ignore it instead of handling it explicitly.
+ if (CalleeFunctionName == F.getName())
+ continue;
+ const char *Reason = "Callee function not available";
+ auto R = SymbolMap.find(CalleeFunctionName);
+ if (R == SymbolMap.end())
+ continue;
+ CalledFunction = R->getValue();
+ if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
+ // The indirect target was promoted and inlined in the profile, as a
+ // result, we do not have profile info for the branch probability.
+ // We set the probability to 80% taken to indicate that the static
+ // call is likely taken.
+ DI = dyn_cast<Instruction>(
+ promoteIndirectCall(I, CalledFunction, 80, 100, false)
+ ->stripPointerCasts());
+ PromotedInsns.insert(I);
+ } else {
+ DEBUG(dbgs() << "\nFailed to promote indirect call to "
+ << CalleeFunctionName << " because " << Reason
+ << "\n");
+ continue;
+ }
+ }
+ if (!CalledFunction || !CalledFunction->getSubprogram()) {
+ findCalleeFunctionSamples(*I)->findImportedFunctions(
+ ImportGUIDs, F.getParent(),
+ Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
+ continue;
+ }
DebugLoc DLoc = I->getDebugLoc();
- uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
- if (InlineFunction(CS, IFI)) {
+ if (InlineFunction(CallSite(DI), IFI)) {
LocalChanged = true;
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
Twine("inlined hot callee '") +
- CalledFunction->getName() + "' with " +
- Twine(NumSamples) + " samples into '" +
+ CalledFunction->getName() + "' into '" +
F.getName() + "'");
}
}
@@ -694,9 +775,10 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
/// \param DomTree Opposite dominator tree. If \p Descendants is filled
/// with blocks from \p BB1's dominator tree, then
/// this is the post-dominator tree, and vice versa.
+template <bool IsPostDom>
void SampleProfileLoader::findEquivalencesFor(
BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
- DominatorTreeBase<BasicBlock> *DomTree) {
+ DominatorTreeBase<BasicBlock, IsPostDom> *DomTree) {
const BasicBlock *EC = EquivalenceClass[BB1];
uint64_t Weight = BlockWeights[EC];
for (const auto *BB2 : Descendants) {
@@ -994,6 +1076,26 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
}
+/// Sorts the CallTargetMap \p M by count in descending order and stores the
+/// sorted result in \p Sorted. Returns the total counts.
+static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted,
+ const SampleRecord::CallTargetMap &M) {
+ Sorted.clear();
+ uint64_t Sum = 0;
+ for (auto I = M.begin(); I != M.end(); ++I) {
+ Sum += I->getValue();
+ Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()});
+ }
+ std::sort(Sorted.begin(), Sorted.end(),
+ [](const InstrProfValueData &L, const InstrProfValueData &R) {
+ if (L.Count == R.Count)
+ return L.Value > R.Value;
+ else
+ return L.Count > R.Count;
+ });
+ return Sum;
+}
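SortCallTargets above orders the promotion candidates by descending count, breaking ties on the target GUID, and returns the total; the same ordering in a self-contained form (a plain struct stands in for InstrProfValueData):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Target { uint64_t Value; uint64_t Count; }; // GUID + sample count

uint64_t sortTargetsByCount(std::vector<Target> &Targets) {
  uint64_t Sum = 0;
  for (const Target &T : Targets)
    Sum += T.Count;
  std::sort(Targets.begin(), Targets.end(),
            [](const Target &L, const Target &R) {
              return L.Count == R.Count ? L.Value > R.Value : L.Count > R.Count;
            });
  return Sum;
}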
+
/// \brief Propagate weights into edges
///
/// The following rules are applied to every block BB in the CFG:
@@ -1015,10 +1117,6 @@ void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
unsigned I = 0;
- // Add an entry count to the function using the samples gathered
- // at the function entry.
- F.setEntryCount(Samples->getHeadSamples() + 1);
-
// If BB weight is larger than its corresponding loop's header BB weight,
// use the BB weight to replace the loop header BB weight.
for (auto &BI : F) {
@@ -1071,13 +1169,32 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (BlockWeights[BB]) {
for (auto &I : BB->getInstList()) {
- if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (!dyn_cast<IntrinsicInst>(&I)) {
- SmallVector<uint32_t, 1> Weights;
- Weights.push_back(BlockWeights[BB]);
- CI->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
- }
+ if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
+ continue;
+ CallSite CS(&I);
+ if (!CS.getCalledFunction()) {
+ const DebugLoc &DLoc = I.getDebugLoc();
+ if (!DLoc)
+ continue;
+ const DILocation *DIL = DLoc;
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+
+ const FunctionSamples *FS = findFunctionSamples(I);
+ if (!FS)
+ continue;
+ auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ if (!T || T.get().size() == 0)
+ continue;
+ SmallVector<InstrProfValueData, 2> SortedCallTargets;
+ uint64_t Sum = SortCallTargets(SortedCallTargets, T.get());
+ annotateValueSite(*I.getParent()->getParent()->getParent(), I,
+ SortedCallTargets, Sum, IPVK_IndirectCallTarget,
+ SortedCallTargets.size());
+ } else if (!dyn_cast<IntrinsicInst>(&I)) {
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(BlockWeights[BB]);
+ I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
}
}
}
@@ -1087,8 +1204,11 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
continue;
+ DebugLoc BranchLoc = TI->getDebugLoc();
DEBUG(dbgs() << "\nGetting weights for branch at line "
- << TI->getDebugLoc().getLine() << ".\n");
+ << ((BranchLoc) ? Twine(BranchLoc.getLine())
+ : Twine("<UNKNOWN LOCATION>"))
+ << ".\n");
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
DebugLoc MaxDestLoc;
@@ -1115,13 +1235,16 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
+ uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
- if (MaxWeight > 0) {
+ // Do not set weights if the weights are present. In ThinLTO, the profile
+ // annotation is done twice. If the first annotation already set the
+ // weights, the second pass does not need to set it.
+ if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
- DebugLoc BranchLoc = TI->getDebugLoc();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, F, MaxDestLoc,
Twine("most popular destination for conditional branches at ") +
@@ -1163,7 +1286,7 @@ void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
DT.reset(new DominatorTree);
DT->recalculate(F);
- PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+ PDT.reset(new PostDomTreeBase<BasicBlock>());
PDT->recalculate(F);
LI.reset(new LoopInfo);
@@ -1228,12 +1351,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- Changed |= inlineHotFunctions(F);
+ DenseSet<GlobalValue::GUID> ImportGUIDs;
+ Changed |= inlineHotFunctions(F, ImportGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
+ // Add an entry count to the function using the samples gathered at the
+    // function entry. Also record the GUIDs that come from a different
+    // module but were inlined in the profiled binary. This aims to make
+    // the IR match the profiled binary before annotation.
+ F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
+
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
@@ -1309,6 +1439,26 @@ bool SampleProfileLoader::runOnModule(Module &M) {
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
+ // Populate the symbol map.
+ for (const auto &N_F : M.getValueSymbolTable()) {
+ std::string OrigName = N_F.getKey();
+ Function *F = dyn_cast<Function>(N_F.getValue());
+ if (F == nullptr)
+ continue;
+ SymbolMap[OrigName] = F;
+ auto pos = OrigName.find('.');
+ if (pos != std::string::npos) {
+ std::string NewName = OrigName.substr(0, pos);
+ auto r = SymbolMap.insert(std::make_pair(NewName, F));
+      // Failing to insert means there is already an entry in SymbolMap,
+      // i.e. multiple functions map to the same stripped name. In this
+      // case of a name conflict, set the value to nullptr to avoid
+      // confusion.
+ if (!r.second)
+ r.first->second = nullptr;
+ }
+ }
+
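The symbol-map population above also registers each function under its suffix-stripped name (everything before the first '.', so a name like "foo.llvm.1234" maps to "foo") and nulls the entry when two functions collide on the same stripped name; a minimal standalone sketch of that scheme (Func is a hypothetical stand-in for llvm::Function):

#include <map>
#include <string>

struct Func {}; // hypothetical stand-in for llvm::Function

void addToSymbolMap(std::map<std::string, Func *> &SymbolMap,
                    const std::string &OrigName, Func *F) {
  SymbolMap[OrigName] = F;
  auto Pos = OrigName.find('.');
  if (Pos == std::string::npos)
    return;
  auto Res = SymbolMap.insert({OrigName.substr(0, Pos), F});
  if (!Res.second)
    Res.first->second = nullptr; // stripped name is ambiguous; drop the mapping
}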
bool retval = false;
for (auto &F : M)
if (!F.isDeclaration()) {
@@ -1329,7 +1479,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
bool SampleProfileLoader::runOnFunction(Function &F) {
F.setEntryCount(0);
Samples = Reader->getSamplesFor(F);
- if (!Samples->empty())
+ if (Samples && !Samples->empty())
return emitAnnotations(F);
return false;
}
@@ -1337,7 +1487,8 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) {
- SampleProfileLoader SampleLoader(SampleProfileFile);
+ SampleProfileLoader SampleLoader(
+ ProfileFileName.empty() ? SampleProfileFile : ProfileFileName);
SampleLoader.doInitialization(M);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 8f6f161..de1b51e 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -20,7 +20,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -30,6 +29,7 @@
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -323,6 +323,14 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
LiveGVs.insert(GVE);
}
+ std::set<DICompileUnit *> LiveCUs;
+ // Any CU referenced from a subprogram is live.
+ for (DISubprogram *SP : F.subprograms()) {
+ if (SP->getUnit())
+ LiveCUs.insert(SP->getUnit());
+ }
+
+ bool HasDeadCUs = false;
for (DICompileUnit *DIC : F.compile_units()) {
// Create our live global variable list.
bool GlobalVariableChange = false;
@@ -341,6 +349,11 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
GlobalVariableChange = true;
}
+ if (!LiveGlobalVariables.empty())
+ LiveCUs.insert(DIC);
+ else if (!LiveCUs.count(DIC))
+ HasDeadCUs = true;
+
// If we found dead global variables, replace the current global
// variable list with our new live global variable list.
if (GlobalVariableChange) {
@@ -352,5 +365,16 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
LiveGlobalVariables.clear();
}
+ if (HasDeadCUs) {
+ // Delete the old node and replace it with a new one
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->clearOperands();
+ if (!LiveCUs.empty()) {
+ for (DICompileUnit *CU : LiveCUs)
+ NMD->addOperand(CU);
+ }
+ Changed = true;
+ }
+
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 3680cfc..8ef6bb6 100644
--- a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -6,99 +6,67 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This pass prepares a module containing type metadata for ThinLTO by splitting
-// it into regular and thin LTO parts if possible, and writing both parts to
-// a multi-module bitcode file. Modules that do not contain type metadata are
-// written unmodified as a single module.
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
namespace {
-// Produce a unique identifier for this module by taking the MD5 sum of the
-// names of the module's strong external symbols. This identifier is
-// normally guaranteed to be unique, or the program would fail to link due to
-// multiply defined symbols.
-//
-// If the module has no strong external symbols (such a module may still have a
-// semantic effect if it performs global initialization), we cannot produce a
-// unique identifier for this module, so we return the empty string, which
-// causes the entire module to be written as a regular LTO module.
-std::string getModuleId(Module *M) {
- MD5 Md5;
- bool ExportsSymbols = false;
- auto AddGlobal = [&](GlobalValue &GV) {
- if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
- !GV.hasExternalLinkage())
- return;
- ExportsSymbols = true;
- Md5.update(GV.getName());
- Md5.update(ArrayRef<uint8_t>{0});
- };
-
- for (auto &F : *M)
- AddGlobal(F);
- for (auto &GV : M->globals())
- AddGlobal(GV);
- for (auto &GA : M->aliases())
- AddGlobal(GA);
- for (auto &IF : M->ifuncs())
- AddGlobal(IF);
-
- if (!ExportsSymbols)
- return "";
-
- MD5::MD5Result R;
- Md5.final(R);
-
- SmallString<32> Str;
- MD5::stringifyResult(R, Str);
- return ("$" + Str).str();
-}
-
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
-void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
- auto PromoteInternal = [&](GlobalValue &ExportGV) {
+void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
+ SetVector<GlobalValue *> &PromoteExtra) {
+ DenseMap<const Comdat *, Comdat *> RenamedComdats;
+ for (auto &ExportGV : ExportM.global_values()) {
if (!ExportGV.hasLocalLinkage())
- return;
+ continue;
+
+ auto Name = ExportGV.getName();
+ GlobalValue *ImportGV = ImportM.getNamedValue(Name);
+ if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV))
+ continue;
- GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
- if (!ImportGV || ImportGV->use_empty())
- return;
+ std::string NewName = (Name + ModuleId).str();
- std::string NewName = (ExportGV.getName() + ModuleId).str();
+ if (const auto *C = ExportGV.getComdat())
+ if (C->getName() == Name)
+ RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
ExportGV.setName(NewName);
ExportGV.setLinkage(GlobalValue::ExternalLinkage);
ExportGV.setVisibility(GlobalValue::HiddenVisibility);
- ImportGV->setName(NewName);
- ImportGV->setVisibility(GlobalValue::HiddenVisibility);
- };
+ if (ImportGV) {
+ ImportGV->setName(NewName);
+ ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+ }
+ }
- for (auto &F : ExportM)
- PromoteInternal(F);
- for (auto &GV : ExportM.globals())
- PromoteInternal(GV);
- for (auto &GA : ExportM.aliases())
- PromoteInternal(GA);
- for (auto &IF : ExportM.ifuncs())
- PromoteInternal(IF);
+ if (!RenamedComdats.empty())
+ for (auto &GO : ExportM.global_objects())
+ if (auto *C = GO.getComdat()) {
+ auto Replacement = RenamedComdats.find(C);
+ if (Replacement != RenamedComdats.end())
+ GO.setComdat(Replacement->second);
+ }
}
// Promote all internal (i.e. distinct) type ids used by the module by replacing
@@ -194,24 +162,7 @@ void simplifyExternals(Module &M) {
}
void filterModule(
- Module *M, std::function<bool(const GlobalValue *)> ShouldKeepDefinition) {
- for (Function &F : *M) {
- if (ShouldKeepDefinition(&F))
- continue;
-
- F.deleteBody();
- F.clearMetadata();
- }
-
- for (GlobalVariable &GV : M->globals()) {
- if (ShouldKeepDefinition(&GV))
- continue;
-
- GV.setInitializer(nullptr);
- GV.setLinkage(GlobalValue::ExternalLinkage);
- GV.clearMetadata();
- }
-
+ Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E;) {
GlobalAlias *GA = &*I++;
@@ -219,65 +170,227 @@ void filterModule(
continue;
GlobalObject *GO;
- if (I->getValueType()->isFunctionTy())
+ if (GA->getValueType()->isFunctionTy())
GO = Function::Create(cast<FunctionType>(GA->getValueType()),
GlobalValue::ExternalLinkage, "", M);
else
GO = new GlobalVariable(
*M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
- (Constant *)nullptr, "", (GlobalVariable *)nullptr,
+ nullptr, "", nullptr,
GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
GO->takeName(GA);
GA->replaceAllUsesWith(GO);
GA->eraseFromParent();
}
+
+ for (Function &F : *M) {
+ if (ShouldKeepDefinition(&F))
+ continue;
+
+ F.deleteBody();
+ F.setComdat(nullptr);
+ F.clearMetadata();
+ }
+
+ for (GlobalVariable &GV : M->globals()) {
+ if (ShouldKeepDefinition(&GV))
+ continue;
+
+ GV.setInitializer(nullptr);
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ GV.setComdat(nullptr);
+ GV.clearMetadata();
+ }
+}
+
+void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
+ if (auto *F = dyn_cast<Function>(C))
+ return Fn(F);
+ if (isa<GlobalValue>(C))
+ return;
+ for (Value *Op : C->operands())
+ forEachVirtualFunction(cast<Constant>(Op), Fn);
}
// If it's possible to split M into regular and thin LTO parts, do so and write
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
// regular LTO bitcode file to OS.
-void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
- std::string ModuleId = getModuleId(&M);
+void splitAndWriteThinLTOBitcode(
+ raw_ostream &OS, raw_ostream *ThinLinkOS,
+ function_ref<AAResults &(Function &)> AARGetter, Module &M) {
+ std::string ModuleId = getUniqueModuleId(&M);
if (ModuleId.empty()) {
// We couldn't generate a module ID for this module, just write it out as a
// regular LTO module.
WriteBitcodeToFile(&M, OS);
+ if (ThinLinkOS)
+ // We don't have a ThinLTO part, but still write the module to the
+ // ThinLinkOS if requested so that the expected output file is produced.
+ WriteBitcodeToFile(&M, *ThinLinkOS);
return;
}
promoteTypeIds(M, ModuleId);
- auto IsInMergedM = [&](const GlobalValue *GV) {
- auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
- if (!GVar)
- return false;
-
+ // Returns whether a global has attached type metadata. Such globals may
+ // participate in CFI or whole-program devirtualization, so they need to
+ // appear in the merged module instead of the thin LTO module.
+ auto HasTypeMetadata = [&](const GlobalObject *GO) {
SmallVector<MDNode *, 1> MDs;
- GVar->getMetadata(LLVMContext::MD_type, MDs);
+ GO->getMetadata(LLVMContext::MD_type, MDs);
return !MDs.empty();
};
+ // Collect the set of virtual functions that are eligible for virtual constant
+ // propagation. Each eligible function must not access memory, must return
+ // an integer of width <=64 bits, must take at least one argument, must not
+ // use its first argument (assumed to be "this") and all arguments other than
+ // the first one must be of <=64 bit integer type.
+ //
+ // Note that we test whether this copy of the function is readnone, rather
+ // than testing function attributes, which must hold for any copy of the
+ // function, even a less optimized version substituted at link time. This is
+ // sound because the virtual constant propagation optimizations effectively
+ // inline all implementations of the virtual function into each call site,
+ // rather than using function attributes to perform local optimization.
+ std::set<const Function *> EligibleVirtualFns;
+ // If any member of a comdat lives in MergedM, put all members of that
+ // comdat in MergedM to keep the comdat together.
+ DenseSet<const Comdat *> MergedMComdats;
+ for (GlobalVariable &GV : M.globals())
+ if (HasTypeMetadata(&GV)) {
+ if (const auto *C = GV.getComdat())
+ MergedMComdats.insert(C);
+ forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
+ auto *RT = dyn_cast<IntegerType>(F->getReturnType());
+ if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
+ !F->arg_begin()->use_empty())
+ return;
+ for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
+ auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
+ if (!ArgT || ArgT->getBitWidth() > 64)
+ return;
+ }
+ if (!F->isDeclaration() &&
+ computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
+ EligibleVirtualFns.insert(F);
+ });
+ }
+
ValueToValueMapTy VMap;
- std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
+ std::unique_ptr<Module> MergedM(
+ CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
+ if (const auto *C = GV->getComdat())
+ if (MergedMComdats.count(C))
+ return true;
+ if (auto *F = dyn_cast<Function>(GV))
+ return EligibleVirtualFns.count(F);
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ return HasTypeMetadata(GVar);
+ return false;
+ }));
+ StripDebugInfo(*MergedM);
+
+ for (Function &F : *MergedM)
+ if (!F.isDeclaration()) {
+ // Reset the linkage of all functions eligible for virtual constant
+ // propagation. The canonical definitions live in the thin LTO module so
+ // that they can be imported.
+ F.setLinkage(GlobalValue::AvailableExternallyLinkage);
+ F.setComdat(nullptr);
+ }
- filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
+ SetVector<GlobalValue *> CfiFunctions;
+ for (auto &F : M)
+ if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
+ CfiFunctions.insert(&F);
+
+ // Remove all globals with type metadata, globals with comdats that live in
+ // MergedM, and aliases pointing to such globals from the thin LTO module.
+ filterModule(&M, [&](const GlobalValue *GV) {
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (HasTypeMetadata(GVar))
+ return false;
+ if (const auto *C = GV->getComdat())
+ if (MergedMComdats.count(C))
+ return false;
+ return true;
+ });
+
+ promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
+ promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
+
+ SmallVector<MDNode *, 8> CfiFunctionMDs;
+ for (auto V : CfiFunctions) {
+ Function &F = *cast<Function>(V);
+ SmallVector<MDNode *, 2> Types;
+ F.getMetadata(LLVMContext::MD_type, Types);
+
+ auto &Ctx = MergedM->getContext();
+ SmallVector<Metadata *, 4> Elts;
+ Elts.push_back(MDString::get(Ctx, F.getName()));
+ CfiFunctionLinkage Linkage;
+ if (!F.isDeclarationForLinker())
+ Linkage = CFL_Definition;
+ else if (F.isWeakForLinker())
+ Linkage = CFL_WeakDeclaration;
+ else
+ Linkage = CFL_Declaration;
+ Elts.push_back(ConstantAsMetadata::get(
+ llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
+ for (auto Type : Types)
+ Elts.push_back(Type);
+ CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
+ }
- promoteInternals(*MergedM, M, ModuleId);
- promoteInternals(M, *MergedM, ModuleId);
+ if(!CfiFunctionMDs.empty()) {
+ NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
+ for (auto MD : CfiFunctionMDs)
+ NMD->addOperand(MD);
+ }
simplifyExternals(*MergedM);
- SmallVector<char, 0> Buffer;
- BitcodeWriter W(Buffer);
-
// FIXME: Try to re-use BSI and PFI from the original module here.
- ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
- W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
- /*GenerateHash=*/true);
+ ProfileSummaryInfo PSI(M);
+ ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
- W.writeModule(MergedM.get());
+ // Mark the merged module as requiring full LTO. We still want an index for
+ // it though, so that it can participate in summary-based dead stripping.
+ MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+ ModuleSummaryIndex MergedMIndex =
+ buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
+ SmallVector<char, 0> Buffer;
+
+ BitcodeWriter W(Buffer);
+ // Save the module hash produced for the full bitcode, which will
+ // be used in the backends, and use that in the minimized bitcode
+ // produced for the full link.
+ ModuleHash ModHash = {{0}};
+ W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*GenerateHash=*/true, &ModHash);
+ W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
+ &MergedMIndex);
+ W.writeSymtab();
+ W.writeStrtab();
OS << Buffer;
+
+ // If a minimized bitcode module was requested for the thin link,
+ // strip the debug info (the merged module was already stripped above)
+ // and write it to the given OS.
+ if (ThinLinkOS) {
+ Buffer.clear();
+ BitcodeWriter W2(Buffer);
+ StripDebugInfo(M);
+ W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*GenerateHash=*/false, &ModHash);
+ W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
+ &MergedMIndex);
+ W2.writeSymtab();
+ W2.writeStrtab();
+ *ThinLinkOS << Buffer;
+ }
}
// Returns whether this module needs to be split because it uses type metadata.
@@ -292,28 +405,45 @@ bool requiresSplit(Module &M) {
return false;
}
-void writeThinLTOBitcode(raw_ostream &OS, Module &M,
- const ModuleSummaryIndex *Index) {
+void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
+ function_ref<AAResults &(Function &)> AARGetter,
+ Module &M, const ModuleSummaryIndex *Index) {
// See if this module has any type metadata. If so, we need to split it.
if (requiresSplit(M))
- return splitAndWriteThinLTOBitcode(OS, M);
+ return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
// Otherwise we can just write it out as a regular module.
+
+ // Save the module hash produced for the full bitcode, which will
+ // be used in the backends, and use that in the minimized bitcode
+ // produced for the full link.
+ ModuleHash ModHash = {{0}};
WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
- /*GenerateHash=*/true);
+ /*GenerateHash=*/true, &ModHash);
+ // If a minimized bitcode module was requested for the thin link,
+ // strip the debug info and write it to the given OS.
+ if (ThinLinkOS) {
+ StripDebugInfo(M);
+ WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
+ Index,
+ /*GenerateHash=*/false, &ModHash);
+ }
}
class WriteThinLTOBitcode : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
+ // The output stream on which to emit a minimized module for use
+ // just in the thin link, if requested.
+ raw_ostream *ThinLinkOS;
public:
static char ID; // Pass identification, replacement for typeid
- WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
+ WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
- explicit WriteThinLTOBitcode(raw_ostream &o)
- : ModulePass(ID), OS(o) {
+ explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
+ : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
@@ -322,12 +452,14 @@ public:
bool runOnModule(Module &M) override {
const ModuleSummaryIndex *Index =
&(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
- writeThinLTOBitcode(OS, M, Index);
+ writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
return true;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ModuleSummaryIndexWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // anonymous namespace
@@ -335,10 +467,25 @@ public:
char WriteThinLTOBitcode::ID = 0;
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
-ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
- return new WriteThinLTOBitcode(Str);
+ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
+ raw_ostream *ThinLinkOS) {
+ return new WriteThinLTOBitcode(Str, ThinLinkOS);
+}
+
+PreservedAnalyses
+llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ writeThinLTOBitcode(OS, ThinLinkOS,
+ [&FAM](Function &F) -> AAResults & {
+ return FAM.getResult<AAManager>(F);
+ },
+ M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
+ return PreservedAnalyses::all();
}
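
To make the new two-stream interface concrete, the following is a minimal driver sketch for the updated legacy pass, emitting both the full bitcode and the minimized bitcode for the thin link. Only createWriteThinLTOBitcodePass and its two-stream signature come from the diff; the header choices, file names, and the emitThinLTO wrapper are illustrative assumptions.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"

using namespace llvm;

// Illustrative driver: write the full module bitcode to module.bc and the
// minimized bitcode for the thin link to module.thinlink.bc.
static void emitThinLTO(Module &M) {
  std::error_code EC; // error checking elided for brevity
  raw_fd_ostream FullOS("module.bc", EC, sys::fs::F_None);
  raw_fd_ostream ThinLinkOS("module.thinlink.bc", EC, sys::fs::F_None);

  legacy::PassManager PM;
  // The second argument is the optional thin-link output stream added by this
  // change; passing nullptr keeps the old single-output behaviour.
  PM.add(createWriteThinLTOBitcodePass(FullOS, &ThinLinkOS));
  PM.run(M);
}
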
diff --git a/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 844cc0f..00769cd 100644
--- a/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -25,6 +25,20 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
+// This pass is intended to be used during the regular and thin LTO pipelines.
+// During regular LTO, the pass determines the best optimization for each
+// virtual call and applies the resolutions directly to virtual calls that are
+// eligible for virtual call optimization (i.e. calls that use either of the
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
+// ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over a single merged module
+// that contains all vtables with !type metadata that participate in the link.
+// The pass computes a resolution for each virtual call and stores it in the
+// type identifier summary.
+// - Import phase: this is run during the thin backends over the individual
+// modules. The pass applies the resolutions previously computed during the
+//   export phase to each eligible virtual call.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -32,9 +46,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -54,12 +70,16 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
@@ -72,6 +92,26 @@ using namespace wholeprogramdevirt;
#define DEBUG_TYPE "wholeprogramdevirt"
+static cl::opt<PassSummaryAction> ClSummaryAction(
+ "wholeprogramdevirt-summary-action",
+ cl::desc("What to do with the summary when running this pass"),
+ cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
+ clEnumValN(PassSummaryAction::Import, "import",
+ "Import typeid resolutions from summary and globals"),
+ clEnumValN(PassSummaryAction::Export, "export",
+ "Export typeid resolutions to summary and globals")),
+ cl::Hidden);
+
+static cl::opt<std::string> ClReadSummary(
+ "wholeprogramdevirt-read-summary",
+ cl::desc("Read summary from given YAML file before running pass"),
+ cl::Hidden);
+
+static cl::opt<std::string> ClWriteSummary(
+ "wholeprogramdevirt-write-summary",
+ cl::desc("Write summary to given YAML file after running pass"),
+ cl::Hidden);
+
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
@@ -259,15 +299,92 @@ struct VirtualCallSite {
}
};
+// Call site information collected for a specific VTableSlot and possibly a list
+// of constant integer arguments. The grouping by arguments is handled by the
+// VTableSlotInfo class.
+struct CallSiteInfo {
+ /// The set of call sites for this slot. Used during regular LTO and the
+ /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
+ /// call sites that appear in the merged module itself); in each of these
+ /// cases we are directly operating on the call sites at the IR level.
+ std::vector<VirtualCallSite> CallSites;
+
+ // These fields are used during the export phase of ThinLTO and reflect
+ // information collected from function summaries.
+
+ /// Whether any function summary contains an llvm.assume(llvm.type.test) for
+ /// this slot.
+ bool SummaryHasTypeTestAssumeUsers;
+
+ /// CFI-specific: a vector containing the list of function summaries that use
+ /// the llvm.type.checked.load intrinsic and therefore will require
+ /// resolutions for llvm.type.test in order to implement CFI checks if
+ /// devirtualization was unsuccessful. If devirtualization was successful, the
+ /// pass will clear this vector by calling markDevirt(). If at the end of the
+ /// pass the vector is non-empty, we will need to add a use of llvm.type.test
+ /// to each of the function summaries in the vector.
+ std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+
+ bool isExported() const {
+ return SummaryHasTypeTestAssumeUsers ||
+ !SummaryTypeCheckedLoadUsers.empty();
+ }
+
+ /// As explained in the comment for SummaryTypeCheckedLoadUsers.
+ void markDevirt() { SummaryTypeCheckedLoadUsers.clear(); }
+};
+
+// Call site information collected for a specific VTableSlot.
+struct VTableSlotInfo {
+ // The set of call sites which do not have all constant integer arguments
+ // (excluding "this").
+ CallSiteInfo CSInfo;
+
+ // The set of call sites with all constant integer arguments (excluding
+ // "this"), grouped by argument list.
+ std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
+
+ void addCallSite(Value *VTable, CallSite CS, unsigned *NumUnsafeUses);
+
+private:
+ CallSiteInfo &findCallSiteInfo(CallSite CS);
+};
+
+CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) {
+ std::vector<uint64_t> Args;
+ auto *CI = dyn_cast<IntegerType>(CS.getType());
+ if (!CI || CI->getBitWidth() > 64 || CS.arg_empty())
+ return CSInfo;
+ for (auto &&Arg : make_range(CS.arg_begin() + 1, CS.arg_end())) {
+ auto *CI = dyn_cast<ConstantInt>(Arg);
+ if (!CI || CI->getBitWidth() > 64)
+ return CSInfo;
+ Args.push_back(CI->getZExtValue());
+ }
+ return ConstCSInfo[Args];
+}
+
+void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
+ unsigned *NumUnsafeUses) {
+ findCallSiteInfo(CS).CallSites.push_back({VTable, CS, NumUnsafeUses});
+}
+
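
A minimal standard-library sketch of the grouping that findCallSiteInfo performs, with LLVM types replaced by plain placeholders (Site and SlotInfoSketch are made-up names): call sites whose arguments after 'this' are all constant integers of width <= 64 are keyed by that argument list, and everything else lands in the single catch-all CSInfo bucket. The real code also falls back to the catch-all when the call's return type is not a small integer.

#include <cstdint>
#include <map>
#include <optional>
#include <vector>

struct Site { int Id; };  // stand-in for VirtualCallSite

struct SlotInfoSketch {
  std::vector<Site> CatchAll;                                  // ~ CSInfo
  std::map<std::vector<uint64_t>, std::vector<Site>> ByArgs;   // ~ ConstCSInfo

  // Args holds one entry per argument after 'this'; an empty optional marks a
  // non-constant (or too-wide) argument, which forces the catch-all bucket.
  void add(Site S, const std::vector<std::optional<uint64_t>> &Args) {
    std::vector<uint64_t> Key;
    for (const auto &A : Args) {
      if (!A) {
        CatchAll.push_back(S);
        return;
      }
      Key.push_back(*A);
    }
    ByArgs[Key].push_back(S);
  }
};
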
struct DevirtModule {
Module &M;
+ function_ref<AAResults &(Function &)> AARGetter;
+
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
+
IntegerType *Int8Ty;
PointerType *Int8PtrTy;
IntegerType *Int32Ty;
+ IntegerType *Int64Ty;
+ IntegerType *IntPtrTy;
bool RemarksEnabled;
- MapVector<VTableSlot, std::vector<VirtualCallSite>> CallSlots;
+ MapVector<VTableSlot, VTableSlotInfo> CallSlots;
// This map keeps track of the number of "unsafe" uses of a loaded function
// pointer. The key is the associated llvm.type.test intrinsic call generated
@@ -279,11 +396,18 @@ struct DevirtModule {
// true.
std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
- DevirtModule(Module &M)
- : M(M), Int8Ty(Type::getInt8Ty(M.getContext())),
+ DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
+ ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : M(M), AARGetter(AARGetter), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary), Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
- RemarksEnabled(areRemarksEnabled()) {}
+ Int64Ty(Type::getInt64Ty(M.getContext())),
+ IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
+ RemarksEnabled(areRemarksEnabled()) {
+ assert(!(ExportSummary && ImportSummary));
+ }
bool areRemarksEnabled();
@@ -298,57 +422,169 @@ struct DevirtModule {
tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos,
uint64_t ByteOffset);
+
+ void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
+ bool &IsExported);
bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res);
+
bool tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<ConstantInt *> Args);
- bool tryUniformRetValOpt(IntegerType *RetType,
- MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ ArrayRef<uint64_t> Args);
+
+ void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ uint64_t TheRetVal);
+ bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res);
+
+ // Returns the global symbol name that is used to export information about the
+ // given vtable slot and list of arguments.
+ std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name);
+
+ // This function is called during the export phase to create a symbol
+ // definition containing information about the given vtable slot and list of
+ // arguments.
+ void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
+ Constant *C);
+
+ // This function is called during the import phase to create a reference to
+ // the symbol definition created during the export phase.
+ Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, unsigned AbsWidth = 0);
+
+ void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
+ Constant *UniqueMemberAddr);
bool tryUniqueRetValOpt(unsigned BitWidth,
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res,
+ VTableSlot Slot, ArrayRef<uint64_t> Args);
+
+ void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
+ Constant *Byte, Constant *Bit);
bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<VirtualCallSite> CallSites);
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res, VTableSlot Slot);
void rebuildGlobal(VTableBits &B);
+ // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
+ void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
+
+ // If we were able to eliminate all unsafe uses for a type checked load,
+ // eliminate the associated type tests by replacing them with true.
+ void removeRedundantTypeTests();
+
bool run();
+
+ // Lower the module using the action and summary passed as command line
+ // arguments. For testing purposes only.
+ static bool runForTesting(Module &M,
+ function_ref<AAResults &(Function &)> AARGetter);
};
struct WholeProgramDevirt : public ModulePass {
static char ID;
- WholeProgramDevirt() : ModulePass(ID) {
+ bool UseCommandLine = false;
+
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
+
+ WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) {
+ initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
+ }
+
+ WholeProgramDevirt(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : ModulePass(ID), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary) {
initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
+ if (UseCommandLine)
+ return DevirtModule::runForTesting(M, LegacyAARGetter(*this));
+ return DevirtModule(M, LegacyAARGetter(*this), ExportSummary, ImportSummary)
+ .run();
+ }
- return DevirtModule(M).run();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // end anonymous namespace
-INITIALIZE_PASS(WholeProgramDevirt, "wholeprogramdevirt",
- "Whole program devirtualization", false, false)
+INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
+ "Whole program devirtualization", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
+ "Whole program devirtualization", false, false)
char WholeProgramDevirt::ID = 0;
-ModulePass *llvm::createWholeProgramDevirtPass() {
- return new WholeProgramDevirt;
+ModulePass *
+llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary) {
+ return new WholeProgramDevirt(ExportSummary, ImportSummary);
}
PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
- ModuleAnalysisManager &) {
- if (!DevirtModule(M).run())
+ ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ return FAM.getResult<AAManager>(F);
+ };
+ if (!DevirtModule(M, AARGetter, nullptr, nullptr).run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
+bool DevirtModule::runForTesting(
+ Module &M, function_ref<AAResults &(Function &)> AARGetter) {
+ ModuleSummaryIndex Summary;
+
+ // Handle the command-line summary arguments. This code is for testing
+ // purposes only, so we handle errors directly.
+ if (!ClReadSummary.empty()) {
+ ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
+ ": ");
+ auto ReadSummaryFile =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+ yaml::Input In(ReadSummaryFile->getBuffer());
+ In >> Summary;
+ ExitOnErr(errorCodeToError(In.error()));
+ }
+
+ bool Changed =
+ DevirtModule(
+ M, AARGetter,
+ ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+ ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
+ .run();
+
+ if (!ClWriteSummary.empty()) {
+ ExitOnError ExitOnErr(
+ "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
+ std::error_code EC;
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ ExitOnErr(errorCodeToError(EC));
+
+ yaml::Output Out(OS);
+ Out << Summary;
+ }
+
+ return Changed;
+}
+
void DevirtModule::buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
@@ -443,9 +679,31 @@ bool DevirtModule::tryFindVirtualCallTargets(
return !TargetsForSlot.empty();
}
+void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
+ Constant *TheFn, bool &IsExported) {
+ auto Apply = [&](CallSiteInfo &CSInfo) {
+ for (auto &&VCallSite : CSInfo.CallSites) {
+ if (RemarksEnabled)
+ VCallSite.emitRemark("single-impl", TheFn->getName());
+ VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
+ TheFn, VCallSite.CS.getCalledValue()->getType()));
+ // This use is no longer unsafe.
+ if (VCallSite.NumUnsafeUses)
+ --*VCallSite.NumUnsafeUses;
+ }
+ if (CSInfo.isExported()) {
+ IsExported = true;
+ CSInfo.markDevirt();
+ }
+ };
+ Apply(SlotInfo.CSInfo);
+ for (auto &P : SlotInfo.ConstCSInfo)
+ Apply(P.second);
+}
+
bool DevirtModule::trySingleImplDevirt(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
// See if the program contains a single implementation of this virtual
// function.
Function *TheFn = TargetsForSlot[0].Fn;
@@ -453,39 +711,51 @@ bool DevirtModule::trySingleImplDevirt(
if (TheFn != Target.Fn)
return false;
+ // If so, update each call site to call that implementation directly.
if (RemarksEnabled)
TargetsForSlot[0].WasDevirt = true;
- // If so, update each call site to call that implementation directly.
- for (auto &&VCallSite : CallSites) {
- if (RemarksEnabled)
- VCallSite.emitRemark("single-impl", TheFn->getName());
- VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
- TheFn, VCallSite.CS.getCalledValue()->getType()));
- // This use is no longer unsafe.
- if (VCallSite.NumUnsafeUses)
- --*VCallSite.NumUnsafeUses;
+
+ bool IsExported = false;
+ applySingleImplDevirt(SlotInfo, TheFn, IsExported);
+ if (!IsExported)
+ return false;
+
+ // If the only implementation has local linkage, we must promote to external
+ // to make it visible to thin LTO objects. We can only get here during the
+ // ThinLTO export phase.
+ if (TheFn->hasLocalLinkage()) {
+ TheFn->setLinkage(GlobalValue::ExternalLinkage);
+ TheFn->setVisibility(GlobalValue::HiddenVisibility);
+ TheFn->setName(TheFn->getName() + "$merged");
}
+
+ Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+ Res->SingleImplName = TheFn->getName();
+
return true;
}
bool DevirtModule::tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<ConstantInt *> Args) {
+ ArrayRef<uint64_t> Args) {
// Evaluate each function and store the result in each target's RetVal
// field.
for (VirtualCallTarget &Target : TargetsForSlot) {
if (Target.Fn->arg_size() != Args.size() + 1)
return false;
- for (unsigned I = 0; I != Args.size(); ++I)
- if (Target.Fn->getFunctionType()->getParamType(I + 1) !=
- Args[I]->getType())
- return false;
Evaluator Eval(M.getDataLayout(), nullptr);
SmallVector<Constant *, 2> EvalArgs;
EvalArgs.push_back(
Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0)));
- EvalArgs.insert(EvalArgs.end(), Args.begin(), Args.end());
+ for (unsigned I = 0; I != Args.size(); ++I) {
+ auto *ArgTy = dyn_cast<IntegerType>(
+ Target.Fn->getFunctionType()->getParamType(I + 1));
+ if (!ArgTy)
+ return false;
+ EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
+ }
+
Constant *RetVal;
if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) ||
!isa<ConstantInt>(RetVal))
@@ -495,9 +765,18 @@ bool DevirtModule::tryEvaluateFunctionsWithArgs(
return true;
}
+void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ uint64_t TheRetVal) {
+ for (auto Call : CSInfo.CallSites)
+ Call.replaceAndErase(
+ "uniform-ret-val", FnName, RemarksEnabled,
+ ConstantInt::get(cast<IntegerType>(Call.CS.getType()), TheRetVal));
+ CSInfo.markDevirt();
+}
+
bool DevirtModule::tryUniformRetValOpt(
- IntegerType *RetType, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res) {
// Uniform return value optimization. If all functions return the same
// constant, replace all calls with that constant.
uint64_t TheRetVal = TargetsForSlot[0].RetVal;
@@ -505,19 +784,77 @@ bool DevirtModule::tryUniformRetValOpt(
if (Target.RetVal != TheRetVal)
return false;
- auto TheRetValConst = ConstantInt::get(RetType, TheRetVal);
- for (auto Call : CallSites)
- Call.replaceAndErase("uniform-ret-val", TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, TheRetValConst);
+ if (CSInfo.isExported()) {
+ Res->TheKind = WholeProgramDevirtResolution::ByArg::UniformRetVal;
+ Res->Info = TheRetVal;
+ }
+
+ applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
return true;
}
+std::string DevirtModule::getGlobalName(VTableSlot Slot,
+ ArrayRef<uint64_t> Args,
+ StringRef Name) {
+ std::string FullName = "__typeid_";
+ raw_string_ostream OS(FullName);
+ OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
+ for (uint64_t Arg : Args)
+ OS << '_' << Arg;
+ OS << '_' << Name;
+ return OS.str();
+}
+
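
As a concrete illustration of the naming scheme implemented above, the standalone sketch below reproduces it with only the standard library; the type id string, offset, and argument values are made-up examples.

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Mirrors DevirtModule::getGlobalName: "__typeid_" followed by the type id,
// the byte offset, each constant argument, and the per-resolution name,
// joined with '_'.
static std::string typeidSymbol(const std::string &TypeId, uint64_t ByteOffset,
                                const std::vector<uint64_t> &Args,
                                const std::string &Name) {
  std::ostringstream OS;
  OS << "__typeid_" << TypeId << '_' << ByteOffset;
  for (uint64_t A : Args)
    OS << '_' << A;
  OS << '_' << Name;
  return OS.str();
}

int main() {
  // Prints "__typeid__ZTS1A_0_42_byte".
  std::cout << typeidSymbol("_ZTS1A", 0, {42}, "byte") << '\n';
}
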
+void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, Constant *C) {
+ GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
+ getGlobalName(Slot, Args, Name), C, &M);
+ GA->setVisibility(GlobalValue::HiddenVisibility);
+}
+
+Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, unsigned AbsWidth) {
+ Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty);
+ auto *GV = dyn_cast<GlobalVariable>(C);
+ // We only need to set metadata if the global is newly created, in which
+ // case it would not have hidden visibility.
+ if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
+ return C;
+
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(M.getContext(), {MinC, MaxC}));
+ };
+ if (AbsWidth == IntPtrTy->getBitWidth())
+ SetAbsRange(~0ull, ~0ull); // Full set.
+ else if (AbsWidth)
+ SetAbsRange(0, 1ull << AbsWidth);
+ return GV;
+}
+
+void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ bool IsOne,
+ Constant *UniqueMemberAddr) {
+ for (auto &&Call : CSInfo.CallSites) {
+ IRBuilder<> B(Call.CS.getInstruction());
+ Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+ Call.VTable, UniqueMemberAddr);
+ Cmp = B.CreateZExt(Cmp, Call.CS->getType());
+ Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, Cmp);
+ }
+ CSInfo.markDevirt();
+}
+
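
For context, here is a hedged source-level sketch of the unique-return-value case that applyUniqueRetValOpt handles, using hypothetical classes: exactly one implementation in the hierarchy returns true, so each call can be replaced with a comparison of the vptr against that class's vtable address (the unique_member constant exported and imported above).

struct Shape {
  virtual bool isCircle() const { return false; }
  virtual ~Shape() = default;
};
struct Square final : Shape {};
struct Circle final : Shape {
  bool isCircle() const override { return true; }
};

bool check(const Shape *S) {
  // Only Circle's implementation returns true, so under whole-program
  // visibility this call can become a comparison of S's vptr against the
  // address of Circle's vtable slot region.
  return S->isCircle();
}
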
bool DevirtModule::tryUniqueRetValOpt(
unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
+ VTableSlot Slot, ArrayRef<uint64_t> Args) {
// IsOne controls whether we look for a 0 or a 1.
auto tryUniqueRetValOptFor = [&](bool IsOne) {
const TypeMemberInfo *UniqueMember = nullptr;
@@ -533,16 +870,23 @@ bool DevirtModule::tryUniqueRetValOpt(
// checked for a uniform return value in tryUniformRetValOpt.
assert(UniqueMember);
- // Replace each call with the comparison.
- for (auto &&Call : CallSites) {
- IRBuilder<> B(Call.CS.getInstruction());
- Value *OneAddr = B.CreateBitCast(UniqueMember->Bits->GV, Int8PtrTy);
- OneAddr = B.CreateConstGEP1_64(OneAddr, UniqueMember->Offset);
- Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
- Call.VTable, OneAddr);
- Call.replaceAndErase("unique-ret-val", TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, Cmp);
+ Constant *UniqueMemberAddr =
+ ConstantExpr::getBitCast(UniqueMember->Bits->GV, Int8PtrTy);
+ UniqueMemberAddr = ConstantExpr::getGetElementPtr(
+ Int8Ty, UniqueMemberAddr,
+ ConstantInt::get(Int64Ty, UniqueMember->Offset));
+
+ if (CSInfo.isExported()) {
+ Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
+ Res->Info = IsOne;
+
+ exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
}
+
+ // Replace each call with the comparison.
+ applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
+ UniqueMemberAddr);
+
// Update devirtualization statistics for targets.
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
@@ -560,9 +904,30 @@ bool DevirtModule::tryUniqueRetValOpt(
return false;
}
+void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
+ Constant *Byte, Constant *Bit) {
+ for (auto Call : CSInfo.CallSites) {
+ auto *RetType = cast<IntegerType>(Call.CS.getType());
+ IRBuilder<> B(Call.CS.getInstruction());
+ Value *Addr = B.CreateGEP(Int8Ty, Call.VTable, Byte);
+ if (RetType->getBitWidth() == 1) {
+ Value *Bits = B.CreateLoad(Addr);
+ Value *BitsAndBit = B.CreateAnd(Bits, Bit);
+ auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
+ Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
+ IsBitSet);
+ } else {
+ Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
+ Value *Val = B.CreateLoad(RetType, ValAddr);
+ Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, Val);
+ }
+ }
+ CSInfo.markDevirt();
+}
+
bool DevirtModule::tryVirtualConstProp(
- MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<VirtualCallSite> CallSites) {
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res, VTableSlot Slot) {
// This only works if the function returns an integer.
auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType());
if (!RetType)
@@ -571,55 +936,38 @@ bool DevirtModule::tryVirtualConstProp(
if (BitWidth > 64)
return false;
- // Make sure that each function does not access memory, takes at least one
- // argument, does not use its first argument (which we assume is 'this'),
- // and has the same return type.
+ // Make sure that each function is defined, does not access memory, takes at
+ // least one argument, does not use its first argument (which we assume is
+ // 'this'), and has the same return type.
+ //
+ // Note that we test whether this copy of the function is readnone, rather
+ // than testing function attributes, which must hold for any copy of the
+ // function, even a less optimized version substituted at link time. This is
+ // sound because the virtual constant propagation optimizations effectively
+ // inline all implementations of the virtual function into each call site,
+ // rather than using function attributes to perform local optimization.
for (VirtualCallTarget &Target : TargetsForSlot) {
- if (!Target.Fn->doesNotAccessMemory() || Target.Fn->arg_empty() ||
- !Target.Fn->arg_begin()->use_empty() ||
+ if (Target.Fn->isDeclaration() ||
+ computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) !=
+ MAK_ReadNone ||
+ Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
Target.Fn->getReturnType() != RetType)
return false;
}
- // Group call sites by the list of constant arguments they pass.
- // The comparator ensures deterministic ordering.
- struct ByAPIntValue {
- bool operator()(const std::vector<ConstantInt *> &A,
- const std::vector<ConstantInt *> &B) const {
- return std::lexicographical_compare(
- A.begin(), A.end(), B.begin(), B.end(),
- [](ConstantInt *AI, ConstantInt *BI) {
- return AI->getValue().ult(BI->getValue());
- });
- }
- };
- std::map<std::vector<ConstantInt *>, std::vector<VirtualCallSite>,
- ByAPIntValue>
- VCallSitesByConstantArg;
- for (auto &&VCallSite : CallSites) {
- std::vector<ConstantInt *> Args;
- if (VCallSite.CS.getType() != RetType)
- continue;
- for (auto &&Arg :
- make_range(VCallSite.CS.arg_begin() + 1, VCallSite.CS.arg_end())) {
- if (!isa<ConstantInt>(Arg))
- break;
- Args.push_back(cast<ConstantInt>(&Arg));
- }
- if (Args.size() + 1 != VCallSite.CS.arg_size())
- continue;
-
- VCallSitesByConstantArg[Args].push_back(VCallSite);
- }
-
- for (auto &&CSByConstantArg : VCallSitesByConstantArg) {
+ for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
continue;
- if (tryUniformRetValOpt(RetType, TargetsForSlot, CSByConstantArg.second))
+ WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
+ if (Res)
+ ResByArg = &Res->ResByArg[CSByConstantArg.first];
+
+ if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
continue;
- if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second))
+ if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
+ ResByArg, Slot, CSByConstantArg.first))
continue;
// Find an allocation offset in bits in all vtables associated with the
@@ -659,26 +1007,20 @@ bool DevirtModule::tryVirtualConstProp(
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
- // Rewrite each call to a load from OffsetByte/OffsetBit.
- for (auto Call : CSByConstantArg.second) {
- IRBuilder<> B(Call.CS.getInstruction());
- Value *Addr = B.CreateConstGEP1_64(Call.VTable, OffsetByte);
- if (BitWidth == 1) {
- Value *Bits = B.CreateLoad(Addr);
- Value *Bit = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
- Value *BitsAndBit = B.CreateAnd(Bits, Bit);
- auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
- Call.replaceAndErase("virtual-const-prop-1-bit",
- TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, IsBitSet);
- } else {
- Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
- Value *Val = B.CreateLoad(RetType, ValAddr);
- Call.replaceAndErase("virtual-const-prop",
- TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, Val);
- }
+ Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
+ Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
+
+ if (CSByConstantArg.second.isExported()) {
+ ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
+ exportGlobal(Slot, CSByConstantArg.first, "byte",
+ ConstantExpr::getIntToPtr(ByteConst, Int8PtrTy));
+ exportGlobal(Slot, CSByConstantArg.first, "bit",
+ ConstantExpr::getIntToPtr(BitConst, Int8PtrTy));
}
+
+ // Rewrite each call to a load from OffsetByte/OffsetBit.
+ applyVirtualConstProp(CSByConstantArg.second,
+ TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
}
return true;
}
@@ -733,7 +1075,11 @@ bool DevirtModule::areRemarksEnabled() {
if (FL.empty())
return false;
const Function &Fn = FL.front();
- auto DI = OptimizationRemark(DEBUG_TYPE, Fn, DebugLoc(), "");
+
+ const auto &BBL = Fn.getBasicBlockList();
+ if (BBL.empty())
+ return false;
+ auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
return DI.isEnabled();
}
@@ -766,8 +1112,8 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
if (SeenPtrs.insert(Ptr).second) {
for (DevirtCallSite Call : DevirtCalls) {
- CallSlots[{TypeId, Call.Offset}].push_back(
- {CI->getArgOperand(0), Call.CS, nullptr});
+ CallSlots[{TypeId, Call.Offset}].addCallSite(CI->getArgOperand(0),
+ Call.CS, nullptr);
}
}
}
@@ -853,14 +1199,79 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
if (HasNonCallUses)
++NumUnsafeUses;
for (DevirtCallSite Call : DevirtCalls) {
- CallSlots[{TypeId, Call.Offset}].push_back(
- {Ptr, Call.CS, &NumUnsafeUses});
+ CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS,
+ &NumUnsafeUses);
}
CI->eraseFromParent();
}
}
+void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+ const TypeIdSummary *TidSummary =
+ ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+ if (!TidSummary)
+ return;
+ auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
+ if (ResI == TidSummary->WPDRes.end())
+ return;
+ const WholeProgramDevirtResolution &Res = ResI->second;
+
+ if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+ // The type of the function in the declaration is irrelevant because every
+ // call site will cast it to the correct type.
+ auto *SingleImpl = M.getOrInsertFunction(
+ Res.SingleImplName, Type::getVoidTy(M.getContext()));
+
+ // This is the import phase so we should not be exporting anything.
+ bool IsExported = false;
+ applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
+ assert(!IsExported);
+ }
+
+ for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
+ auto I = Res.ResByArg.find(CSByConstantArg.first);
+ if (I == Res.ResByArg.end())
+ continue;
+ auto &ResByArg = I->second;
+ // FIXME: We should figure out what to do about the "function name" argument
+ // to the apply* functions, as the function names are unavailable during the
+ // importing phase. For now we just pass the empty string. This does not
+ // impact correctness because the function names are just used for remarks.
+ switch (ResByArg.TheKind) {
+ case WholeProgramDevirtResolution::ByArg::UniformRetVal:
+ applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
+ break;
+ case WholeProgramDevirtResolution::ByArg::UniqueRetVal: {
+ Constant *UniqueMemberAddr =
+ importGlobal(Slot, CSByConstantArg.first, "unique_member");
+ applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
+ UniqueMemberAddr);
+ break;
+ }
+ case WholeProgramDevirtResolution::ByArg::VirtualConstProp: {
+ Constant *Byte = importGlobal(Slot, CSByConstantArg.first, "byte", 32);
+ Byte = ConstantExpr::getPtrToInt(Byte, Int32Ty);
+ Constant *Bit = importGlobal(Slot, CSByConstantArg.first, "bit", 8);
+ Bit = ConstantExpr::getPtrToInt(Bit, Int8Ty);
+ applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
+ }
+ default:
+ break;
+ }
+ }
+}
+
+void DevirtModule::removeRedundantTypeTests() {
+ auto True = ConstantInt::getTrue(M.getContext());
+ for (auto &&U : NumUnsafeUsesForTypeTest) {
+ if (U.second == 0) {
+ U.first->replaceAllUsesWith(True);
+ U.first->eraseFromParent();
+ }
+ }
+}
+
bool DevirtModule::run() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
@@ -868,7 +1279,11 @@ bool DevirtModule::run() {
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
- if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
+ // Normally if there are no users of the devirtualization intrinsics in the
+ // module, this pass has nothing to do. But if we are exporting, we also need
+ // to handle any users that appear only in the function summaries.
+ if (!ExportSummary &&
+ (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
AssumeFunc->use_empty()) &&
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
return false;
@@ -879,6 +1294,17 @@ bool DevirtModule::run() {
if (TypeCheckedLoadFunc)
scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
+ if (ImportSummary) {
+ for (auto &S : CallSlots)
+ importResolution(S.first, S.second);
+
+ removeRedundantTypeTests();
+
+ // The rest of the code is only necessary when exporting or during regular
+ // LTO, so we are done.
+ return true;
+ }
+
// Rebuild type metadata into a map for easy lookup.
std::vector<VTableBits> Bits;
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
@@ -886,6 +1312,53 @@ bool DevirtModule::run() {
if (TypeIdMap.empty())
return true;
+ // Collect information from summary about which calls to try to devirtualize.
+ if (ExportSummary) {
+ DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
+ for (auto &P : TypeIdMap) {
+ if (auto *TypeId = dyn_cast<MDString>(P.first))
+ MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
+ TypeId);
+ }
+
+ for (auto &P : *ExportSummary) {
+ for (auto &S : P.second.SummaryList) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ // FIXME: Only add live functions.
+ for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VF.GUID]) {
+ CallSlots[{MD, VF.Offset}].CSInfo.SummaryHasTypeTestAssumeUsers =
+ true;
+ }
+ }
+ for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VF.GUID]) {
+ CallSlots[{MD, VF.Offset}]
+ .CSInfo.SummaryTypeCheckedLoadUsers.push_back(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_test_assume_const_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
+ CallSlots[{MD, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .SummaryHasTypeTestAssumeUsers = true;
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_checked_load_const_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
+ CallSlots[{MD, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .SummaryTypeCheckedLoadUsers.push_back(FS);
+ }
+ }
+ }
+ }
+ }
+
// For each (type, offset) pair:
bool DidVirtualConstProp = false;
std::map<std::string, Function*> DevirtTargets;
@@ -894,19 +1367,39 @@ bool DevirtModule::run() {
// function implementation at offset S.first.ByteOffset, and add to
// TargetsForSlot.
std::vector<VirtualCallTarget> TargetsForSlot;
- if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
- S.first.ByteOffset))
- continue;
-
- if (!trySingleImplDevirt(TargetsForSlot, S.second) &&
- tryVirtualConstProp(TargetsForSlot, S.second))
+ if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
+ S.first.ByteOffset)) {
+ WholeProgramDevirtResolution *Res = nullptr;
+ if (ExportSummary && isa<MDString>(S.first.TypeID))
+ Res = &ExportSummary
+ ->getOrInsertTypeIdSummary(
+ cast<MDString>(S.first.TypeID)->getString())
+ .WPDRes[S.first.ByteOffset];
+
+ if (!trySingleImplDevirt(TargetsForSlot, S.second, Res) &&
+ tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first))
DidVirtualConstProp = true;
- // Collect functions devirtualized at least for one call site for stats.
- if (RemarksEnabled)
- for (const auto &T : TargetsForSlot)
- if (T.WasDevirt)
- DevirtTargets[T.Fn->getName()] = T.Fn;
+ // Collect functions devirtualized at least for one call site for stats.
+ if (RemarksEnabled)
+ for (const auto &T : TargetsForSlot)
+ if (T.WasDevirt)
+ DevirtTargets[T.Fn->getName()] = T.Fn;
+ }
+
+ // CFI-specific: if we are exporting and any llvm.type.checked.load
+ // intrinsics were *not* devirtualized, we need to add the resulting
+ // llvm.type.test intrinsics to the function summaries so that the
+ // LowerTypeTests pass will export them.
+ if (ExportSummary && isa<MDString>(S.first.TypeID)) {
+ auto GUID =
+ GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
+ for (auto FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
+ FS->addTypeTest(GUID);
+ for (auto &CCS : S.second.ConstCSInfo)
+ for (auto FS : CCS.second.SummaryTypeCheckedLoadUsers)
+ FS->addTypeTest(GUID);
+ }
}
if (RemarksEnabled) {
@@ -914,23 +1407,12 @@ bool DevirtModule::run() {
for (const auto &DT : DevirtTargets) {
Function *F = DT.second;
DISubprogram *SP = F->getSubprogram();
- DebugLoc DL = SP ? DebugLoc::get(SP->getScopeLine(), 0, SP) : DebugLoc();
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, DL,
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, SP,
Twine("devirtualized ") + F->getName());
}
}
- // If we were able to eliminate all unsafe uses for a type checked load,
- // eliminate the type test by replacing it with true.
- if (TypeCheckedLoadFunc) {
- auto True = ConstantInt::getTrue(M.getContext());
- for (auto &&U : NumUnsafeUsesForTypeTest) {
- if (U.second == 0) {
- U.first->replaceAllUsesWith(True);
- U.first->eraseFromParent();
- }
- }
- }
+ removeRedundantTypeTests();
// Rebuild each global we touched as part of virtual constant propagation to
// include the before and after bytes.