diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO')
26 files changed, 5376 insertions, 2172 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 4762011..0e05129 100644 --- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -34,8 +34,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -63,7 +66,8 @@ namespace { /// struct ArgPromotion : public CallGraphSCCPass { void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -81,7 +85,8 @@ namespace { bool isDenselyPacked(Type *type, const DataLayout &DL); bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); - bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; + bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, + AAResults &AAR) const; CallGraphNode *DoPromotion(Function *F, SmallPtrSetImpl<Argument*> &ArgsToPromote, SmallPtrSetImpl<Argument*> &ByValArgsToTransform); @@ -90,15 +95,15 @@ namespace { bool doInitialization(CallGraph &CG) override; /// The maximum number of elements to expand, or 0 for unlimited. 
unsigned maxElements; - DenseMap<const Function *, DISubprogram *> FunctionDIs; }; } char ArgPromotion::ID = 0; INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) @@ -217,9 +222,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (I->getType()->isPointerTy()) - PointerArgs.push_back(I); + for (Argument &I : F->args()) + if (I.getType()->isPointerTy()) + PointerArgs.push_back(&I); if (PointerArgs.empty()) return nullptr; // Second check: make sure that all callers are direct callers. We can't @@ -237,6 +242,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { const DataLayout &DL = F->getParent()->getDataLayout(); + // We need to manually construct BasicAA directly in order to disable its use + // of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. + AAResults AAR(createLegacyPMAAResults(*this, *F, BAR)); + // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; @@ -281,8 +294,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // If all the elements are single-value types, we can promote it. 
bool AllSimple = true; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - if (!STy->getElementType(i)->isSingleValueType()) { + for (const auto *EltTy : STy->elements()) { + if (!EltTy->isSingleValueType()) { AllSimple = false; break; } @@ -303,8 +316,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { if (isSelfRecursive) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { bool RecursiveType = false; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - if (STy->getElementType(i) == PtrArg->getType()) { + for (const auto *EltTy : STy->elements()) { + if (EltTy == PtrArg->getType()) { RecursiveType = true; break; } @@ -315,7 +328,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // Otherwise, see if we can promote the pointer to its value. - if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr())) + if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR)) ArgsToPromote.insert(PtrArg); } @@ -416,7 +429,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, - bool isByValOrInAlloca) const { + bool isByValOrInAlloca, + AAResults &AAR) const { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments @@ -453,12 +467,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // First, iterate the entry block and mark loads of (geps of) arguments as // safe. 
- BasicBlock *EntryBlock = Arg->getParent()->begin(); + BasicBlock &EntryBlock = Arg->getParent()->front(); // Declare this here so we can reuse it IndicesVector Indices; - for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end(); - I != E; ++I) - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + for (Instruction &I : EntryBlock) + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { Value *V = LI->getPointerOperand(); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { V = GEP->getPointerOperand(); @@ -501,12 +514,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. - getAnalysis<AliasAnalysis>().deleteValue(GEP); GEP->eraseFromParent(); // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? - return isSafeToPromoteArgument(Arg, isByValOrInAlloca); + return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR); } // Ensure that all of the indices are constants. @@ -563,8 +575,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // blocks we know to be transparent to the load. SmallPtrSet<BasicBlock*, 16> TranspBlocks; - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to // the load itself. @@ -572,8 +582,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, BasicBlock *BB = Load->getParent(); MemoryLocation Loc = MemoryLocation::get(Load); - if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc, - AliasAnalysis::Mod)) + if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. @@ -581,7 +590,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // loading block. 
for (BasicBlock *P : predecessors(BB)) { for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) - if (AA.canBasicBlockModify(*TranspBB, Loc)) + if (AAR.canBasicBlockModify(*TranspBB, Loc)) return false; } } @@ -637,13 +646,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, unsigned ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgIndex) { - if (ByValArgsToTransform.count(I)) { + if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ++NumByValArgsPromoted; - } else if (!ArgsToPromote.count(I)) { + } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); AttributeSet attrs = PAL.getParamAttributes(ArgIndex); @@ -661,7 +670,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). - ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); Type *SrcTy; @@ -687,7 +696,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); - OriginalLoads[std::make_pair(I, Indices)] = OrigLoad; + OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; } // Add a parameter to the function for each element passed in. @@ -722,15 +731,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF->copyAttributesFrom(F); // Patch the pointer to LLVM function in debug info descriptor. 
- auto DI = FunctionDIs.find(F); - if (DI != FunctionDIs.end()) { - DISubprogram *SP = DI->second; - SP->replaceFunction(NF); - // Ensure the map is updated so it can be reused on subsequent argument - // promotions of the same function. - FunctionDIs.erase(DI); - FunctionDIs[NF] = SP; - } + NF->setSubprogram(F->getSubprogram()); + F->setSubprogram(nullptr); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); @@ -740,13 +742,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec)); AttributesVec.clear(); - F->getParent()->getFunctionList().insert(F, NF); + F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Get the alias analysis information that we need to update to reflect our - // changes. - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); @@ -775,7 +773,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgIndex) - if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument if (CallPAL.hasAttributes(ArgIndex)) { @@ -783,7 +781,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, AttributesVec. push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } - } else if (ByValArgsToTransform.count(I)) { + } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. 
Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); @@ -798,14 +796,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. - ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value*> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; - LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)]; + LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)]; if (!SI->second.empty()) { Ops.reserve(SI->second.size()); Type *ElTy = V->getType(); @@ -873,10 +871,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Args.clear(); AttributesVec.clear(); - // Update the alias analysis implementation to know that we are replacing - // the old call with a new one. - AA.replaceWithNewValue(Call, New); - // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); @@ -901,20 +895,19 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { - if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { // If this is an unmodified argument, move the name and users over to the // new version. 
- I->replaceAllUsesWith(I2); - I2->takeName(I); - AA.replaceWithNewValue(I, I2); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); ++I2; continue; } - if (ByValArgsToTransform.count(I)) { + if (ByValArgsToTransform.count(&*I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. - Instruction *InsertPt = NF->begin()->begin(); + Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -929,13 +922,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); - new StoreInst(I2++, Idx, InsertPt); + new StoreInst(&*I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(I); - AA.replaceWithNewValue(I, TheAlloca); + TheAlloca->takeName(&*I); // If the alloca is used in a call, we must clear the tail flag since // the callee now uses an alloca from the caller. @@ -948,23 +940,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, continue; } - if (I->use_empty()) { - AA.deleteValue(I); + if (I->use_empty()) continue; - } // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. 
- ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); - LI->replaceAllUsesWith(I2); - AA.replaceWithNewValue(LI, I2); + LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); @@ -1000,11 +989,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->user_back()); - L->replaceAllUsesWith(TheArg); - AA.replaceWithNewValue(L, TheArg); + L->replaceAllUsesWith(&*TheArg); L->eraseFromParent(); } - AA.deleteValue(GEP); GEP->eraseFromParent(); } } @@ -1013,10 +1000,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, std::advance(I2, ArgIndices.size()); } - // Tell the alias analysis that the old function is about to disappear. - AA.replaceWithNewValue(F, NF); - - NF_CGN->stealCalledFunctionsFrom(CG[F]); // Now that the old function is dead, delete it. If there is a dangling @@ -1032,6 +1015,5 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } bool ArgPromotion::doInitialization(CallGraph &CG) { - FunctionDIs = makeSubprogramMap(CG.getModule()); return CallGraphSCCPass::doInitialization(CG); } diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp index 8ce7646..0aa49d6 100644 --- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -119,7 +119,7 @@ bool ConstantMerge::runOnModule(Module &M) { // First: Find the canonical constants others will be merged with. 
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // If this GV is dead, remove it. GV->removeDeadConstantUsers(); @@ -160,7 +160,7 @@ bool ConstantMerge::runOnModule(Module &M) { // invalidating the Constant* pointers in CMap. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // Only process constants with initializers in the default address space. if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp new file mode 100644 index 0000000..5bbb751 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -0,0 +1,166 @@ +//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass exports all llvm.bitset's found in the module in the form of a +// __cfi_check function, which can be used to verify cross-DSO call targets. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "cross-dso-cfi" + +STATISTIC(TypeIds, "Number of unique type identifiers"); + +namespace { + +struct CrossDSOCFI : public ModulePass { + static char ID; + CrossDSOCFI() : ModulePass(ID) { + initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry()); + } + + Module *M; + MDNode *VeryLikelyWeights; + + ConstantInt *extractBitSetTypeId(MDNode *MD); + void buildCFICheck(); + + bool doInitialization(Module &M) override; + bool runOnModule(Module &M) override; +}; + +} // anonymous namespace + +INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, + false) +INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false) +char CrossDSOCFI::ID = 0; + +ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; } + +bool CrossDSOCFI::doInitialization(Module &Mod) { + M = &Mod; + VeryLikelyWeights = + MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1); + + return false; +} + +/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode. +ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) { + // This check excludes vtables for classes inside anonymous namespaces. 
+ auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0)); + if (!TM) + return nullptr; + auto C = dyn_cast_or_null<ConstantInt>(TM->getValue()); + if (!C) return nullptr; + // We are looking for i64 constants. + if (C->getBitWidth() != 64) return nullptr; + + // Sanity check. + auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1)); + // Can be null if a function was removed by an optimization. + if (FM) { + auto F = dyn_cast<Function>(FM->getValue()); + // But can never be a function declaration. + assert(!F || !F->isDeclaration()); + (void)F; // Suppress unused variable warning in the no-asserts build. + } + return C; +} + +/// buildCFICheck - emits __cfi_check for the current module. +void CrossDSOCFI::buildCFICheck() { + // FIXME: verify that __cfi_check ends up near the end of the code section, + // but before the jump slots created in LowerBitSets. + llvm::DenseSet<uint64_t> BitSetIds; + NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets"); + + if (BitSetNM) + for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) + if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I))) + BitSetIds.insert(TypeId->getZExtValue()); + + LLVMContext &Ctx = M->getContext(); + Constant *C = M->getOrInsertFunction( + "__cfi_check", + FunctionType::get( + Type::getVoidTy(Ctx), + {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))}, + false)); + Function *F = dyn_cast<Function>(C); + F->setAlignment(4096); + auto args = F->arg_begin(); + Argument &CallSiteTypeId = *(args++); + CallSiteTypeId.setName("CallSiteTypeId"); + Argument &Addr = *(args++); + Addr.setName("Addr"); + assert(args == F->arg_end()); + + BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F); + + BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F); + IRBuilder<> IRBTrap(TrapBB); + Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap); + llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn); + TrapCall->setDoesNotReturn(); + 
TrapCall->setDoesNotThrow(); + IRBTrap.CreateUnreachable(); + + BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F); + IRBuilder<> IRBExit(ExitBB); + IRBExit.CreateRetVoid(); + + IRBuilder<> IRB(BB); + SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size()); + for (uint64_t TypeId : BitSetIds) { + ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId); + BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F); + IRBuilder<> IRBTest(TestBB); + Function *BitsetTestFn = + Intrinsic::getDeclaration(M, Intrinsic::bitset_test); + + Value *Test = IRBTest.CreateCall( + BitsetTestFn, {&Addr, MetadataAsValue::get( + Ctx, ConstantAsMetadata::get(CaseTypeId))}); + BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB); + BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights); + + SI->addCase(CaseTypeId, TestBB); + ++TypeIds; + } +} + +bool CrossDSOCFI::runOnModule(Module &M) { + if (M.getModuleFlag("Cross-DSO CFI") == nullptr) + return false; + buildCFICheck(); + return true; +} diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index d044764..4de3d95 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -35,6 +35,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <map> #include <set> #include <tuple> @@ -121,14 +122,6 @@ namespace { typedef SmallVector<RetOrArg, 5> UseVector; - // Map each LLVM function to corresponding metadata with debug info. If - // the function is replaced with another one, we should patch the pointer - // to LLVM function in metadata. 
- // As the code generation for module is finished (and DIBuilder is - // finalized) we assume that subprogram descriptors won't be changed, and - // they are stored in map for short duration anyway. - DenseMap<const Function *, DISubprogram *> FunctionDIs; - protected: // DAH uses this to specify a different ID. explicit DAE(char &ID) : ModulePass(ID) {} @@ -198,6 +191,13 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (Fn.hasAddressTaken()) return false; + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. + if (Fn.hasFnAttribute(Attribute::Naked)) { + return false; + } + // Okay, we know we can transform this function if safe. Scan its body // looking for calls marked musttail or calls to llvm.vastart. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { @@ -229,7 +229,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, Fn.getLinkage()); NF->copyAttributesFrom(&Fn); - Fn.getParent()->getFunctionList().insert(&Fn, NF); + Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF); NF->takeName(&Fn); // Loop over all of the callers of the function, transforming the call sites @@ -296,20 +296,12 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++I2) { // Move the name and users over to the new version. - I->replaceAllUsesWith(I2); - I2->takeName(I); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); } // Patch the pointer to LLVM function in debug info descriptor. - auto DI = FunctionDIs.find(&Fn); - if (DI != FunctionDIs.end()) { - DISubprogram *SP = DI->second; - SP->replaceFunction(NF); - // Ensure the map is updated so it can be reused on non-varargs argument - // eliminations of the same function. 
- FunctionDIs.erase(DI); - FunctionDIs[NF] = SP; - } + NF->setSubprogram(Fn.getSubprogram()); // Fix up any BlockAddresses that refer to the function. Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType())); @@ -345,16 +337,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg()) return false; + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. + if (Fn.hasFnAttribute(Attribute::Naked)) + return false; + if (Fn.use_empty()) return false; SmallVector<unsigned, 8> UnusedArgs; - for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); - I != E; ++I) { - Argument *Arg = I; - - if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr()) - UnusedArgs.push_back(Arg->getArgNo()); + for (Argument &Arg : Fn.args()) { + if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) + UnusedArgs.push_back(Arg.getArgNo()); } if (UnusedArgs.empty()) @@ -485,6 +480,10 @@ DAE::Liveness DAE::SurveyUse(const Use *U, if (F) { // Used in a direct call. + // The function argument is live if it is used as a bundle operand. + if (CS.isBundleOperand(U)) + return Live; + // Find the argument number. We know for sure that this use is an // argument, since if it was the function argument this would be an // indirect call and the we know can't be looking at a value of the @@ -543,6 +542,14 @@ void DAE::SurveyFunction(const Function &F) { return; } + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. 
+ if (F.hasFnAttribute(Attribute::Naked)) { + MarkLive(F); + return; + } + unsigned RetCount = NumRetVals(&F); // Assume all return values are dead typedef SmallVector<Liveness, 5> RetVals; @@ -648,7 +655,7 @@ void DAE::SurveyFunction(const Function &F) { } else { // See what the effect of this use is (recording any uses that cause // MaybeLive in MaybeLiveArgUses). - Result = SurveyUses(AI, MaybeLiveArgUses); + Result = SurveyUses(&*AI, MaybeLiveArgUses); } // Mark the result. @@ -878,7 +885,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { NF->setAttributes(NewPAL); // Insert the new function before the old function, so we won't be processing // it again. - F->getParent()->getFunctionList().insert(F, NF); + F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites @@ -946,7 +953,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, "", Call); + Args, "", Call->getParent()); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { @@ -976,9 +983,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { " must have been a struct or an array!"); Instruction *InsertPt = Call; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - BasicBlock::iterator IP = II->getNormalDest()->begin(); - while (isa<PHINode>(IP)) ++IP; - InsertPt = IP; + BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest()); + InsertPt = &*NewEdge->getFirstInsertionPt(); } // We used to return a struct or array. Instead of doing smart stuff @@ -1026,8 +1032,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ArgAlive[i]) { // If this is a live argument, move the name and users over to the new // version. 
- I->replaceAllUsesWith(I2); - I2->takeName(I); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); ++I2; } else { // If this argument is dead, replace any uses of it with null constants @@ -1079,9 +1085,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } // Patch the pointer to LLVM function in debug info descriptor. - auto DI = FunctionDIs.find(F); - if (DI != FunctionDIs.end()) - DI->second->replaceFunction(NF); + NF->setSubprogram(F->getSubprogram()); // Now that the old function is dead, delete it. F->eraseFromParent(); @@ -1092,9 +1096,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { bool DAE::runOnModule(Module &M) { bool Changed = false; - // Collect debug info descriptors for functions. - FunctionDIs = makeSubprogramMap(M); - // First pass: Do a simple check to see if any functions can have their "..." // removed. We can do this if they never call va_start. This loop cannot be // fused with the next loop, because deleting a function invalidates @@ -1119,7 +1120,7 @@ bool DAE::runOnModule(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { // Increment now, because the function will probably get removed (ie. // replaced by a new one). 
- Function *F = I++; + Function *F = &*I++; Changed |= RemoveDeadStuffFromFunction(F); } diff --git a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp index 67ba72d..af313a6 100644 --- a/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -1,4 +1,5 @@ -//===-- ElimAvailExtern.cpp - DCE unreachable internal functions ----------------===// +//===-- ElimAvailExtern.cpp - DCE unreachable internal functions +//----------------===// // // The LLVM Compiler Infrastructure // @@ -15,9 +16,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Pass.h" using namespace llvm; @@ -28,18 +27,18 @@ STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); namespace { - struct EliminateAvailableExternally : public ModulePass { - static char ID; // Pass identification, replacement for typeid - EliminateAvailableExternally() : ModulePass(ID) { - initializeEliminateAvailableExternallyPass( - *PassRegistry::getPassRegistry()); - } +struct EliminateAvailableExternally : public ModulePass { + static char ID; // Pass identification, replacement for typeid + EliminateAvailableExternally() : ModulePass(ID) { + initializeEliminateAvailableExternallyPass( + *PassRegistry::getPassRegistry()); + } - // run - Do the EliminateAvailableExternally pass on the specified module, - // optionally updating the specified callgraph to reflect the changes. - // - bool runOnModule(Module &M) override; - }; + // run - Do the EliminateAvailableExternally pass on the specified module, + // optionally updating the specified callgraph to reflect the changes. 
+ // + bool runOnModule(Module &M) override; +}; } char EliminateAvailableExternally::ID = 0; @@ -54,30 +53,31 @@ bool EliminateAvailableExternally::runOnModule(Module &M) { bool Changed = false; // Drop initializers of available externally global variables. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (!I->hasAvailableExternallyLinkage()) + for (GlobalVariable &GV : M.globals()) { + if (!GV.hasAvailableExternallyLinkage()) continue; - if (I->hasInitializer()) { - Constant *Init = I->getInitializer(); - I->setInitializer(nullptr); + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + GV.setInitializer(nullptr); if (isSafeToDestroyConstant(Init)) Init->destroyConstant(); } - I->removeDeadConstantUsers(); - I->setLinkage(GlobalValue::ExternalLinkage); + GV.removeDeadConstantUsers(); + GV.setLinkage(GlobalValue::ExternalLinkage); NumVariables++; + Changed = true; } // Drop the bodies of available externally functions. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->hasAvailableExternallyLinkage()) + for (Function &F : M) { + if (!F.hasAvailableExternallyLinkage()) continue; - if (!I->isDeclaration()) + if (!F.isDeclaration()) // This will set the linkage to external - I->deleteBody(); - I->removeDeadConstantUsers(); + F.deleteBody(); + F.removeDeadConstantUsers(); NumFunctions++; + Changed = true; } return Changed; diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp index b9462f2..1a3b925 100644 --- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp @@ -83,7 +83,7 @@ namespace { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { bool Delete = - deleteStuff == (bool)Named.count(I) && !I->isDeclaration(); + deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration(); if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; @@ 
-103,7 +103,7 @@ namespace { // Visit the Functions. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { bool Delete = - deleteStuff == (bool)Named.count(I) && !I->isDeclaration(); + deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration(); if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; @@ -124,7 +124,7 @@ namespace { Module::alias_iterator CurI = I; ++I; - bool Delete = deleteStuff == (bool)Named.count(CurI); + bool Delete = deleteStuff == (bool)Named.count(&*CurI); makeVisible(*CurI, Delete); if (Delete) { @@ -143,7 +143,7 @@ namespace { } CurI->replaceAllUsesWith(Declaration); - delete CurI; + delete &*CurI; } } diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp new file mode 100644 index 0000000..816291d --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -0,0 +1,121 @@ +//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "forceattrs" + +static cl::list<std::string> + ForceAttributes("force-attribute", cl::Hidden, + cl::desc("Add an attribute to a function. This should be a " + "pair of 'function-name:attribute-name', for " + "example -force-add-attribute=foo:noinline. 
This " + "option can be specified multiple times.")); + +static Attribute::AttrKind parseAttrKind(StringRef Kind) { + return StringSwitch<Attribute::AttrKind>(Kind) + .Case("alwaysinline", Attribute::AlwaysInline) + .Case("builtin", Attribute::Builtin) + .Case("cold", Attribute::Cold) + .Case("convergent", Attribute::Convergent) + .Case("inlinehint", Attribute::InlineHint) + .Case("jumptable", Attribute::JumpTable) + .Case("minsize", Attribute::MinSize) + .Case("naked", Attribute::Naked) + .Case("nobuiltin", Attribute::NoBuiltin) + .Case("noduplicate", Attribute::NoDuplicate) + .Case("noimplicitfloat", Attribute::NoImplicitFloat) + .Case("noinline", Attribute::NoInline) + .Case("nonlazybind", Attribute::NonLazyBind) + .Case("noredzone", Attribute::NoRedZone) + .Case("noreturn", Attribute::NoReturn) + .Case("norecurse", Attribute::NoRecurse) + .Case("nounwind", Attribute::NoUnwind) + .Case("optnone", Attribute::OptimizeNone) + .Case("optsize", Attribute::OptimizeForSize) + .Case("readnone", Attribute::ReadNone) + .Case("readonly", Attribute::ReadOnly) + .Case("argmemonly", Attribute::ArgMemOnly) + .Case("returns_twice", Attribute::ReturnsTwice) + .Case("safestack", Attribute::SafeStack) + .Case("sanitize_address", Attribute::SanitizeAddress) + .Case("sanitize_memory", Attribute::SanitizeMemory) + .Case("sanitize_thread", Attribute::SanitizeThread) + .Case("ssp", Attribute::StackProtect) + .Case("sspreq", Attribute::StackProtectReq) + .Case("sspstrong", Attribute::StackProtectStrong) + .Case("uwtable", Attribute::UWTable) + .Default(Attribute::None); +} + +/// If F has any forced attributes given on the command line, add them. 
+static void addForcedAttributes(Function &F) { + for (auto &S : ForceAttributes) { + auto KV = StringRef(S).split(':'); + if (KV.first != F.getName()) + continue; + + auto Kind = parseAttrKind(KV.second); + if (Kind == Attribute::None) { + DEBUG(dbgs() << "ForcedAttribute: " << KV.second + << " unknown or not handled!\n"); + continue; + } + if (F.hasFnAttribute(Kind)) + continue; + F.addFnAttr(Kind); + } +} + +PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) { + if (ForceAttributes.empty()) + return PreservedAnalyses::all(); + + for (Function &F : M.functions()) + addForcedAttributes(F); + + // Just conservatively invalidate analyses, this isn't likely to be important. + return PreservedAnalyses::none(); +} + +namespace { +struct ForceFunctionAttrsLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ForceFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeForceFunctionAttrsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + if (ForceAttributes.empty()) + return false; + + for (Function &F : M.functions()) + addForcedAttributes(F); + + // Conservatively assume we changed something. 
+ return true; + } +}; +} + +char ForceFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs", + "Force set function attributes", false, false) + +Pass *llvm::createForceFunctionAttrsLegacyPass() { + return new ForceFunctionAttrsLegacyPass(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index bb5e64a..6dcfb3f 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -23,14 +23,21 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -42,230 +49,191 @@ STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); -STATISTIC(NumAnnotated, "Number of attributes added to library functions"); +STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); +STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); namespace { - struct FunctionAttrs : public CallGraphSCCPass { - static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID), 
AA(nullptr) { - initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); - } - - // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(CallGraphSCC &SCC) override; - - // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. - bool AddReadAttrs(const CallGraphSCC &SCC); - - // AddArgumentAttrs - Deduce nocapture attributes for the SCC. - bool AddArgumentAttrs(const CallGraphSCC &SCC); - - // IsFunctionMallocLike - Does this function allocate new memory? - bool IsFunctionMallocLike(Function *F, - SmallPtrSet<Function*, 8> &) const; - - // AddNoAliasAttrs - Deduce noalias attributes for the SCC. - bool AddNoAliasAttrs(const CallGraphSCC &SCC); - - // Utility methods used by inferPrototypeAttributes to add attributes - // and maintain annotation statistics. - - void setDoesNotAccessMemory(Function &F) { - if (!F.doesNotAccessMemory()) { - F.setDoesNotAccessMemory(); - ++NumAnnotated; - } - } - - void setOnlyReadsMemory(Function &F) { - if (!F.onlyReadsMemory()) { - F.setOnlyReadsMemory(); - ++NumAnnotated; - } - } - - void setDoesNotThrow(Function &F) { - if (!F.doesNotThrow()) { - F.setDoesNotThrow(); - ++NumAnnotated; - } - } - - void setDoesNotCapture(Function &F, unsigned n) { - if (!F.doesNotCapture(n)) { - F.setDoesNotCapture(n); - ++NumAnnotated; - } - } - - void setOnlyReadsMemory(Function &F, unsigned n) { - if (!F.onlyReadsMemory(n)) { - F.setOnlyReadsMemory(n); - ++NumAnnotated; - } - } - - void setDoesNotAlias(Function &F, unsigned n) { - if (!F.doesNotAlias(n)) { - F.setDoesNotAlias(n); - ++NumAnnotated; - } - } - - // inferPrototypeAttributes - Analyze the name and prototype of the - // given function and set any applicable attributes. Returns true - // if any attributes were set and false otherwise. - bool inferPrototypeAttributes(Function &F); +typedef SmallSetVector<Function *, 8> SCCNodeSet; +} - // annotateLibraryCalls - Adds attributes to well-known standard library - // call declarations. 
- bool annotateLibraryCalls(const CallGraphSCC &SCC); +namespace { +struct FunctionAttrs : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + FunctionAttrs() : CallGraphSCCPass(ID) { + initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - CallGraphSCCPass::getAnalysisUsage(AU); - } + bool runOnSCC(CallGraphSCC &SCC) override; + bool doInitialization(CallGraph &CG) override { + Revisit.clear(); + return false; + } + bool doFinalization(CallGraph &CG) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + CallGraphSCCPass::getAnalysisUsage(AU); + } - private: - AliasAnalysis *AA; - TargetLibraryInfo *TLI; - }; +private: + TargetLibraryInfo *TLI; + SmallVector<WeakVH,16> Revisit; +}; } char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) + "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false) + "Deduce function attributes", false, false) Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } +namespace { +/// The three kinds of memory access relevant to 'readonly' and +/// 'readnone' attributes. +enum MemoryAccessKind { + MAK_ReadNone = 0, + MAK_ReadOnly = 1, + MAK_MayWrite = 2 +}; +} -/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. 
-bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { - SmallPtrSet<Function*, 8> SCCNodes; - - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - SCCNodes.insert((*I)->getFunction()); +static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR, + const SCCNodeSet &SCCNodes) { + FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); + if (MRB == FMRB_DoesNotAccessMemory) + // Already perfect! + return MAK_ReadNone; + + // Definitions with weak linkage may be overridden at linktime with + // something that writes memory, so treat them like declarations. + if (F.isDeclaration() || F.mayBeOverridden()) { + if (AliasAnalysis::onlyReadsMemory(MRB)) + return MAK_ReadOnly; + + // Conservatively assume it writes to memory. + return MAK_MayWrite; + } - // Check if any of the functions in the SCC read or write memory. If they - // write memory then they can't be marked readnone or readonly. + // Scan the function body for instructions that may read or write memory. bool ReadsMemory = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or node we don't want to optimize - assume it may write - // memory and give up. - return false; + for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { + Instruction *I = &*II; + + // Some instructions can be ignored even if they read or write memory. + // Detect these now, skipping to the next instruction if one is found. + CallSite CS(cast<Value>(I)); + if (CS) { + // Ignore calls to functions in the same SCC. 
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + continue; + FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F); - if (MRB == AliasAnalysis::DoesNotAccessMemory) - // Already perfect! - continue; + // If the call doesn't access memory, we're done. + if (!(MRB & MRI_ModRef)) + continue; - // Definitions with weak linkage may be overridden at linktime with - // something that writes memory, so treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) { - if (!AliasAnalysis::onlyReadsMemory(MRB)) - // May write memory. Just give up. - return false; + if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { + // The call could access any memory. If that includes writes, give up. + if (MRB & MRI_Mod) + return MAK_MayWrite; + // If it reads, note it. + if (MRB & MRI_Ref) + ReadsMemory = true; + continue; + } - ReadsMemory = true; - continue; - } + // Check whether all pointer arguments point to local memory, and + // ignore calls that only access local memory. + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI) { + Value *Arg = *CI; + if (!Arg->getType()->isPtrOrPtrVectorTy()) + continue; - // Scan the function body for instructions that may read or write memory. - for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { - Instruction *I = &*II; + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); - // Some instructions can be ignored even if they read or write memory. - // Detect these now, skipping to the next instruction if one is found. - CallSite CS(cast<Value>(I)); - if (CS) { - // Ignore calls to functions in the same SCC. - if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + // Skip accesses to local or constant memory as they don't impact the + // externally visible mod/ref behavior. 
+ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS); - // If the call doesn't access arbitrary memory, we may be able to - // figure out something. - if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { - // If the call does access argument pointees, check each argument. - if (AliasAnalysis::doesAccessArgPointees(MRB)) - // Check whether all pointer arguments point to local memory, and - // ignore calls that only access local memory. - for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); - CI != CE; ++CI) { - Value *Arg = *CI; - if (Arg->getType()->isPointerTy()) { - AAMDNodes AAInfo; - I->getAAMetadata(AAInfo); - - MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); - if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) { - if (MRB & AliasAnalysis::Mod) - // Writes non-local memory. Give up. - return false; - if (MRB & AliasAnalysis::Ref) - // Ok, it reads non-local memory. - ReadsMemory = true; - } - } - } - continue; - } - // The call could access any memory. If that includes writes, give up. - if (MRB & AliasAnalysis::Mod) - return false; - // If it reads, note it. - if (MRB & AliasAnalysis::Ref) + + if (MRB & MRI_Mod) + // Writes non-local memory. Give up. + return MAK_MayWrite; + if (MRB & MRI_Ref) + // Ok, it reads non-local memory. ReadsMemory = true; - continue; - } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - // Ignore non-volatile loads from local memory. (Atomic is okay here.) - if (!LI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(LI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } - } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - // Ignore non-volatile stores to local memory. (Atomic is okay here.) 
- if (!SI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(SI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } - } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { - // Ignore vaargs on local memory. - MemoryLocation Loc = MemoryLocation::get(VI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) + } + continue; + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore non-volatile loads from local memory. (Atomic is okay here.) + if (!LI->isVolatile()) { + MemoryLocation Loc = MemoryLocation::get(LI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Ignore non-volatile stores to local memory. (Atomic is okay here.) + if (!SI->isVolatile()) { + MemoryLocation Loc = MemoryLocation::get(SI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } + } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { + // Ignore vaargs on local memory. + MemoryLocation Loc = MemoryLocation::get(VI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } - // Any remaining instructions need to be taken seriously! Check if they - // read or write memory. - if (I->mayWriteToMemory()) - // Writes memory. Just give up. - return false; + // Any remaining instructions need to be taken seriously! Check if they + // read or write memory. + if (I->mayWriteToMemory()) + // Writes memory. Just give up. + return MAK_MayWrite; + + // If this instruction may read memory, remember that. + ReadsMemory |= I->mayReadFromMemory(); + } + + return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone; +} - // If this instruction may read memory, remember that. - ReadsMemory |= I->mayReadFromMemory(); +/// Deduce readonly/readnone attributes for the SCC. +template <typename AARGetterT> +static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) { + // Check if any of the functions in the SCC read or write memory. 
If they + // write memory then they can't be marked readnone or readonly. + bool ReadsMemory = false; + for (Function *F : SCCNodes) { + // Call the callable parameter to look up AA results for this function. + AAResults &AAR = AARGetter(*F); + + switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) { + case MAK_MayWrite: + return false; + case MAK_ReadOnly: + ReadsMemory = true; + break; + case MAK_ReadNone: + // Nothing to do! + break; } } // Success! Functions in this SCC do not access memory, or only read memory. // Give them the appropriate attribute. bool MadeChange = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - + for (Function *F : SCCNodes) { if (F->doesNotAccessMemory()) // Already perfect! continue; @@ -278,11 +246,10 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // Clear out any existing attributes. AttrBuilder B; - B.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone); - F->removeAttributes(AttributeSet::FunctionIndex, - AttributeSet::get(F->getContext(), - AttributeSet::FunctionIndex, B)); + B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); + F->removeAttributes( + AttributeSet::FunctionIndex, + AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B)); // Add in the new attribute. F->addAttribute(AttributeSet::FunctionIndex, @@ -298,124 +265,140 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { } namespace { - // For a given pointer Argument, this retains a list of Arguments of functions - // in the same SCC that the pointer data flows into. We use this to build an - // SCC of the arguments. - struct ArgumentGraphNode { - Argument *Definition; - SmallVector<ArgumentGraphNode*, 4> Uses; - }; - - class ArgumentGraph { - // We store pointers to ArgumentGraphNode objects, so it's important that - // that they not move around upon insert. 
- typedef std::map<Argument*, ArgumentGraphNode> ArgumentMapTy; +/// For a given pointer Argument, this retains a list of Arguments of functions +/// in the same SCC that the pointer data flows into. We use this to build an +/// SCC of the arguments. +struct ArgumentGraphNode { + Argument *Definition; + SmallVector<ArgumentGraphNode *, 4> Uses; +}; + +class ArgumentGraph { + // We store pointers to ArgumentGraphNode objects, so it's important that + // that they not move around upon insert. + typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy; + + ArgumentMapTy ArgumentMap; + + // There is no root node for the argument graph, in fact: + // void f(int *x, int *y) { if (...) f(x, y); } + // is an example where the graph is disconnected. The SCCIterator requires a + // single entry point, so we maintain a fake ("synthetic") root node that + // uses every node. Because the graph is directed and nothing points into + // the root, it will not participate in any SCCs (except for its own). + ArgumentGraphNode SyntheticRoot; + +public: + ArgumentGraph() { SyntheticRoot.Definition = nullptr; } + + typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator; + + iterator begin() { return SyntheticRoot.Uses.begin(); } + iterator end() { return SyntheticRoot.Uses.end(); } + ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } + + ArgumentGraphNode *operator[](Argument *A) { + ArgumentGraphNode &Node = ArgumentMap[A]; + Node.Definition = A; + SyntheticRoot.Uses.push_back(&Node); + return &Node; + } +}; - ArgumentMapTy ArgumentMap; +/// This tracker checks whether callees are in the SCC, and if so it does not +/// consider that a capture, instead adding it to the "Uses" list and +/// continuing with the analysis. +struct ArgumentUsesTracker : public CaptureTracker { + ArgumentUsesTracker(const SCCNodeSet &SCCNodes) + : Captured(false), SCCNodes(SCCNodes) {} - // There is no root node for the argument graph, in fact: - // void f(int *x, int *y) { if (...) 
f(x, y); } - // is an example where the graph is disconnected. The SCCIterator requires a - // single entry point, so we maintain a fake ("synthetic") root node that - // uses every node. Because the graph is directed and nothing points into - // the root, it will not participate in any SCCs (except for its own). - ArgumentGraphNode SyntheticRoot; + void tooManyUses() override { Captured = true; } - public: - ArgumentGraph() { SyntheticRoot.Definition = nullptr; } + bool captured(const Use *U) override { + CallSite CS(U->getUser()); + if (!CS.getInstruction()) { + Captured = true; + return true; + } - typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator; + Function *F = CS.getCalledFunction(); + if (!F || F->isDeclaration() || F->mayBeOverridden() || + !SCCNodes.count(F)) { + Captured = true; + return true; + } - iterator begin() { return SyntheticRoot.Uses.begin(); } - iterator end() { return SyntheticRoot.Uses.end(); } - ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account for + // these. - ArgumentGraphNode *operator[](Argument *A) { - ArgumentGraphNode &Node = ArgumentMap[A]; - Node.Definition = A; - SyntheticRoot.Uses.push_back(&Node); - return &Node; - } - }; + unsigned UseIndex = + std::distance(const_cast<const Use *>(CS.arg_begin()), U); - // This tracker checks whether callees are in the SCC, and if so it does not - // consider that a capture, instead adding it to the "Uses" list and - // continuing with the analysis. 
- struct ArgumentUsesTracker : public CaptureTracker { - ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes) - : Captured(false), SCCNodes(SCCNodes) {} + assert(UseIndex < CS.data_operands_size() && + "Indirect function calls should have been filtered above!"); - void tooManyUses() override { Captured = true; } + if (UseIndex >= CS.getNumArgOperands()) { + // Data operand, but not a argument operand -- must be a bundle operand + assert(CS.hasOperandBundles() && "Must be!"); - bool captured(const Use *U) override { - CallSite CS(U->getUser()); - if (!CS.getInstruction()) { Captured = true; return true; } + // CaptureTracking told us that we're being captured by an operand bundle + // use. In this case it does not matter if the callee is within our SCC + // or not -- we've been captured in some unknown way, and we have to be + // conservative. + Captured = true; + return true; + } - Function *F = CS.getCalledFunction(); - if (!F || !SCCNodes.count(F)) { Captured = true; return true; } - - bool Found = false; - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end(); - PI != PE; ++PI, ++AI) { - if (AI == AE) { - assert(F->isVarArg() && "More params than args in non-varargs call"); - Captured = true; - return true; - } - if (PI == U) { - Uses.push_back(AI); - Found = true; - break; - } - } - assert(Found && "Capturing call-site captured nothing?"); - (void)Found; - return false; + if (UseIndex >= F->arg_size()) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + Captured = true; + return true; } - bool Captured; // True only if certainly captured (used outside our SCC). - SmallVector<Argument*, 4> Uses; // Uses within our SCC. + Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); + return false; + } - const SmallPtrSet<Function*, 8> &SCCNodes; - }; + bool Captured; // True only if certainly captured (used outside our SCC). 
+ SmallVector<Argument *, 4> Uses; // Uses within our SCC. + + const SCCNodeSet &SCCNodes; +}; } namespace llvm { - template<> struct GraphTraits<ArgumentGraphNode*> { - typedef ArgumentGraphNode NodeType; - typedef SmallVectorImpl<ArgumentGraphNode*>::iterator ChildIteratorType; +template <> struct GraphTraits<ArgumentGraphNode *> { + typedef ArgumentGraphNode NodeType; + typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType; - static inline NodeType *getEntryNode(NodeType *A) { return A; } - static inline ChildIteratorType child_begin(NodeType *N) { - return N->Uses.begin(); - } - static inline ChildIteratorType child_end(NodeType *N) { - return N->Uses.end(); - } - }; - template<> struct GraphTraits<ArgumentGraph*> - : public GraphTraits<ArgumentGraphNode*> { - static NodeType *getEntryNode(ArgumentGraph *AG) { - return AG->getEntryNode(); - } - static ChildIteratorType nodes_begin(ArgumentGraph *AG) { - return AG->begin(); - } - static ChildIteratorType nodes_end(ArgumentGraph *AG) { - return AG->end(); - } - }; + static inline NodeType *getEntryNode(NodeType *A) { return A; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->Uses.begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->Uses.end(); + } +}; +template <> +struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { + static NodeType *getEntryNode(ArgumentGraph *AG) { + return AG->getEntryNode(); + } + static ChildIteratorType nodes_begin(ArgumentGraph *AG) { + return AG->begin(); + } + static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } +}; } -// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. +/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. 
static Attribute::AttrKind determinePointerReadAttrs(Argument *A, - const SmallPtrSet<Argument*, 8> &SCCNodes) { - - SmallVector<Use*, 32> Worklist; - SmallSet<Use*, 32> Visited; - int Count = 0; + const SmallPtrSet<Argument *, 8> &SCCNodes) { + + SmallVector<Use *, 32> Worklist; + SmallSet<Use *, 32> Visited; // inalloca arguments are always clobbered by the call. if (A->hasInAllocaAttr()) @@ -425,9 +408,6 @@ determinePointerReadAttrs(Argument *A, // We don't need to track IsWritten. If A is written to, return immediately. for (Use &U : A->uses()) { - if (Count++ >= 20) - return Attribute::None; - Visited.insert(&U); Worklist.push_back(&U); } @@ -435,7 +415,6 @@ determinePointerReadAttrs(Argument *A, while (!Worklist.empty()) { Use *U = Worklist.pop_back_val(); Instruction *I = cast<Instruction>(U->getUser()); - Value *V = U->get(); switch (I->getOpcode()) { case Instruction::BitCast: @@ -479,24 +458,44 @@ determinePointerReadAttrs(Argument *A, return Attribute::None; } - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A, ++AI) { - if (A->get() == V) { - if (AI == AE) { - assert(F->isVarArg() && - "More params than args in non-varargs call."); - return Attribute::None; - } - Captures &= !CS.doesNotCapture(A - B); - if (SCCNodes.count(AI)) - continue; - if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B)) - return Attribute::None; - if (!CS.doesNotAccessMemory(A - B)) - IsRead = true; - } + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account + // for these. + + unsigned UseIndex = std::distance(CS.arg_begin(), U); + + // U cannot be the callee operand use: since we're exploring the + // transitive uses of an Argument, having such a use be a callee would + // imply the CallSite is an indirect call or invoke; and we'd take the + // early exit above. 
+ assert(UseIndex < CS.data_operands_size() && + "Data operand use expected!"); + + bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands(); + + if (UseIndex >= F->arg_size() && !IsOperandBundleUse) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + return Attribute::None; } + + Captures &= !CS.doesNotCapture(UseIndex); + + // Since the optimizer (by design) cannot see the data flow corresponding + // to a operand bundle use, these cannot participate in the optimistic SCC + // analysis. Instead, we model the operand bundle uses as arguments in + // call to a function external to the SCC. + if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) || + IsOperandBundleUse) { + + // The accessors used on CallSite here do the right thing for calls and + // invokes with operand bundles. + + if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex)) + return Attribute::None; + if (!CS.doesNotAccessMemory(UseIndex)) + IsRead = true; + } + AddUsersToWorklistIfCapturing(); break; } @@ -517,21 +516,10 @@ determinePointerReadAttrs(Argument *A, return IsRead ? Attribute::ReadOnly : Attribute::ReadNone; } -/// AddArgumentAttrs - Deduce nocapture attributes for the SCC. -bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { +/// Deduce nocapture attributes for the SCC. +static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; - SmallPtrSet<Function*, 8> SCCNodes; - - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. 
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - if (F && !F->isDeclaration() && !F->mayBeOverridden() && - !F->hasFnAttribute(Attribute::OptimizeNone)) - SCCNodes.insert(F); - } - ArgumentGraph AG; AttrBuilder B; @@ -539,14 +527,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // Check each function in turn, determining which pointer arguments are not // captured. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or function we're trying not to optimize - only a problem - // for arguments that we pass to it. - continue; - + for (Function *F : SCCNodes) { // Definitions with weak linkage may be overridden at linktime with // something that captures pointers, so treat them like declarations. if (F->isDeclaration() || F->mayBeOverridden()) @@ -556,8 +537,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // a value can't capture arguments. Don't analyze them. 
if (F->onlyReadsMemory() && F->doesNotThrow() && F->getReturnType()->isVoidTy()) { - for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); - A != E; ++A) { + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; @@ -567,26 +548,30 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { continue; } - for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); - A != E; ++A) { - if (!A->getType()->isPointerTy()) continue; + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { + if (!A->getType()->isPointerTy()) + continue; bool HasNonLocalUses = false; if (!A->hasNoCaptureAttr()) { ArgumentUsesTracker Tracker(SCCNodes); - PointerMayBeCaptured(A, &Tracker); + PointerMayBeCaptured(&*A, &Tracker); if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. - A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B)); + A->addAttr( + AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } else { // If it's not trivially captured and not trivially not captured, // then it must be calling into another function in our SCC. Save // its particulars for Argument-SCC analysis later. 
- ArgumentGraphNode *Node = AG[A]; - for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(), - UE = Tracker.Uses.end(); UI != UE; ++UI) { + ArgumentGraphNode *Node = AG[&*A]; + for (SmallVectorImpl<Argument *>::iterator + UI = Tracker.Uses.begin(), + UE = Tracker.Uses.end(); + UI != UE; ++UI) { Node->Uses.push_back(AG[*UI]); if (*UI != A) HasNonLocalUses = true; @@ -600,9 +585,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // Note that we don't allow any calls at all here, or else our result // will be dependent on the iteration order through the functions in the // SCC. - SmallPtrSet<Argument*, 8> Self; - Self.insert(A); - Attribute::AttrKind R = determinePointerReadAttrs(A, Self); + SmallPtrSet<Argument *, 8> Self; + Self.insert(&*A); + Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); if (R != Attribute::None) { AttrBuilder B; B.addAttribute(R); @@ -621,10 +606,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // made. If the definition doesn't have a 'nocapture' attribute by now, it // captures. - for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) { + for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; if (ArgumentSCC.size() == 1) { - if (!ArgumentSCC[0]->Definition) continue; // synthetic root node + if (!ArgumentSCC[0]->Definition) + continue; // synthetic root node // eg. "void f(int* x) { if (...) f(x); }" if (ArgumentSCC[0]->Uses.size() == 1 && @@ -646,9 +632,10 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { SCCCaptured = true; } } - if (SCCCaptured) continue; + if (SCCCaptured) + continue; - SmallPtrSet<Argument*, 8> ArgumentSCCNodes; + SmallPtrSet<Argument *, 8> ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. 
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { @@ -658,8 +645,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; - for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(), - UE = N->Uses.end(); UI != UE; ++UI) { + for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(), + UE = N->Uses.end(); + UI != UE; ++UI) { Argument *A = (*UI)->Definition; if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) continue; @@ -667,7 +655,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { break; } } - if (SCCCaptured) continue; + if (SCCCaptured) + continue; for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; @@ -704,8 +693,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { if (ReadAttr != Attribute::None) { AttrBuilder B, R; B.addAttribute(ReadAttr); - R.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone); + R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; // Clear out existing readonly/readnone attributes @@ -720,10 +708,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { return Changed; } -/// IsFunctionMallocLike - A function is malloc-like if it returns either null -/// or a pointer that doesn't alias any other pointer visible to the caller. -bool FunctionAttrs::IsFunctionMallocLike(Function *F, - SmallPtrSet<Function*, 8> &SCCNodes) const { +/// Tests whether a function is "malloc-like". +/// +/// A function is "malloc-like" if it returns either null or a pointer that +/// doesn't alias any other pointer visible to the caller. 
+static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { SmallSetVector<Value *, 8> FlowsToReturn; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) @@ -744,39 +733,38 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) switch (RVI->getOpcode()) { - // Extend the analysis by looking upwards. - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::AddrSpaceCast: - FlowsToReturn.insert(RVI->getOperand(0)); - continue; - case Instruction::Select: { - SelectInst *SI = cast<SelectInst>(RVI); - FlowsToReturn.insert(SI->getTrueValue()); - FlowsToReturn.insert(SI->getFalseValue()); - continue; - } - case Instruction::PHI: { - PHINode *PN = cast<PHINode>(RVI); - for (Value *IncValue : PN->incoming_values()) - FlowsToReturn.insert(IncValue); - continue; - } + // Extend the analysis by looking upwards. + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(RVI); + for (Value *IncValue : PN->incoming_values()) + FlowsToReturn.insert(IncValue); + continue; + } - // Check whether the pointer came from an allocation. - case Instruction::Alloca: + // Check whether the pointer came from an allocation. 
+ case Instruction::Alloca: + break; + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + if (CS.paramHasAttr(0, Attribute::NoAlias)) + break; + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) break; - case Instruction::Call: - case Instruction::Invoke: { - CallSite CS(RVI); - if (CS.paramHasAttr(0, Attribute::NoAlias)) - break; - if (CS.getCalledFunction() && - SCCNodes.count(CS.getCalledFunction())) - break; - } // fall-through - default: - return false; // Did not come from an allocation. + } // fall-through + default: + return false; // Did not come from an allocation. } if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) @@ -786,24 +774,11 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, return true; } -/// AddNoAliasAttrs - Deduce noalias attributes for the SCC. -bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { - SmallPtrSet<Function*, 8> SCCNodes; - - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - SCCNodes.insert((*I)->getFunction()); - +/// Deduce noalias attributes for the SCC. +static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { // Check each function in turn, determining which functions return noalias // pointers. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or node we don't want to optimize - skip it; - return false; - + for (Function *F : SCCNodes) { // Already noalias. 
if (F->doesNotAlias(0)) continue; @@ -813,18 +788,17 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { if (F->isDeclaration() || F->mayBeOverridden()) return false; - // We annotate noalias return values, which are only applicable to + // We annotate noalias return values, which are only applicable to // pointer types. if (!F->getReturnType()->isPointerTy()) continue; - if (!IsFunctionMallocLike(F, SCCNodes)) + if (!isFunctionMallocLike(F, SCCNodes)) return false; } bool MadeChange = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); + for (Function *F : SCCNodes) { if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) continue; @@ -836,880 +810,249 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { return MadeChange; } -/// inferPrototypeAttributes - Analyze the name and prototype of the -/// given function and set any applicable attributes. Returns true -/// if any attributes were set and false otherwise. -bool FunctionAttrs::inferPrototypeAttributes(Function &F) { - if (F.hasFnAttribute(Attribute::OptimizeNone)) - return false; +/// Tests whether this function is known to not return null. +/// +/// Requires that the function returns a pointer. +/// +/// Returns true if it believes the function will not return a null, and sets +/// \p Speculative based on whether the returned conclusion is a speculative +/// conclusion due to SCC calls. 
+static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, + const TargetLibraryInfo &TLI, bool &Speculative) { + assert(F->getReturnType()->isPointerTy() && + "nonnull only meaningful on pointer types"); + Speculative = false; - FunctionType *FTy = F.getFunctionType(); - LibFunc::Func TheLibFunc; - if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc))) - return false; + SmallSetVector<Value *, 8> FlowsToReturn; + for (BasicBlock &BB : *F) + if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); - switch (TheLibFunc) { - case LibFunc::strlen: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::strchr: - case LibFunc::strrchr: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - break; - case LibFunc::strtol: - case LibFunc::strtod: - case LibFunc::strtof: - case LibFunc::strtoul: - case LibFunc::strtoll: - case LibFunc::strtold: - case LibFunc::strtoull: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::strcpy: - case LibFunc::stpcpy: - case LibFunc::strcat: - case LibFunc::strncat: - case LibFunc::strncpy: - case LibFunc::stpncpy: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::strxfrm: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::strcmp: 
//0,1 - case LibFunc::strspn: // 0,1 - case LibFunc::strncmp: // 0,1 - case LibFunc::strcspn: //0,1 - case LibFunc::strcoll: //0,1 - case LibFunc::strcasecmp: // 0,1 - case LibFunc::strncasecmp: // - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::strstr: - case LibFunc::strpbrk: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::strtok: - case LibFunc::strtok_r: - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::scanf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::setbuf: - case LibFunc::setvbuf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::strdup: - case LibFunc::strndup: - if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::stat: - case LibFunc::statvfs: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::sscanf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return 
false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::sprintf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::snprintf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 3); - break; - case LibFunc::setitimer: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::system: - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "system" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::malloc: - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::memcmp: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::memchr: - case LibFunc::memrchr: - if (FTy->getNumParams() != 3) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - break; - case LibFunc::modf: - case LibFunc::modff: - case LibFunc::modfl: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::memcpy: - case LibFunc::memccpy: - case LibFunc::memmove: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::memalign: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotAlias(F, 0); - break; - case LibFunc::mkdir: - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::mktime: - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::realloc: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - break; - case LibFunc::read: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "read" is a valid 
pthread cancellation point. - setDoesNotCapture(F, 2); - break; - case LibFunc::rewind: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::rmdir: - case LibFunc::remove: - case LibFunc::realpath: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::rename: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::readlink: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::write: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "write" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::bcopy: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::bcmp: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::bzero: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::calloc: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::chmod: - case LibFunc::chown: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::ctermid: - case LibFunc::clearerr: - case LibFunc::closedir: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::atoi: - case LibFunc::atol: - case LibFunc::atof: - case LibFunc::atoll: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::access: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::fopen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - 
!FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fdopen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::feof: - case LibFunc::free: - case LibFunc::fseek: - case LibFunc::ftell: - case LibFunc::fgetc: - case LibFunc::fseeko: - case LibFunc::ftello: - case LibFunc::fileno: - case LibFunc::fflush: - case LibFunc::fclose: - case LibFunc::fsetpos: - case LibFunc::flockfile: - case LibFunc::funlockfile: - case LibFunc::ftrylockfile: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::ferror: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - break; - case LibFunc::fputc: - case LibFunc::fstat: - case LibFunc::frexp: - case LibFunc::frexpf: - case LibFunc::frexpl: - case LibFunc::fstatvfs: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::fgets: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 3); - break; - case LibFunc::fread: - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - break; - case LibFunc::fwrite: - if (FTy->getNumParams() != 4 || - 
!FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - break; - case LibFunc::fputs: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::fscanf: - case LibFunc::fprintf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fgetpos: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::getc: - case LibFunc::getlogin_r: - case LibFunc::getc_unlocked: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::getenv: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::gets: - case LibFunc::getchar: - setDoesNotThrow(F); - break; - case LibFunc::getitimer: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::getpwnam: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::ungetc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - 
setDoesNotCapture(F, 2); - break; - case LibFunc::uname: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::unlink: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::unsetenv: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::utime: - case LibFunc::utimes: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::putc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::puts: - case LibFunc::printf: - case LibFunc::perror: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::pread: - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "pread" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - break; - case LibFunc::pwrite: - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "pwrite" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::putchar: - setDoesNotThrow(F); - break; - case LibFunc::popen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::pclose: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::vscanf: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::vsscanf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::vfscanf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::valloc: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::vprintf: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::vfprintf: - case LibFunc::vsprintf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - 
setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::vsnprintf: - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 3); - break; - case LibFunc::open: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::opendir: - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::tmpfile: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::times: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::htonl: - case LibFunc::htons: - case LibFunc::ntohl: - case LibFunc::ntohs: - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - break; - case LibFunc::lstat: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::lchown: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::qsort: - if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) - return false; - // May throw; places call through function pointer. 
- setDoesNotCapture(F, 4); - break; - case LibFunc::dunder_strdup: - case LibFunc::dunder_strndup: - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::dunder_strtok_r: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::under_IO_getc: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::under_IO_putc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::dunder_isoc99_scanf: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::stat64: - case LibFunc::lstat64: - case LibFunc::statvfs64: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::dunder_isoc99_sscanf: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fopen64: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - 
setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fseeko64: - case LibFunc::ftello64: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::tmpfile64: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::fstat64: - case LibFunc::fstatvfs64: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::open64: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { + Value *RetVal = FlowsToReturn[i]; + + // If this value is locally known to be non-null, we're good + if (isKnownNonNull(RetVal, &TLI)) + continue; + + // Otherwise, we need to look upwards since we can't make any local + // conclusions. + Instruction *RVI = dyn_cast<Instruction>(RetVal); + if (!RVI) return false; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::gettimeofday: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) + switch (RVI->getOpcode()) { + // Extend the analysis by looking upwards. 
+ case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(RVI); + for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + FlowsToReturn.insert(PN->getIncomingValue(i)); + continue; + } + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + Function *Callee = CS.getCalledFunction(); + // A call to a node within the SCC is assumed to return null until + // proven otherwise + if (Callee && SCCNodes.count(Callee)) { + Speculative = true; + continue; + } return false; - // Currently some platforms have the restrict keyword on the arguments to - // gettimeofday. To be conservative, do not add noalias to gettimeofday's - // arguments. - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - default: - // Didn't mark any attributes. - return false; + } + default: + return false; // Unknown source, may be null + }; + llvm_unreachable("should have either continued or returned"); } return true; } -/// annotateLibraryCalls - Adds attributes to well-known standard library -/// call declarations. -bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) { +/// Deduce nonnull attributes for the SCC. +static bool addNonNullAttrs(const SCCNodeSet &SCCNodes, + const TargetLibraryInfo &TLI) { + // Speculative that all functions in the SCC return only nonnull + // pointers. We may refute this as we analyze functions. + bool SCCReturnsNonNull = true; + bool MadeChange = false; - // Check each function in turn annotating well-known library function - // declarations with attributes. 
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); + // Check each function in turn, determining which functions return nonnull + // pointers. + for (Function *F : SCCNodes) { + // Already nonnull. + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::NonNull)) + continue; + + // Definitions with weak linkage may be overridden at linktime, so + // treat them like declarations. + if (F->isDeclaration() || F->mayBeOverridden()) + return false; + + // We annotate nonnull return values, which are only applicable to + // pointer types. + if (!F->getReturnType()->isPointerTy()) + continue; - if (F && F->isDeclaration()) - MadeChange |= inferPrototypeAttributes(*F); + bool Speculative = false; + if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) { + if (!Speculative) { + // Mark the function eagerly since we may discover a function + // which prevents us from speculating about the entire SCC + DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n"); + F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull); + ++NumNonNullReturn; + MadeChange = true; + } + continue; + } + // At least one function returns something which could be null, can't + // speculate any more. 
+ SCCReturnsNonNull = false; + } + + if (SCCReturnsNonNull) { + for (Function *F : SCCNodes) { + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::NonNull) || + !F->getReturnType()->isPointerTy()) + continue; + + DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); + F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull); + ++NumNonNullReturn; + MadeChange = true; + } } return MadeChange; } +static bool setDoesNotRecurse(Function &F) { + if (F.doesNotRecurse()) + return false; + F.setDoesNotRecurse(); + ++NumNoRecurse; + return true; +} + +static bool addNoRecurseAttrs(const CallGraphSCC &SCC, + SmallVectorImpl<WeakVH> &Revisit) { + // Try and identify functions that do not recurse. + + // If the SCC contains multiple nodes we know for sure there is recursion. + if (!SCC.isSingular()) + return false; + + const CallGraphNode *CGN = *SCC.begin(); + Function *F = CGN->getFunction(); + if (!F || F->isDeclaration() || F->doesNotRecurse()) + return false; + + // If all of the calls in F are identifiable and are to norecurse functions, F + // is norecurse. This check also detects self-recursion as F is not currently + // marked norecurse, so any called from F to F will not be marked norecurse. + if (std::all_of(CGN->begin(), CGN->end(), + [](const CallGraphNode::CallRecord &CR) { + Function *F = CR.second->getFunction(); + return F && F->doesNotRecurse(); + })) + // Function calls a potentially recursive function. + return setDoesNotRecurse(*F); + + // We know that F is not obviously recursive, but we haven't been able to + // prove that it doesn't actually recurse. Add it to the Revisit list to try + // again top-down later. + Revisit.push_back(F); + return false; +} + +static bool addNoRecurseAttrsTopDownOnly(Function *F) { + // If F is internal and all uses are in norecurse functions, then F is also + // norecurse. 
+ if (F->doesNotRecurse()) + return false; + if (F->hasInternalLinkage()) { + for (auto *U : F->users()) + if (auto *I = dyn_cast<Instruction>(U)) { + if (!I->getParent()->getParent()->doesNotRecurse()) + return false; + } else { + return false; + } + return setDoesNotRecurse(*F); + } + return false; +} + bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { - AA = &getAnalysis<AliasAnalysis>(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + bool Changed = false; - bool Changed = annotateLibraryCalls(SCC); - Changed |= AddReadAttrs(SCC); - Changed |= AddArgumentAttrs(SCC); - Changed |= AddNoAliasAttrs(SCC); + // We compute dedicated AA results for each function in the SCC as needed. We + // use a lambda referencing external objects so that they live long enough to + // be queried, but we re-use them each time. + Optional<BasicAAResult> BAR; + Optional<AAResults> AAR; + auto AARGetter = [&](Function &F) -> AAResults & { + BAR.emplace(createLegacyPMBasicAAResult(*this, F)); + AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); + return *AAR; + }; + + // Fill SCCNodes with the elements of the SCC. Used for quickly looking up + // whether a given CallGraphNode is in this SCC. Also track whether there are + // any external or opt-none nodes that will prevent us from optimizing any + // part of the SCC. + SCCNodeSet SCCNodes; + bool ExternalNode = false; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) { + // External node or function we're trying not to optimize - we both avoid + // transform them and avoid leveraging information they provide. + ExternalNode = true; + continue; + } + + SCCNodes.insert(F); + } + + Changed |= addReadAttrs(SCCNodes, AARGetter); + Changed |= addArgumentAttrs(SCCNodes); + + // If we have no external nodes participating in the SCC, we can deduce some + // more precise attributes as well. 
+ if (!ExternalNode) { + Changed |= addNoAliasAttrs(SCCNodes); + Changed |= addNonNullAttrs(SCCNodes, *TLI); + } + + Changed |= addNoRecurseAttrs(SCC, Revisit); + return Changed; +} + +bool FunctionAttrs::doFinalization(CallGraph &CG) { + bool Changed = false; + // When iterating over SCCs we visit functions in a bottom-up fashion. Some of + // the rules we have for identifying norecurse functions work best with a + // top-down walk, so look again at all the functions we previously marked as + // worth revisiting, in top-down order. + for (auto &F : reverse(Revisit)) + if (F) + Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F)); return Changed; } diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp new file mode 100644 index 0000000..d8b677b --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -0,0 +1,433 @@ +//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Function import based on summaries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/FunctionImport.h" + +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/AutoUpgrade.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/SourceMgr.h" + +#include <map> + +using namespace llvm; + +#define DEBUG_TYPE "function-import" + +/// Limit on instruction count of imported functions. 
+static cl::opt<unsigned> ImportInstrLimit( + "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"), + cl::desc("Only import functions with less than N instructions")); + +// Load lazily a module from \p FileName in \p Context. +static std::unique_ptr<Module> loadFile(const std::string &FileName, + LLVMContext &Context) { + SMDiagnostic Err; + DEBUG(dbgs() << "Loading '" << FileName << "'\n"); + std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context); + if (!Result) { + Err.print("function-import", errs()); + return nullptr; + } + + Result->materializeMetadata(); + UpgradeDebugInfo(*Result); + + return Result; +} + +namespace { +/// Helper to load on demand a Module from file and cache it for subsequent +/// queries. It can be used with the FunctionImporter. +class ModuleLazyLoaderCache { + /// Cache of lazily loaded module for import. + StringMap<std::unique_ptr<Module>> ModuleMap; + + /// Retrieve a Module from the cache or lazily load it on demand. + std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule; + +public: + /// Create the loader, Module will be initialized in \p Context. + ModuleLazyLoaderCache(std::function< + std::unique_ptr<Module>(StringRef FileName)> createLazyModule) + : createLazyModule(createLazyModule) {} + + /// Retrieve a Module from the cache or lazily load it on demand. + Module &operator()(StringRef FileName); + + std::unique_ptr<Module> takeModule(StringRef FileName) { + auto I = ModuleMap.find(FileName); + assert(I != ModuleMap.end()); + std::unique_ptr<Module> Ret = std::move(I->second); + ModuleMap.erase(I); + return Ret; + } +}; + +// Get a Module for \p FileName from the cache, or load it lazily. 
+Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) { + auto &Module = ModuleMap[Identifier]; + if (!Module) + Module = createLazyModule(Identifier); + return *Module; +} +} // anonymous namespace + +/// Walk through the instructions in \p F looking for external +/// calls not already in the \p CalledFunctions set. If any are +/// found they are added to the \p Worklist for importing. +static void findExternalCalls(const Module &DestModule, Function &F, + const FunctionInfoIndex &Index, + StringSet<> &CalledFunctions, + SmallVector<StringRef, 64> &Worklist) { + // We need to suffix internal function calls imported from other modules, + // prepare the suffix ahead of time. + std::string Suffix; + if (F.getParent() != &DestModule) + Suffix = + (Twine(".llvm.") + + Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str(); + + for (auto &BB : F) { + for (auto &I : BB) { + if (isa<CallInst>(I)) { + auto CalledFunction = cast<CallInst>(I).getCalledFunction(); + // Insert any new external calls that have not already been + // added to set/worklist. + if (!CalledFunction || !CalledFunction->hasName()) + continue; + // Ignore intrinsics early + if (CalledFunction->isIntrinsic()) { + assert(CalledFunction->getIntrinsicID() != 0); + continue; + } + auto ImportedName = CalledFunction->getName(); + auto Renamed = (ImportedName + Suffix).str(); + // Rename internal functions + if (CalledFunction->hasInternalLinkage()) { + ImportedName = Renamed; + } + auto It = CalledFunctions.insert(ImportedName); + if (!It.second) { + // This is a call to a function we already considered, skip. 
+ continue; + } + // Ignore functions already present in the destination module + auto *SrcGV = DestModule.getNamedValue(ImportedName); + if (SrcGV) { + assert(isa<Function>(SrcGV) && "Name collision during import"); + if (!cast<Function>(SrcGV)->isDeclaration()) { + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring " + << ImportedName << " already in DestinationModule\n"); + continue; + } + } + + Worklist.push_back(It.first->getKey()); + DEBUG(dbgs() << DestModule.getModuleIdentifier() + << ": Adding callee for : " << ImportedName << " : " + << F.getName() << "\n"); + } + } + } +} + +// Helper function: given a worklist and an index, will process all the worklist +// and decide what to import based on the summary information. +// +// Nothing is actually imported, functions are materialized in their source +// module and analyzed there. +// +// \p ModuleToFunctionsToImportMap is filled with the set of Function to import +// per Module. +static void GetImportList(Module &DestModule, + SmallVector<StringRef, 64> &Worklist, + StringSet<> &CalledFunctions, + std::map<StringRef, DenseSet<const GlobalValue *>> + &ModuleToFunctionsToImportMap, + const FunctionInfoIndex &Index, + ModuleLazyLoaderCache &ModuleLoaderCache) { + while (!Worklist.empty()) { + auto CalledFunctionName = Worklist.pop_back_val(); + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for " + << CalledFunctionName << "\n"); + + // Try to get a summary for this function call. + auto InfoList = Index.findFunctionInfoList(CalledFunctionName); + if (InfoList == Index.end()) { + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for " + << CalledFunctionName << " Ignoring.\n"); + continue; + } + assert(!InfoList->second.empty() && "No summary, error at import?"); + + // Comdat can have multiple entries, FIXME: what do we do with them? 
+ auto &Info = InfoList->second[0]; + assert(Info && "Nullptr in list, error importing summaries?\n"); + + auto *Summary = Info->functionSummary(); + if (!Summary) { + // FIXME: in case we are lazyloading summaries, we can do it now. + DEBUG(dbgs() << DestModule.getModuleIdentifier() + << ": Missing summary for " << CalledFunctionName + << ", error at import?\n"); + llvm_unreachable("Missing summary"); + } + + if (Summary->instCount() > ImportInstrLimit) { + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of " + << CalledFunctionName << " with " << Summary->instCount() + << " instructions (limit " << ImportInstrLimit << ")\n"); + continue; + } + + // Get the module path from the summary. + auto ModuleIdentifier = Summary->modulePath(); + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing " + << CalledFunctionName << " from " << ModuleIdentifier << "\n"); + + auto &SrcModule = ModuleLoaderCache(ModuleIdentifier); + + // The function that we will import! + GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName); + + if (!SGV) { + // The destination module is referencing function using their renamed name + // when importing a function that was originally local in the source + // module. The source module we have might not have been renamed so we try + // to remove the suffix added during the renaming to recover the original + // name in the source module. 
+ std::pair<StringRef, StringRef> Split = + CalledFunctionName.split(".llvm."); + SGV = SrcModule.getNamedValue(Split.first); + assert(SGV && "Can't find function to import in source module"); + } + if (!SGV) { + report_fatal_error(Twine("Can't load function '") + CalledFunctionName + + "' in Module '" + SrcModule.getModuleIdentifier() + + "', error in the summary?\n"); + } + + Function *F = dyn_cast<Function>(SGV); + if (!F && isa<GlobalAlias>(SGV)) { + auto *SGA = dyn_cast<GlobalAlias>(SGV); + F = dyn_cast<Function>(SGA->getBaseObject()); + CalledFunctionName = F->getName(); + } + assert(F && "Imported Function is ... not a Function"); + + // We cannot import weak_any functions/aliases without possibly affecting + // the order they are seen and selected by the linker, changing program + // semantics. + if (SGV->hasWeakAnyLinkage()) { + DEBUG(dbgs() << DestModule.getModuleIdentifier() + << ": Ignoring import request for weak-any " + << (isa<Function>(SGV) ? "function " : "alias ") + << CalledFunctionName << " from " + << SrcModule.getModuleIdentifier() << "\n"); + continue; + } + + // Add the function to the import list + auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()]; + Entry.insert(F); + + // Process the newly imported functions and add callees to the worklist. + F->materialize(); + findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist); + } +} + +// Automatically import functions in Module \p DestModule based on the summaries +// index. +// +// The current implementation imports every called functions that exists in the +// summaries index. +bool FunctionImporter::importFunctions(Module &DestModule) { + DEBUG(dbgs() << "Starting import for Module " + << DestModule.getModuleIdentifier() << "\n"); + unsigned ImportedCount = 0; + + /// First step is collecting the called external functions. 
+ StringSet<> CalledFunctions; + SmallVector<StringRef, 64> Worklist; + for (auto &F : DestModule) { + if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone)) + continue; + findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist); + } + if (Worklist.empty()) + return false; + + /// Second step: for every call to an external function, try to import it. + + // Linker that will be used for importing function + Linker TheLinker(DestModule); + + // Map of Module -> List of Function to import from the Module + std::map<StringRef, DenseSet<const GlobalValue *>> + ModuleToFunctionsToImportMap; + + // Analyze the summaries and get the list of functions to import by + // populating ModuleToFunctionsToImportMap + ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader); + GetImportList(DestModule, Worklist, CalledFunctions, + ModuleToFunctionsToImportMap, Index, ModuleLoaderCache); + assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList"); + + StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>> + ModuleToTempMDValsMap; + + // Do the actual import of functions now, one Module at a time + for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) { + // Get the module for the import + auto &FunctionsToImport = FunctionsToImportPerModule.second; + std::unique_ptr<Module> SrcModule = + ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first); + assert(&DestModule.getContext() == &SrcModule->getContext() && + "Context mismatch"); + + // Save the mapping of value ids to temporary metadata created when + // importing this function. If we have already imported from this module, + // add new temporary metadata to the existing mapping. + auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()]; + if (!TempMDVals) + TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>(); + + // Link in the specified functions. 
+ if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None, + &Index, &FunctionsToImport, TempMDVals.get())) + report_fatal_error("Function Import: link error"); + + ImportedCount += FunctionsToImport.size(); + } + + // Now link in metadata for all modules from which we imported functions. + for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME : + ModuleToTempMDValsMap) { + // Load the specified source module. + auto &SrcModule = ModuleLoaderCache(SME.getKey()); + + // Link in all necessary metadata from this module. + if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get())) + return false; + } + + DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module " + << DestModule.getModuleIdentifier() << "\n"); + return ImportedCount; +} + +/// Summary file to use for function importing when using -function-import from +/// the command line. +static cl::opt<std::string> + SummaryFile("summary-file", + cl::desc("The summary file to use for function importing.")); + +static void diagnosticHandler(const DiagnosticInfo &DI) { + raw_ostream &OS = errs(); + DiagnosticPrinterRawOStream DP(OS); + DI.print(DP); + OS << '\n'; +} + +/// Parse the function index out of an IR file and return the function +/// index object if found, or nullptr if not. 
+static std::unique_ptr<FunctionInfoIndex> +getFunctionIndexForFile(StringRef Path, std::string &Error, + DiagnosticHandlerFunction DiagnosticHandler) { + std::unique_ptr<MemoryBuffer> Buffer; + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(Path); + if (std::error_code EC = BufferOrErr.getError()) { + Error = EC.message(); + return nullptr; + } + Buffer = std::move(BufferOrErr.get()); + ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr = + object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(), + DiagnosticHandler); + if (std::error_code EC = ObjOrErr.getError()) { + Error = EC.message(); + return nullptr; + } + return (*ObjOrErr)->takeIndex(); +} + +namespace { +/// Pass that performs cross-module function import provided a summary file. +class FunctionImportPass : public ModulePass { + /// Optional function summary index to use for importing, otherwise + /// the summary-file option must be specified. + const FunctionInfoIndex *Index; + +public: + /// Pass identification, replacement for typeid + static char ID; + + /// Specify pass name for debug output + const char *getPassName() const override { + return "Function Importing"; + } + + explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr) + : ModulePass(ID), Index(Index) {} + + bool runOnModule(Module &M) override { + if (SummaryFile.empty() && !Index) + report_fatal_error("error: -function-import requires -summary-file or " + "file from frontend\n"); + std::unique_ptr<FunctionInfoIndex> IndexPtr; + if (!SummaryFile.empty()) { + if (Index) + report_fatal_error("error: -summary-file and index from frontend\n"); + std::string Error; + IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler); + if (!IndexPtr) { + errs() << "Error loading file '" << SummaryFile << "': " << Error + << "\n"; + return false; + } + Index = IndexPtr.get(); + } + + // Perform the import now. 
+ auto ModuleLoader = [&M](StringRef Identifier) { + return loadFile(Identifier, M.getContext()); + }; + FunctionImporter Importer(*Index, ModuleLoader); + return Importer.importFunctions(M); + + return false; + } +}; +} // anonymous namespace + +char FunctionImportPass::ID = 0; +INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import", + "Summary Based Function Import", false, false) +INITIALIZE_PASS_END(FunctionImportPass, "function-import", + "Summary Based Function Import", false, false) + +namespace llvm { +Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) { + return new FunctionImportPass(Index); +} +} diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp index 61d0ff9..9b276ed 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -92,33 +92,28 @@ bool GlobalDCE::runOnModule(Module &M) { ComdatMembers.insert(std::make_pair(C, &GA)); // Loop over the module, adding globals which are obviously necessary. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Changed |= RemoveUnusedGlobalValue(*I); + for (Function &F : M) { + Changed |= RemoveUnusedGlobalValue(F); // Functions with external linkage are needed if they have a body - if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { - if (!I->isDiscardableIfUnused()) - GlobalIsNeeded(I); - } + if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage()) + if (!F.isDiscardableIfUnused()) + GlobalIsNeeded(&F); } - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - Changed |= RemoveUnusedGlobalValue(*I); + for (GlobalVariable &GV : M.globals()) { + Changed |= RemoveUnusedGlobalValue(GV); // Externally visible & appending globals are needed, if they have an // initializer. 
- if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { - if (!I->isDiscardableIfUnused()) - GlobalIsNeeded(I); - } + if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage()) + if (!GV.isDiscardableIfUnused()) + GlobalIsNeeded(&GV); } - for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) { - Changed |= RemoveUnusedGlobalValue(*I); + for (GlobalAlias &GA : M.aliases()) { + Changed |= RemoveUnusedGlobalValue(GA); // Externally visible aliases are needed. - if (!I->isDiscardableIfUnused()) { - GlobalIsNeeded(I); - } + if (!GA.isDiscardableIfUnused()) + GlobalIsNeeded(&GA); } // Now that all globals which are needed are in the AliveGlobals set, we loop @@ -126,52 +121,50 @@ bool GlobalDCE::runOnModule(Module &M) { // // The first pass is to drop initializers of global variables which are dead. - std::vector<GlobalVariable*> DeadGlobalVars; // Keep track of dead globals - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!AliveGlobals.count(I)) { - DeadGlobalVars.push_back(I); // Keep track of dead globals - if (I->hasInitializer()) { - Constant *Init = I->getInitializer(); - I->setInitializer(nullptr); + std::vector<GlobalVariable *> DeadGlobalVars; // Keep track of dead globals + for (GlobalVariable &GV : M.globals()) + if (!AliveGlobals.count(&GV)) { + DeadGlobalVars.push_back(&GV); // Keep track of dead globals + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + GV.setInitializer(nullptr); if (isSafeToDestroyConstant(Init)) Init->destroyConstant(); } } // The second pass drops the bodies of functions which are dead... 
- std::vector<Function*> DeadFunctions; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!AliveGlobals.count(I)) { - DeadFunctions.push_back(I); // Keep track of dead globals - if (!I->isDeclaration()) - I->deleteBody(); + std::vector<Function *> DeadFunctions; + for (Function &F : M) + if (!AliveGlobals.count(&F)) { + DeadFunctions.push_back(&F); // Keep track of dead globals + if (!F.isDeclaration()) + F.deleteBody(); } // The third pass drops targets of aliases which are dead... std::vector<GlobalAlias*> DeadAliases; - for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; - ++I) - if (!AliveGlobals.count(I)) { - DeadAliases.push_back(I); - I->setAliasee(nullptr); + for (GlobalAlias &GA : M.aliases()) + if (!AliveGlobals.count(&GA)) { + DeadAliases.push_back(&GA); + GA.setAliasee(nullptr); } if (!DeadFunctions.empty()) { // Now that all interferences have been dropped, delete the actual objects // themselves. - for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadFunctions[i]); - M.getFunctionList().erase(DeadFunctions[i]); + for (Function *F : DeadFunctions) { + RemoveUnusedGlobalValue(*F); + M.getFunctionList().erase(F); } NumFunctions += DeadFunctions.size(); Changed = true; } if (!DeadGlobalVars.empty()) { - for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadGlobalVars[i]); - M.getGlobalList().erase(DeadGlobalVars[i]); + for (GlobalVariable *GV : DeadGlobalVars) { + RemoveUnusedGlobalValue(*GV); + M.getGlobalList().erase(GV); } NumVariables += DeadGlobalVars.size(); Changed = true; @@ -179,9 +172,9 @@ bool GlobalDCE::runOnModule(Module &M) { // Now delete any dead aliases. 
if (!DeadAliases.empty()) { - for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadAliases[i]); - M.getAliasList().erase(DeadAliases[i]); + for (GlobalAlias *GA : DeadAliases) { + RemoveUnusedGlobalValue(*GA); + M.getAliasList().erase(GA); } NumAliases += DeadAliases.size(); Changed = true; @@ -222,21 +215,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { // any globals used will be marked as needed. Function *F = cast<Function>(G); - if (F->hasPrefixData()) - MarkUsedGlobalsAsNeeded(F->getPrefixData()); - - if (F->hasPrologueData()) - MarkUsedGlobalsAsNeeded(F->getPrologueData()); + for (Use &U : F->operands()) + MarkUsedGlobalsAsNeeded(cast<Constant>(U.get())); - if (F->hasPersonalityFn()) - MarkUsedGlobalsAsNeeded(F->getPersonalityFn()); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U) - if (GlobalValue *GV = dyn_cast<GlobalValue>(*U)) + for (BasicBlock &BB : *F) + for (Instruction &I : BB) + for (Use &U : I.operands()) + if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) GlobalIsNeeded(GV); - else if (Constant *C = dyn_cast<Constant>(*U)) + else if (Constant *C = dyn_cast<Constant>(U)) MarkUsedGlobalsAsNeeded(C); } } @@ -247,9 +234,9 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { // Loop over all of the operands of the constant, adding any globals they // use to the list of needed globals. - for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) { + for (Use &U : C->operands()) { // If we've already processed this constant there's no need to do it again. - Constant *Op = dyn_cast<Constant>(*I); + Constant *Op = dyn_cast<Constant>(U); if (Op && SeenConstants.insert(Op).second) MarkUsedGlobalsAsNeeded(Op); } @@ -262,7 +249,8 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { // might make it deader. 
// bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) { - if (GV.use_empty()) return false; + if (GV.use_empty()) + return false; GV.removeDeadConstantUsers(); return GV.use_empty(); } diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 5ffe15d..fd77369 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -54,7 +55,6 @@ STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); STATISTIC(NumDeleted , "Number of globals deleted"); -STATISTIC(NumFnDeleted , "Number of functions deleted"); STATISTIC(NumGlobUses , "Number of global uses devirtualized"); STATISTIC(NumLocalized , "Number of globals localized"); STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); @@ -69,6 +69,7 @@ namespace { struct GlobalOpt : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); } static char ID; // Pass identification, replacement for typeid GlobalOpt() : ModulePass(ID) { @@ -81,11 +82,14 @@ namespace { bool OptimizeFunctions(Module &M); bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); - bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); - bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, - const GlobalStatus &GS); + bool deleteIfDead(GlobalValue &GV); + bool processGlobal(GlobalValue &GV); + bool processInternalGlobal(GlobalVariable *GV, const 
GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); + bool isPointerValueDeadOnEntryToFunction(const Function *F, + GlobalValue *GV); + TargetLibraryInfo *TLI; SmallSet<const Comdat *, 8> NotDiscardableComdats; }; @@ -95,13 +99,14 @@ char GlobalOpt::ID = 0; INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } -/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker -/// as a root? If so, we might not really want to eliminate the stores to it. +/// Is this global variable possibly used by a leak checker as a root? If so, +/// we might not really want to eliminate the stores to it. static bool isLeakCheckerRoot(GlobalVariable *GV) { // A global variable is a root if it is a pointer, or could plausibly contain // a pointer. There are two challenges; one is that we could have a struct @@ -176,10 +181,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { } while (1); } -/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users -/// of the global and clean up any that obviously don't assign the global a -/// value that isn't dynamically allocated. -/// +/// This GV is a pointer root. Loop over all users of the global and clean up +/// any that obviously don't assign the global a value that isn't dynamically +/// allocated. static bool CleanupPointerRootUsers(GlobalVariable *GV, const TargetLibraryInfo *TLI) { // A brief explanation of leak checkers. The goal is to find bugs where @@ -263,10 +267,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, return Changed; } -/// CleanupConstantGlobalUsers - We just marked GV constant. 
Loop over all -/// users of the global, cleaning up the obvious ones. This is largely just a -/// quick scan over the use list to clean up the easy and obvious cruft. This -/// returns true if it made a change. +/// We just marked GV constant. Loop over all users of the global, cleaning up +/// the obvious ones. This is largely just a quick scan over the use list to +/// clean up the easy and obvious cruft. This returns true if it made a change. static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, const DataLayout &DL, TargetLibraryInfo *TLI) { @@ -353,8 +356,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, return Changed; } -/// isSafeSROAElementUse - Return true if the specified instruction is a safe -/// user of a derived expression from a global that we want to SROA. +/// Return true if the specified instruction is a safe user of a derived +/// expression from a global that we want to SROA. static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast<Constant>(V)) @@ -385,9 +388,8 @@ static bool isSafeSROAElementUse(Value *V) { } -/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. -/// Look at it and its uses and decide whether it is safe to SROA this global. -/// +/// U is a direct user of the specified global value. Look at it and its uses +/// and decide whether it is safe to SROA this global. static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // The user of the global must be a GEP Inst or a ConstantExpr GEP. if (!isa<GetElementPtrInst>(U) && @@ -452,9 +454,8 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { return true; } -/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it -/// is safe for us to perform this transformation. -/// +/// Look at all uses of the global and decide whether it is safe for us to +/// perform this transformation. 
static bool GlobalUsersSafeToSRA(GlobalValue *GV) { for (User *U : GV->users()) if (!IsUserOfGlobalSafeForSRA(U, GV)) @@ -464,10 +465,10 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { } -/// SRAGlobal - Perform scalar replacement of aggregates on the specified global -/// variable. This opens the door for other optimizations by exposing the -/// behavior of the program in a more fine-grained way. We have determined that -/// this transformation is safe already. We return the first global variable we +/// Perform scalar replacement of aggregates on the specified global variable. +/// This opens the door for other optimizations by exposing the behavior of the +/// program in a more fine-grained way. We have determined that this +/// transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Make sure this global only has simple uses that we can SRA. @@ -497,7 +498,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { In, GV->getName()+"."+Twine(i), GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - Globals.insert(GV, NGV); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); NewGlobals.push_back(NGV); // Calculate the known alignment of the field. If the original aggregate @@ -530,7 +532,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { In, GV->getName()+"."+Twine(i), GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - Globals.insert(GV, NGV); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); NewGlobals.push_back(NGV); // Calculate the known alignment of the field. 
If the original aggregate @@ -545,7 +548,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (NewGlobals.empty()) return nullptr; - DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -610,9 +613,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; } -/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified -/// value will trap if the value is dynamically null. PHIs keeps track of any -/// phi nodes we've seen to avoid reprocessing them. +/// Return true if all users of the specified value will trap if the value is +/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid +/// reprocessing them. static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSetImpl<const PHINode*> &PHIs) { for (const User *U : V->users()) @@ -653,9 +656,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, return true; } -/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads -/// from GV will trap if the loaded value is null. Note that this also permits -/// comparisons of the loaded value against null, as a special case. +/// Return true if all uses of any loads from GV will trap if the loaded value +/// is null. Note that this also permits comparisons of the loaded value +/// against null, as a special case. static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { for (const User *U : GV->users()) if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { @@ -735,10 +738,10 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { } -/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null -/// value stored into it. 
If there are uses of the loaded value that would trap -/// if the loaded value is dynamically null, then we know that they cannot be -/// reachable with a null optimize away the load. +/// The specified global has only one non-null value stored into it. If there +/// are uses of the loaded value that would trap if the loaded value is +/// dynamically null, then we know that they cannot be reachable with a null +/// optimize away the load. static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, const DataLayout &DL, TargetLibraryInfo *TLI) { @@ -778,7 +781,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, } if (Changed) { - DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); + DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n"); ++NumGlobUses; } @@ -801,8 +804,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, return Changed; } -/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the -/// instructions that are foldable. +/// Walk the use list of V, constant folding all of the instructions that are +/// foldable. static void ConstantPropUsersOf(Value *V, const DataLayout &DL, TargetLibraryInfo *TLI) { for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) @@ -818,11 +821,11 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL, } } -/// OptimizeGlobalAddressOfMalloc - This function takes the specified global -/// variable, and transforms the program as if it always contained the result of -/// the specified malloc. Because it is always the result of the specified -/// malloc, there is no reason to actually DO the malloc. Instead, turn the -/// malloc into a global, and any loads of GV as uses of the new global. +/// This function takes the specified global variable, and transforms the +/// program as if it always contained the result of the specified malloc. 
+/// Because it is always the result of the specified malloc, there is no reason +/// to actually DO the malloc. Instead, turn the malloc into a global, and any +/// loads of GV as uses of the new global. static GlobalVariable * OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, ConstantInt *NElements, const DataLayout &DL, @@ -838,13 +841,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - GlobalType, false, - GlobalValue::InternalLinkage, - UndefValue::get(GlobalType), - GV->getName()+".body", - GV, - GV->getThreadLocalMode()); + GlobalVariable *NewGV = new GlobalVariable( + *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, + UndefValue::get(GlobalType), GV->getName() + ".body", nullptr, + GV->getThreadLocalMode()); // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update @@ -935,7 +935,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, cast<StoreInst>(InitBool->user_back())->eraseFromParent(); delete InitBool; } else - GV->getParent()->getGlobalList().insert(GV, InitBool); + GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); // Now the GV is dead, nuke it and the malloc.. GV->eraseFromParent(); @@ -951,10 +951,9 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, return NewGV; } -/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking -/// to make sure that there are no complex uses of V. We permit simple things -/// like dereferencing the pointer, but not storing through the address, unless -/// it is to the specified global. 
+/// Scan the use-list of V checking to make sure that there are no complex uses +/// of V. We permit simple things like dereferencing the pointer, but not +/// storing through the address, unless it is to the specified global. static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, const GlobalVariable *GV, SmallPtrSetImpl<const PHINode*> &PHIs) { @@ -998,10 +997,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, return true; } -/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV -/// somewhere. Transform all uses of the allocation into loads from the -/// global and uses of the resultant pointer. Further, delete the store into -/// GV. This assumes that these value pass the +/// The Alloc pointer is stored into GV somewhere. Transform all uses of the +/// allocation into loads from the global and uses of the resultant pointer. +/// Further, delete the store into GV. This assumes that these value pass the /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { @@ -1043,9 +1041,9 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, } } -/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi -/// of a load) are simple enough to perform heap SRA on. This permits GEP's -/// that index through the array and struct field, icmps of null, and PHIs. +/// Verify that all uses of V (a load, or a phi of a load) are simple enough to +/// perform heap SRA on. This permits GEP's that index through the array and +/// struct field, icmps of null, and PHIs. 
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs, SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) { @@ -1096,8 +1094,8 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, } -/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from -/// GV are simple enough to perform HeapSRA, return true. +/// If all users of values loaded from GV are simple enough to perform HeapSRA, +/// return true. static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; @@ -1186,8 +1184,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, return FieldVals[FieldNo] = Result; } -/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from -/// the load, rewrite the derived value to use the HeapSRoA'd load. +/// Given a load instruction and a value derived from the load, rewrite the +/// derived value to use the HeapSRoA'd load. static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { @@ -1248,10 +1246,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, } } -/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr -/// is a value loaded from the global. Eliminate all uses of Ptr, making them -/// use FieldGlobals instead. All uses of loaded values satisfy -/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. +/// We are performing Heap SRoA on a global. Ptr is a value loaded from the +/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead. +/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. 
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { @@ -1266,8 +1263,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, } } -/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break -/// it up into multiple allocations of arrays of the fields. +/// CI is an allocation of an array of structures. Break it up into multiple +/// allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value *NElems, const DataLayout &DL, const TargetLibraryInfo *TLI) { @@ -1291,12 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Type *FieldTy = STy->getElementType(FieldNo); PointerType *PFieldTy = PointerType::get(FieldTy, AS); - GlobalVariable *NGV = - new GlobalVariable(*GV->getParent(), - PFieldTy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(PFieldTy), - GV->getName() + ".f" + Twine(FieldNo), GV, - GV->getThreadLocalMode()); + GlobalVariable *NGV = new GlobalVariable( + *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), + nullptr, GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); unsigned TypeSize = DL.getTypeAllocSize(FieldTy); @@ -1336,7 +1331,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Split the basic block at the old malloc. BasicBlock *OrigBB = CI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); + BasicBlock *ContBB = + OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont"); // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. 
@@ -1376,9 +1372,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // CI is no longer needed, remove it. CI->eraseFromParent(); - /// InsertedScalarizedLoads - As we process loads, if we can't immediately - /// update all uses of the load, keep track of what scalarized loads are - /// inserted for a given load. + /// As we process loads, if we can't immediately update all uses of the load, + /// keep track of what scalarized loads are inserted for a given load. DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; @@ -1454,13 +1449,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, return cast<GlobalVariable>(FieldGlobals[0]); } -/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a -/// pointer global variable with a single value stored it that is a malloc or -/// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, +/// This function is called when we see a pointer global variable with a single +/// value stored it that is a malloc or cast of malloc. +static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, Type *AllocTy, AtomicOrdering Ordering, - Module::global_iterator &GVI, const DataLayout &DL, TargetLibraryInfo *TLI) { // If this is a malloc of an abstract type, don't touch it. @@ -1499,7 +1492,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, // (2048 bytes currently), as we don't want to introduce a 16M global or // something. 
if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); + OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); return true; } @@ -1544,19 +1537,18 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, CI = cast<CallInst>(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), - DL, TLI); + PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL, + TLI); return true; } return false; } -// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge -// that only one value (besides its initializer) is ever stored to the global. -static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, +// Try to optimize globals based on the knowledge that only one value (besides +// its initializer) is ever stored to the global. +static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, AtomicOrdering Ordering, - Module::global_iterator &GVI, const DataLayout &DL, TargetLibraryInfo *TLI) { // Ignore no-op GEPs and bitcasts. @@ -1577,9 +1569,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { Type *MallocType = getMallocAllocatedType(CI, TLI); - if (MallocType && - TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI, - DL, TLI)) + if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + Ordering, DL, TLI)) return true; } } @@ -1587,10 +1578,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, return false; } -/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only -/// two values ever stored into GV are its initializer and OtherVal. See if we -/// can shrink the global into a boolean and select between the two values -/// whenever it is used. 
This exposes the values to other scalar optimizations. +/// At this point, we have learned that the only two values ever stored into GV +/// are its initializer and OtherVal. See if we can shrink the global into a +/// boolean and select between the two values whenever it is used. This exposes +/// the values to other scalar optimizations. static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { Type *GVElType = GV->getType()->getElementType(); @@ -1610,7 +1601,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) return false; - DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), @@ -1620,7 +1611,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GV->getName()+".b", GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - GV->getParent()->getGlobalList().insert(GV, NewGV); + GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); Constant *InitVal = GV->getInitializer(); assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && @@ -1688,61 +1679,213 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { return true; } +bool GlobalOpt::deleteIfDead(GlobalValue &GV) { + GV.removeDeadConstantUsers(); -/// ProcessGlobal - Analyze the specified global variable and optimize it if -/// possible. If we make a change, return true. -bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, - Module::global_iterator &GVI) { - // Do more involved optimizations if the global is internal. 
- GV->removeDeadConstantUsers(); + if (!GV.isDiscardableIfUnused()) + return false; - if (GV->use_empty()) { - DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); - GV->eraseFromParent(); - ++NumDeleted; - return true; - } + if (const Comdat *C = GV.getComdat()) + if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) + return false; - if (!GV->hasLocalLinkage()) + bool Dead; + if (auto *F = dyn_cast<Function>(&GV)) + Dead = F->isDefTriviallyDead(); + else + Dead = GV.use_empty(); + if (!Dead) + return false; + + DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); + GV.eraseFromParent(); + ++NumDeleted; + return true; +} + +/// Analyze the specified global variable and optimize it if possible. If we +/// make a change, return true. +bool GlobalOpt::processGlobal(GlobalValue &GV) { + // Do more involved optimizations if the global is internal. + if (!GV.hasLocalLinkage()) return false; GlobalStatus GS; - if (GlobalStatus::analyzeGlobal(GV, GS)) + if (GlobalStatus::analyzeGlobal(&GV, GS)) return false; - if (!GS.IsCompared && !GV->hasUnnamedAddr()) { - GV->setUnnamedAddr(true); + bool Changed = false; + if (!GS.IsCompared && !GV.hasUnnamedAddr()) { + GV.setUnnamedAddr(true); NumUnnamed++; + Changed = true; } - if (GV->isConstant() || !GV->hasInitializer()) + auto *GVar = dyn_cast<GlobalVariable>(&GV); + if (!GVar) + return Changed; + + if (GVar->isConstant() || !GVar->hasInitializer()) + return Changed; + + return processInternalGlobal(GVar, GS) || Changed; +} + +bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) { + // Find all uses of GV. We expect them all to be in F, and if we can't + // identify any of the uses we bail out. + // + // On each of these uses, identify if the memory that GV points to is + // used/required/live at the start of the function. If it is not, for example + // if the first thing the function does is store to the GV, the GV can + // possibly be demoted. 
+ // + // We don't do an exhaustive search for memory operations - simply look + // through bitcasts as they're quite common and benign. + const DataLayout &DL = GV->getParent()->getDataLayout(); + SmallVector<LoadInst *, 4> Loads; + SmallVector<StoreInst *, 4> Stores; + for (auto *U : GV->users()) { + if (Operator::getOpcode(U) == Instruction::BitCast) { + for (auto *UU : U->users()) { + if (auto *LI = dyn_cast<LoadInst>(UU)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast<StoreInst>(UU)) + Stores.push_back(SI); + else + return false; + } + continue; + } + + Instruction *I = dyn_cast<Instruction>(U); + if (!I) + return false; + assert(I->getParent()->getParent() == F); + + if (auto *LI = dyn_cast<LoadInst>(I)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast<StoreInst>(I)) + Stores.push_back(SI); + else + return false; + } + + // We have identified all uses of GV into loads and stores. Now check if all + // of them are known not to depend on the value of the global at the function + // entry point. We do this by ensuring that every load is dominated by at + // least one store. + auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F)) + .getDomTree(); + + // The below check is quadratic. Check we're not going to do too many tests. + // FIXME: Even though this will always have worst-case quadratic time, we + // could put effort into minimizing the average time by putting stores that + // have been shown to dominate at least one load at the beginning of the + // Stores array, making subsequent dominance checks more likely to succeed + // early. + // + // The threshold here is fairly large because global->local demotion is a + // very powerful optimization should it fire. 
+ const unsigned Threshold = 100; + if (Loads.size() * Stores.size() > Threshold) return false; - return ProcessInternalGlobal(GV, GVI, GS); + for (auto *L : Loads) { + auto *LTy = L->getType(); + if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) { + auto *STy = S->getValueOperand()->getType(); + // The load is only dominated by the store if DomTree says so + // and the number of bits loaded in L is less than or equal to + // the number of bits stored in S. + return DT.dominates(S, L) && + DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy); + })) + return false; + } + // All loads have known dependences inside F, so the global can be localized. + return true; +} + +/// C may have non-instruction users. Can all of those users be turned into +/// instructions? +static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { + // We don't do this exhaustively. The most common pattern that we really need + // to care about is a constant GEP or constant bitcast - so just looking + // through one single ConstantExpr. + // + // The set of constants that this function returns true for must be able to be + // handled by makeAllConstantUsesInstructions. + for (auto *U : C->users()) { + if (isa<Instruction>(U)) + continue; + if (!isa<ConstantExpr>(U)) + // Non instruction, non-constantexpr user; cannot convert this. + return false; + for (auto *UU : U->users()) + if (!isa<Instruction>(UU)) + // A constantexpr used by another constant. We don't try and recurse any + // further but just bail out at this point. + return false; + } + + return true; +} + +/// C may have non-instruction users, and +/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the +/// non-instruction users to instructions. 
+static void makeAllConstantUsesInstructions(Constant *C) { + SmallVector<ConstantExpr*,4> Users; + for (auto *U : C->users()) { + if (isa<ConstantExpr>(U)) + Users.push_back(cast<ConstantExpr>(U)); + else + // We should never get here; allNonInstructionUsersCanBeMadeInstructions + // should not have returned true for C. + assert( + isa<Instruction>(U) && + "Can't transform non-constantexpr non-instruction to instruction!"); + } + + SmallVector<Value*,4> UUsers; + for (auto *U : Users) { + UUsers.clear(); + for (auto *UU : U->users()) + UUsers.push_back(UU); + for (auto *UU : UUsers) { + Instruction *UI = cast<Instruction>(UU); + Instruction *NewU = U->getAsInstruction(); + NewU->insertBefore(UI); + UI->replaceUsesOfWith(U, NewU); + } + U->dropAllReferences(); + } } -/// ProcessInternalGlobal - Analyze the specified global variable and optimize +/// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, - Module::global_iterator &GVI, +bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS) { auto &DL = GV->getParent()->getDataLayout(); - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive), we replace - // the global with a local alloca in this function. + // If this is a first class global and has only one accessing function and + // this function is non-recursive, we replace the global with a local alloca + // in this function. // // NOTE: It doesn't make sense to promote non-single-value types since we // are just replacing static memory to stack memory. // // If the global is in different address space, don't bring it to stack. 
if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && + GS.AccessingFunction && GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + GV->getType()->getAddressSpace() == 0 && + !GV->isExternallyInitialized() && + allNonInstructionUsersCanBeMadeInstructions(GV) && + GS.AccessingFunction->doesNotRecurse() && + isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction ->getEntryBlock().begin()); Type *ElemTy = GV->getType()->getElementType(); @@ -1752,6 +1895,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!isa<UndefValue>(GV->getInitializer())) new StoreInst(GV->getInitializer(), Alloca, &FirstI); + makeAllConstantUsesInstructions(GV); + GV->replaceAllUsesWith(Alloca); GV->eraseFromParent(); ++NumLocalized; @@ -1761,7 +1906,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.IsLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); bool Changed; if (isLeakCheckerRoot(GV)) { @@ -1800,11 +1945,9 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { const DataLayout &DL = GV->getParent()->getDataLayout(); - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! 
+ if (SRAGlobal(GV, DL)) return true; - } - } else if (GS.StoredType == GlobalStatus::StoredOnce) { + } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the // initializer to be the stored value, then delete all stores to the @@ -1822,8 +1965,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; - } else { - GVI = GV; } ++NumSubstitute; return true; @@ -1831,8 +1972,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI, - DL, TLI)) + if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a @@ -1850,8 +1990,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return false; } -/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified -/// function, changing them to FastCC. +/// Walk all of the direct calls of the specified function, changing them to +/// FastCC. static void ChangeCalleesToFastCall(Function *F) { for (User *U : F->users()) { if (isa<BlockAddress>(U)) @@ -1898,38 +2038,38 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { bool Changed = false; // Optimize functions. for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { - Function *F = FI++; + Function *F = &*FI++; // Functions without names cannot be referenced outside this module. 
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); - const Comdat *C = F->getComdat(); - bool inComdat = C && NotDiscardableComdats.count(C); - F->removeDeadConstantUsers(); - if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) { - F->eraseFromParent(); + if (deleteIfDead(*F)) { Changed = true; - ++NumFnDeleted; - } else if (F->hasLocalLinkage()) { - if (isProfitableToMakeFastCC(F) && !F->isVarArg() && - !F->hasAddressTaken()) { - // If this function has a calling convention worth changing, is not a - // varargs function, and is only called directly, promote it to use the - // Fast calling convention. - F->setCallingConv(CallingConv::Fast); - ChangeCalleesToFastCall(F); - ++NumFastCallFns; - Changed = true; - } + continue; + } - if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && - !F->hasAddressTaken()) { - // The function is not used by a trampoline intrinsic, so it is safe - // to remove the 'nest' attribute. - RemoveNestAttribute(F); - ++NumNestRemoved; - Changed = true; - } + Changed |= processGlobal(*F); + + if (!F->hasLocalLinkage()) + continue; + if (isProfitableToMakeFastCC(F) && !F->isVarArg() && + !F->hasAddressTaken()) { + // If this function has a calling convention worth changing, is not a + // varargs function, and is only called directly, promote it to use the + // Fast calling convention. + F->setCallingConv(CallingConv::Fast); + ChangeCalleesToFastCall(F); + ++NumFastCallFns; + Changed = true; + } + + if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && + !F->hasAddressTaken()) { + // The function is not used by a trampoline intrinsic, so it is safe + // to remove the 'nest' attribute. 
+ RemoveNestAttribute(F); + ++NumNestRemoved; + Changed = true; } } return Changed; @@ -1940,7 +2080,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // Global variables without names cannot be referenced outside this module. if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) GV->setLinkage(GlobalValue::InternalLinkage); @@ -1953,12 +2093,12 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { GV->setInitializer(New); } - if (GV->isDiscardableIfUnused()) { - if (const Comdat *C = GV->getComdat()) - if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage()) - continue; - Changed |= ProcessGlobal(GV, GVI); + if (deleteIfDead(*GV)) { + Changed = true; + continue; } + + Changed |= processGlobal(*GV); } return Changed; } @@ -1968,8 +2108,8 @@ isSimpleEnoughValueToCommit(Constant *C, SmallPtrSetImpl<Constant *> &SimpleConstants, const DataLayout &DL); -/// isSimpleEnoughValueToCommit - Return true if the specified constant can be -/// handled by the code generator. We don't want to generate something like: +/// Return true if the specified constant can be handled by the code generator. +/// We don't want to generate something like: /// void *X = &X/42; /// because the code generator doesn't have a relocation that can handle that. /// @@ -2044,11 +2184,11 @@ isSimpleEnoughValueToCommit(Constant *C, } -/// isSimpleEnoughPointerToCommit - Return true if this constant is simple -/// enough for us to understand. In particular, if it is a cast to anything -/// other than from one pointer type to another pointer type, we punt. -/// We basically just support direct accesses to globals and GEP's of -/// globals. This should be kept up to date with CommitValueTo. +/// Return true if this constant is simple enough for us to understand. 
In +/// particular, if it is a cast to anything other than from one pointer type to +/// another pointer type, we punt. We basically just support direct accesses to +/// globals and GEP's of globals. This should be kept up to date with +/// CommitValueTo. static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't // want to worry about them partially overlapping other stores. @@ -2095,9 +2235,9 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; } -/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global -/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. -/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. +/// Evaluate a piece of a constantexpr store into a global initializer. This +/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the +/// GEP operands of Addr [0, OpNo) have been stepped into. static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, ConstantExpr *Addr, unsigned OpNo) { // Base case of the recursion. @@ -2144,7 +2284,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, return ConstantVector::get(Elts); } -/// CommitValueTo - We have decided that Addr (which satisfies the predicate +/// We have decided that Addr (which satisfies the predicate /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. static void CommitValueTo(Constant *Val, Constant *Addr) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { @@ -2160,10 +2300,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { namespace { -/// Evaluator - This class evaluates LLVM IR, producing the Constant -/// representing each SSA instruction. Changes to global variables are stored -/// in a mapping that can be iterated over after the evaluation is complete. -/// Once an evaluation call fails, the evaluation object should not be reused. 
+/// This class evaluates LLVM IR, producing the Constant representing each SSA +/// instruction. Changes to global variables are stored in a mapping that can +/// be iterated over after the evaluation is complete. Once an evaluation call +/// fails, the evaluation object should not be reused. class Evaluator { public: Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) @@ -2180,15 +2320,15 @@ public: Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); } - /// EvaluateFunction - Evaluate a call to function F, returning true if - /// successful, false if we can't evaluate it. ActualArgs contains the formal - /// arguments for the function. + /// Evaluate a call to function F, returning true if successful, false if we + /// can't evaluate it. ActualArgs contains the formal arguments for the + /// function. bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl<Constant*> &ActualArgs); - /// EvaluateBlock - Evaluate all instructions in block BB, returning true if - /// successful, false if we can't evaluate it. NewBB returns the next BB that - /// control flows into, or null upon return. + /// Evaluate all instructions in block BB, returning true if successful, false + /// if we can't evaluate it. NewBB returns the next BB that control flows + /// into, or null upon return. bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB); Constant *getVal(Value *V) { @@ -2213,32 +2353,31 @@ public: private: Constant *ComputeLoadResult(Constant *P); - /// ValueStack - As we compute SSA register values, we store their contents - /// here. The back of the deque contains the current function and the stack - /// contains the values in the calling frames. + /// As we compute SSA register values, we store their contents here. The back + /// of the deque contains the current function and the stack contains the + /// values in the calling frames. 
std::deque<DenseMap<Value*, Constant*>> ValueStack; - /// CallStack - This is used to detect recursion. In pathological situations - /// we could hit exponential behavior, but at least there is nothing - /// unbounded. + /// This is used to detect recursion. In pathological situations we could hit + /// exponential behavior, but at least there is nothing unbounded. SmallVector<Function*, 4> CallStack; - /// MutatedMemory - For each store we execute, we update this map. Loads - /// check this to get the most up-to-date value. If evaluation is successful, - /// this state is committed to the process. + /// For each store we execute, we update this map. Loads check this to get + /// the most up-to-date value. If evaluation is successful, this state is + /// committed to the process. DenseMap<Constant*, Constant*> MutatedMemory; - /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable - /// to represent its body. This vector is needed so we can delete the - /// temporary globals when we are done. + /// To 'execute' an alloca, we create a temporary global variable to represent + /// its body. This vector is needed so we can delete the temporary globals + /// when we are done. SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps; - /// Invariants - These global variables have been marked invariant by the - /// static constructor. + /// These global variables have been marked invariant by the static + /// constructor. SmallPtrSet<GlobalVariable*, 8> Invariants; - /// SimpleConstants - These are constants we have checked and know to be - /// simple enough to live in a static initializer of a global. + /// These are constants we have checked and know to be simple enough to live + /// in a static initializer of a global. 
SmallPtrSet<Constant*, 8> SimpleConstants; const DataLayout &DL; @@ -2247,9 +2386,8 @@ private: } // anonymous namespace -/// ComputeLoadResult - Return the value that would be computed by a load from -/// P after the stores reflected by 'memory' have been performed. If we can't -/// decide, return null. +/// Return the value that would be computed by a load from P after the stores +/// reflected by 'memory' have been performed. If we can't decide, return null. Constant *Evaluator::ComputeLoadResult(Constant *P) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. @@ -2275,9 +2413,9 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { return nullptr; // don't know how to evaluate. } -/// EvaluateBlock - Evaluate all instructions in block BB, returning true if -/// successful, false if we can't evaluate it. NewBB returns the next BB that -/// control flows into, or null upon return. +/// Evaluate all instructions in block BB, returning true if successful, false +/// if we can't evaluate it. NewBB returns the next BB that control flows into, +/// or null upon return. bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. @@ -2438,7 +2576,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { - CallSite CS(CurInst); + CallSite CS(&*CurInst); // Debug info can safely be ignored here. if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { @@ -2504,6 +2642,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // Continue even if we do nothing. ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::assume) { + DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + ++CurInst; + continue; } DEBUG(dbgs() << "Unknown intrinsic. 
Can not evaluate.\n"); @@ -2600,7 +2742,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) InstResult = ConstantFoldConstantExpression(CE, DL, TLI); - setVal(CurInst, InstResult); + setVal(&*CurInst, InstResult); } // If we just processed an invoke, we finished evaluating the block. @@ -2615,9 +2757,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, } } -/// EvaluateFunction - Evaluate a call to function F, returning true if -/// successful, false if we can't evaluate it. ActualArgs contains the formal -/// arguments for the function. +/// Evaluate a call to function F, returning true if successful, false if we +/// can't evaluate it. ActualArgs contains the formal arguments for the +/// function. bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl<Constant*> &ActualArgs) { // Check to see if this function is already executing (recursion). If so, @@ -2631,7 +2773,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, unsigned ArgNo = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++ArgNo) - setVal(AI, ActualArgs[ArgNo]); + setVal(&*AI, ActualArgs[ArgNo]); // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, // we can only evaluate any one basic block at most once. This set keeps @@ -2639,7 +2781,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, SmallPtrSet<BasicBlock*, 32> ExecutedBlocks; // CurBB - The current basic block we're evaluating. - BasicBlock *CurBB = F->begin(); + BasicBlock *CurBB = &F->front(); BasicBlock::iterator CurInst = CurBB->begin(); @@ -2679,8 +2821,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, } } -/// EvaluateStaticConstructor - Evaluate static constructors in the function, if -/// we can. Return true if we can, false otherwise. +/// Evaluate static constructors in the function, if we can. 
Return true if we +/// can, false otherwise. static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Call the function. @@ -2708,7 +2850,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, } static int compareNames(Constant *const *A, Constant *const *B) { - return (*A)->getName().compare((*B)->getName()); + return (*A)->stripPointerCasts()->getName().compare( + (*B)->stripPointerCasts()->getName()); } static void setUsedInitializer(GlobalVariable &V, @@ -2742,7 +2885,7 @@ static void setUsedInitializer(GlobalVariable &V, } namespace { -/// \brief An easy to access representation of llvm.used and llvm.compiler.used. +/// An easy to access representation of llvm.used and llvm.compiler.used. class LLVMUsed { SmallPtrSet<GlobalValue *, 8> Used; SmallPtrSet<GlobalValue *, 8> CompilerUsed; @@ -2861,10 +3004,17 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;) { - Module::alias_iterator J = I++; + GlobalAlias *J = &*I++; + // Aliases without names cannot be referenced outside this module. if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) J->setLinkage(GlobalValue::InternalLinkage); + + if (deleteIfDead(*J)) { + Changed = true; + continue; + } + // If the aliasee may change at link time, nothing can be done - bail out. if (J->mayBeOverridden()) continue; @@ -2889,15 +3039,15 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { if (RenameTarget) { // Give the aliasee the name, linkage and other attributes of the alias. 
- Target->takeName(J); + Target->takeName(&*J); Target->setLinkage(J->getLinkage()); Target->setVisibility(J->getVisibility()); Target->setDLLStorageClass(J->getDLLStorageClass()); - if (Used.usedErase(J)) + if (Used.usedErase(&*J)) Used.usedInsert(Target); - if (Used.compilerUsedErase(J)) + if (Used.compilerUsedErase(&*J)) Used.compilerUsedInsert(Target); } else if (mayHaveOtherReferences(*J, Used)) continue; @@ -2936,8 +3086,8 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { return Fn; } -/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ -/// destructor and can therefore be eliminated. +/// Returns whether the given function is an empty C++ destructor and can +/// therefore be eliminated. /// Note that we assume that other optimization passes have already simplified /// the code so we only look for a function with a single basic block, where /// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and @@ -3081,3 +3231,4 @@ bool GlobalOpt::runOnModule(Module &M) { return Changed; } + diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp index 50f56b0..7ea6c08 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements the common infrastructure (including C bindings) for -// libLLVMIPO.a, which implements several transformations over the LLVM +// This file implements the common infrastructure (including C bindings) for +// libLLVMIPO.a, which implements several transformations over the LLVM // intermediate representation. 
// //===----------------------------------------------------------------------===// @@ -24,14 +24,17 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { initializeArgPromotionPass(Registry); initializeConstantMergePass(Registry); + initializeCrossDSOCFIPass(Registry); initializeDAEPass(Registry); initializeDAHPass(Registry); + initializeForceFunctionAttrsLegacyPassPass(Registry); initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); initializeAlwaysInlinerPass(Registry); initializeSimpleInlinerPass(Registry); + initializeInferFunctionAttrsLegacyPassPass(Registry); initializeInternalizePassPass(Registry); initializeLoopExtractorPass(Registry); initializeBlockExtractorPassPass(Registry); @@ -40,13 +43,15 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); initializePruneEHPass(Registry); - initializeStripDeadPrototypesPassPass(Registry); + initializeStripDeadPrototypesLegacyPassPass(Registry); initializeStripSymbolsPass(Registry); initializeStripDebugDeclarePass(Registry); initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); initializeEliminateAvailableExternallyPass(Registry); + initializeSampleProfileLoaderPass(Registry); + initializeFunctionImportPassPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp new file mode 100644 index 0000000..d02c861 --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -0,0 +1,937 @@ +//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "inferattrs" + +STATISTIC(NumReadNone, "Number of functions inferred as readnone"); +STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); +STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); +STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); +STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); + +static bool setDoesNotAccessMemory(Function &F) { + if (F.doesNotAccessMemory()) + return false; + F.setDoesNotAccessMemory(); + ++NumReadNone; + return true; +} + +static bool setOnlyReadsMemory(Function &F) { + if (F.onlyReadsMemory()) + return false; + F.setOnlyReadsMemory(); + ++NumReadOnly; + return true; +} + +static bool setDoesNotThrow(Function &F) { + if (F.doesNotThrow()) + return false; + F.setDoesNotThrow(); + ++NumNoUnwind; + return true; +} + +static bool setDoesNotCapture(Function &F, unsigned n) { + if (F.doesNotCapture(n)) + return false; + F.setDoesNotCapture(n); + ++NumNoCapture; + return true; +} + +static bool setOnlyReadsMemory(Function &F, unsigned n) { + if (F.onlyReadsMemory(n)) + return false; + F.setOnlyReadsMemory(n); + ++NumReadOnlyArg; + return true; +} + +static bool setDoesNotAlias(Function &F, unsigned n) { + if (F.doesNotAlias(n)) + return false; + F.setDoesNotAlias(n); + ++NumNoAlias; + return true; +} + +/// Analyze the name and prototype of the given function and set any applicable +/// attributes. 
+/// +/// Returns true if any attributes were set and false otherwise. +static bool inferPrototypeAttributes(Function &F, + const TargetLibraryInfo &TLI) { + if (F.hasFnAttribute(Attribute::OptimizeNone)) + return false; + + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc))) + return false; + + bool Changed = false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return 
Changed; + case LibFunc::strcmp: // 0,1 + case LibFunc::strspn: // 0,1 + case LibFunc::strncmp: // 0,1 + case LibFunc::strcspn: // 0,1 + case LibFunc::strcoll: // 0,1 + case LibFunc::strcasecmp: // 0,1 + case LibFunc::strncasecmp: // + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + 
!FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::sscanf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::sprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::system: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::mkdir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || 
!FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::read: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::rewind: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::rmdir: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::rename: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::chmod: + case LibFunc::chown: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::access: + if (FTy->getNumParams() != 
2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F); + return Changed; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 
2); + return Changed; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 3); + return Changed; + case LibFunc::fread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fputs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fscanf: + case LibFunc::fprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= 
setDoesNotCapture(F, 1); + return Changed; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::gets: + case LibFunc::getchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::uname: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::unlink: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + 
Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::pread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pread" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pwrite" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= 
setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vsscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAccessMemory(F); + return Changed; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. 
+ Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= 
setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::gettimeofday: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + // Currently some platforms have the restrict keyword on the arguments to + // gettimeofday. To be conservative, do not add noalias to gettimeofday's + // arguments. + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + + default: + // FIXME: It'd be really nice to cover all the library functions we're + // aware of here. 
+ return false; + } +} + +static bool inferAllPrototypeAttributes(Module &M, + const TargetLibraryInfo &TLI) { + bool Changed = false; + + for (Function &F : M.functions()) + // We only infer things using the prototype if the definition isn't around + // to analyze directly. + if (F.isDeclaration()) + Changed |= inferPrototypeAttributes(F, TLI); + + return Changed; +} + +PreservedAnalyses InferFunctionAttrsPass::run(Module &M, + AnalysisManager<Module> *AM) { + auto &TLI = AM->getResult<TargetLibraryAnalysis>(M); + + if (!inferAllPrototypeAttributes(M, TLI)) + // If we didn't infer anything, preserve all analyses. + return PreservedAnalyses::all(); + + // Otherwise, we may have changed fundamental function attributes, so clear + // out all the passes. + return PreservedAnalyses::none(); +} + +namespace { +struct InferFunctionAttrsLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + InferFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeInferFunctionAttrsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + } + + bool runOnModule(Module &M) override { + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + return inferAllPrototypeAttributes(M, TLI); + } +}; +} + +char InferFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) + +Pass *llvm::createInferFunctionAttrsLegacyPass() { + return new InferFunctionAttrsLegacyPass(); +} diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp index dc56a02..1704bfe 100644 --- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp 
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp @@ -14,10 +14,10 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -35,17 +35,15 @@ namespace { /// \brief Inliner pass which only handles "always inline" functions. class AlwaysInliner : public Inliner { - InlineCostAnalysis *ICA; public: // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), - ICA(nullptr) { + AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } AlwaysInliner(bool InsertLifetime) - : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) { + : Inliner(ID, -2000000000, InsertLifetime) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } @@ -53,9 +51,6 @@ public: InlineCost getInlineCost(CallSite CS) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnSCC(CallGraphSCC &SCC) override; - using llvm::Pass::doFinalization; bool doFinalization(CallGraph &CG) override { return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true); @@ -67,10 +62,9 @@ public: char AlwaysInliner::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) @@ -99,19 +93,8 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { // that 
are viable for inlining. FIXME: We shouldn't even get here for // declarations. if (Callee && !Callee->isDeclaration() && - CS.hasFnAttr(Attribute::AlwaysInline) && - ICA->isInlineViable(*Callee)) + CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee)) return InlineCost::getAlways(); return InlineCost::getNever(); } - -bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) { - ICA = &getAnalysis<InlineCostAnalysis>(); - return Inliner::runOnSCC(SCC); -} - -void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<InlineCostAnalysis>(); - Inliner::getAnalysisUsage(AU); -} diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp index 9b01d81..45609f8 100644 --- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -23,6 +23,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" using namespace llvm; @@ -37,26 +38,30 @@ namespace { /// inliner pass and the always inliner pass. The two passes use different cost /// analyses to determine when to inline. 
class SimpleInliner : public Inliner { - InlineCostAnalysis *ICA; public: - SimpleInliner() : Inliner(ID), ICA(nullptr) { + SimpleInliner() : Inliner(ID) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } SimpleInliner(int Threshold) - : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) { + : Inliner(ID, Threshold, /*InsertLifetime*/ true) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) override { - return ICA->getInlineCost(CS, getInlineThreshold(CS)); + Function *Callee = CS.getCalledFunction(); + TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); + return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT); } bool runOnSCC(CallGraphSCC &SCC) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + TargetTransformInfoWrapperPass *TTIWP; }; static int computeThresholdFromOptLevels(unsigned OptLevel, @@ -75,10 +80,10 @@ static int computeThresholdFromOptLevels(unsigned OptLevel, char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(SimpleInliner, "inline", "Function Integration/Inlining", false, false) @@ -95,11 +100,11 @@ Pass *llvm::createFunctionInliningPass(unsigned OptLevel, } bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { - ICA = &getAnalysis<InlineCostAnalysis>(); + TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); return Inliner::runOnSCC(SCC); } void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<InlineCostAnalysis>(); + 
AU.addRequired<TargetTransformInfoWrapperPass>(); Inliner::getAnalysisUsage(AU); } diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp index 5273c3d..bbe5f876 100644 --- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -64,20 +65,22 @@ ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225), // Threshold to use when optsize is specified (and there is no -inline-limit). const int OptSizeThreshold = 75; -Inliner::Inliner(char &ID) - : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {} +Inliner::Inliner(char &ID) + : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) { +} Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) - : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? - InlineLimit : Threshold), - InsertLifetime(InsertLifetime) {} + : CallGraphSCCPass(ID), + InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit + : Threshold), + InsertLifetime(InsertLifetime) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should /// always explicitly call the implementation here. 
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<AliasAnalysis>(); AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -85,39 +88,6 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { typedef DenseMap<ArrayType*, std::vector<AllocaInst*> > InlinedArrayAllocasTy; -/// \brief If the inlined function had a higher stack protection level than the -/// calling function, then bump up the caller's stack protection level. -static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { - // If upgrading the SSP attribute, clear out the old SSP Attributes first. - // Having multiple SSP attributes doesn't actually hurt, but it adds useless - // clutter to the IR. - AttrBuilder B; - B.addAttribute(Attribute::StackProtect) - .addAttribute(Attribute::StackProtectStrong) - .addAttribute(Attribute::StackProtectReq); - AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(), - AttributeSet::FunctionIndex, - B); - - if (Callee->hasFnAttribute(Attribute::SafeStack)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::SafeStack); - } else if (Callee->hasFnAttribute(Attribute::StackProtectReq) && - !Caller->hasFnAttribute(Attribute::SafeStack)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::StackProtectReq); - } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) && - !Caller->hasFnAttribute(Attribute::SafeStack) && - !Caller->hasFnAttribute(Attribute::StackProtectReq)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::StackProtectStrong); - } else if (Callee->hasFnAttribute(Attribute::StackProtect) && - !Caller->hasFnAttribute(Attribute::SafeStack) && - !Caller->hasFnAttribute(Attribute::StackProtectReq) && - !Caller->hasFnAttribute(Attribute::StackProtectStrong)) - 
Caller->addFnAttr(Attribute::StackProtect); -} - /// If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. /// @@ -126,18 +96,26 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. -static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, +static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); + // We need to manually construct BasicAA directly in order to disable + // its use of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. + AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR)); + // Try to inline the function. Get the list of static allocas that were // inlined. - if (!InlineFunction(CS, IFI, InsertLifetime)) + if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) return false; - AdjustCallerSSPLevel(Caller, Callee); + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, @@ -219,6 +197,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " << *AvailableAlloca << '\n'); + // Move affected dbg.declare calls immediately after the new alloca to + // avoid the situation when a dbg.declare precedes its alloca. 
+ if (auto *L = LocalAsMetadata::getIfExists(AI)) + if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) + for (User *U : MDV->users()) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) + DDI->moveBefore(AvailableAlloca->getNextNode()); + AI->replaceAllUsesWith(AvailableAlloca); if (Align1 != Align2) { @@ -258,39 +244,64 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, } unsigned Inliner::getInlineThreshold(CallSite CS) const { - int thres = InlineThreshold; // -inline-threshold or else selected by - // overall opt level + int Threshold = InlineThreshold; // -inline-threshold or else selected by + // overall opt level // If -inline-threshold is not given, listen to the optsize attribute when it // would decrease the threshold. Function *Caller = CS.getCaller(); bool OptSize = Caller && !Caller->isDeclaration() && + // FIXME: Use Function::optForSize(). Caller->hasFnAttribute(Attribute::OptimizeForSize); if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && - OptSizeThreshold < thres) - thres = OptSizeThreshold; + OptSizeThreshold < Threshold) + Threshold = OptSizeThreshold; - // Listen to the inlinehint attribute when it would increase the threshold - // and the caller does not need to minimize its size. Function *Callee = CS.getCalledFunction(); - bool InlineHint = Callee && !Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::InlineHint); - if (InlineHint && HintThreshold > thres && - !Caller->hasFnAttribute(Attribute::MinSize)) - thres = HintThreshold; + if (!Callee || Callee->isDeclaration()) + return Threshold; + + // If profile information is available, use that to adjust threshold of hot + // and cold functions. + // FIXME: The heuristic used below for determining hotness and coldness is + // based on preliminary SPEC tuning and may not be optimal. Replace this with + // a well-tuned heuristic based on *callsite* hotness and not callee hotness. 
+ uint64_t FunctionCount = 0, MaxFunctionCount = 0; + bool HasPGOCounts = false; + if (Callee->getEntryCount() && + Callee->getParent()->getMaximumFunctionCount()) { + HasPGOCounts = true; + FunctionCount = Callee->getEntryCount().getValue(); + MaxFunctionCount = + Callee->getParent()->getMaximumFunctionCount().getValue(); + } - // Listen to the cold attribute when it would decrease the threshold. - bool ColdCallee = Callee && !Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::Cold); + // Listen to the inlinehint attribute or profile based hotness information + // when it would increase the threshold and the caller does not need to + // minimize its size. + bool InlineHint = + Callee->hasFnAttribute(Attribute::InlineHint) || + (HasPGOCounts && + FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)); + if (InlineHint && HintThreshold > Threshold && + !Caller->hasFnAttribute(Attribute::MinSize)) + Threshold = HintThreshold; + + // Listen to the cold attribute or profile based coldness information + // when it would decrease the threshold. + bool ColdCallee = + Callee->hasFnAttribute(Attribute::Cold) || + (HasPGOCounts && + FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)); // Command line argument for InlineLimit will override the default // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, // do not use the default cold threshold even if it is smaller. 
if ((InlineLimit.getNumOccurrences() == 0 || ColdThreshold.getNumOccurrences() > 0) && ColdCallee && - ColdThreshold < thres) - thres = ColdThreshold; + ColdThreshold < Threshold) + Threshold = ColdThreshold; - return thres; + return Threshold; } static void emitAnalysis(CallSite CS, const Twine &Msg) { @@ -430,10 +441,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); - AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); - auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; - AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); + ACT = &getAnalysis<AssumptionCacheTracker>(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); SmallPtrSet<Function*, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); @@ -469,8 +478,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. - if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) - continue; + if (Function *Callee = CS.getCalledFunction()) + if (Callee->isDeclaration()) + continue; CallSites.push_back(std::make_pair(CS, -1)); } @@ -492,7 +502,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, AA, ACT); + InlineFunctionInfo InlineInfo(&CG, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -513,7 +523,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. 
- if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) { + if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) { DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. @@ -550,7 +560,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { } // Attempt to inline the function. - if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, + if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime)) { emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + @@ -647,8 +657,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Scan for all of the functions, looking for ones that should now be removed // from the program. Insert the dead ones in the FunctionsToRemove set. - for (auto I : CG) { - CallGraphNode *CGN = I.second; + for (const auto &I : CG) { + CallGraphNode *CGN = I.second.get(); Function *F = CGN->getFunction(); if (!F || F->isDeclaration()) continue; diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp index 7950163..21bb5d0 100644 --- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp @@ -60,6 +60,10 @@ namespace { explicit InternalizePass(); explicit InternalizePass(ArrayRef<const char *> ExportList); void LoadFile(const char *Filename); + bool maybeInternalize(GlobalValue &GV, + const std::set<const Comdat *> &ExternalComdats); + void checkComdatVisibility(GlobalValue &GV, + std::set<const Comdat *> &ExternalComdats); bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -105,40 +109,85 @@ void InternalizePass::LoadFile(const char *Filename) { } } -static bool shouldInternalize(const GlobalValue &GV, - const std::set<std::string> &ExternalNames) { +static bool isExternallyVisible(const GlobalValue 
&GV, + const std::set<std::string> &ExternalNames) { // Function must be defined here if (GV.isDeclaration()) - return false; + return true; // Available externally is really just a "declaration with a body". if (GV.hasAvailableExternallyLinkage()) - return false; + return true; // Assume that dllexported symbols are referenced elsewhere if (GV.hasDLLExportStorageClass()) - return false; - - // Already has internal linkage - if (GV.hasLocalLinkage()) - return false; + return true; // Marked to keep external? - if (ExternalNames.count(GV.getName())) - return false; + if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName())) + return true; + + return false; +} +// Internalize GV if it is possible to do so, i.e. it is not externally visible +// and is not a member of an externally visible comdat. +bool InternalizePass::maybeInternalize( + GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) { + if (Comdat *C = GV.getComdat()) { + if (ExternalComdats.count(C)) + return false; + + // If a comdat is not externally visible we can drop it. + if (auto GO = dyn_cast<GlobalObject>(&GV)) + GO->setComdat(nullptr); + + if (GV.hasLocalLinkage()) + return false; + } else { + if (GV.hasLocalLinkage()) + return false; + + if (isExternallyVisible(GV, ExternalNames)) + return false; + } + + GV.setVisibility(GlobalValue::DefaultVisibility); + GV.setLinkage(GlobalValue::InternalLinkage); return true; } +// If GV is part of a comdat and is externally visible, keep track of its +// comdat so that we don't internalize any of its members. +void InternalizePass::checkComdatVisibility( + GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) { + Comdat *C = GV.getComdat(); + if (!C) + return; + + if (isExternallyVisible(GV, ExternalNames)) + ExternalComdats.insert(C); +} + bool InternalizePass::runOnModule(Module &M) { CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>(); CallGraph *CG = CGPass ? 
&CGPass->getCallGraph() : nullptr; CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; - bool Changed = false; SmallPtrSet<GlobalValue *, 8> Used; collectUsedGlobalVariables(M, Used, false); + // Collect comdat visibility information for the module. + std::set<const Comdat *> ExternalComdats; + if (!M.getComdatSymbolTable().empty()) { + for (Function &F : M) + checkComdatVisibility(F, ExternalComdats); + for (GlobalVariable &GV : M.globals()) + checkComdatVisibility(GV, ExternalComdats); + for (GlobalAlias &GA : M.aliases()) + checkComdatVisibility(GA, ExternalComdats); + } + // We must assume that globals in llvm.used have a reference that not even // the linker can see, so we don't internalize them. // For llvm.compiler.used the situation is a bit fuzzy. The assembler and @@ -153,20 +202,16 @@ bool InternalizePass::runOnModule(Module &M) { } // Mark all functions not in the api as internal. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + for (Function &I : M) { + if (!maybeInternalize(I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - if (ExternalNode) // Remove a callgraph edge from the external node to this function. - ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); + ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]); - Changed = true; ++NumFunctions; - DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n"); } // Never internalize the llvm.used symbol. It is used to implement @@ -191,12 +236,9 @@ bool InternalizePass::runOnModule(Module &M) { // internal as well. 
for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + if (!maybeInternalize(*I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - Changed = true; ++NumGlobals; DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); } @@ -204,17 +246,20 @@ bool InternalizePass::runOnModule(Module &M) { // Mark all aliases that are not in the api as internal as well. for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + if (!maybeInternalize(*I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - Changed = true; ++NumAliases; DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); } - return Changed; + // We do not keep track of whether this pass changed the module because + // it adds unnecessary complexity: + // 1) This pass will generally be near the start of the pass pipeline, so + // there will be no analyses to invalidate. + // 2) This pass will most likely end up changing the module and it isn't worth + // worrying about optimizing the case where the module is unchanged. 
+ return true; } ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp index 41334ca..8e4ad64 100644 --- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -43,12 +43,13 @@ namespace { initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); } - bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredID(BreakCriticalEdgesID); AU.addRequiredID(LoopSimplifyID); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); } }; } @@ -79,7 +80,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", // Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } -bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { if (skipOptnoneFunction(L)) return false; @@ -92,6 +93,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { return false; DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); bool Changed = false; // If there is more than one top-level loop in this function, extract all of @@ -120,14 +122,14 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { } if (ShouldExtractLoop) { - // We must omit landing pads. Landing pads must accompany the invoke + // We must omit EH pads. EH pads must accompany the invoke // instruction. But this would result in a loop in the extracted // function. An infinite cycle occurs when it tries to extract that loop as // well. 
SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (ExitBlocks[i]->isLandingPad()) { + if (ExitBlocks[i]->isEHPad()) { ShouldExtractLoop = false; break; } @@ -141,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. - LPM.deleteLoopFromQueue(L); + LI.updateUnloop(L); } ++NumExtracted; } @@ -259,7 +261,7 @@ bool BlockExtractorPass::runOnModule(Module &M) { // Figure out which index the basic block is in its function. Function::iterator BBI = MF->begin(); std::advance(BBI, std::distance(F->begin(), Function::iterator(BB))); - TranslatedBlocksToNotExtract.insert(BBI); + TranslatedBlocksToNotExtract.insert(&*BBI); } while (!BlocksToNotExtractByName.empty()) { @@ -278,7 +280,7 @@ bool BlockExtractorPass::runOnModule(Module &M) { BasicBlock &BB = *BI; if (BB.getName() != BlockName) continue; - TranslatedBlocksToNotExtract.insert(BI); + TranslatedBlocksToNotExtract.insert(&*BI); } } @@ -291,8 +293,8 @@ bool BlockExtractorPass::runOnModule(Module &M) { for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { SplitLandingPadPreds(&*F); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - if (!TranslatedBlocksToNotExtract.count(BB)) - BlocksToExtract.push_back(BB); + if (!TranslatedBlocksToNotExtract.count(&*BB)) + BlocksToExtract.push_back(&*BB); } for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) { diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp index c6795c6..7b51574 100644 --- a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include 
"llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -26,6 +28,8 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -59,9 +63,9 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { bool BitSetInfo::containsValue( const DataLayout &DL, - const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V, uint64_t COffset) const { - if (auto GV = dyn_cast<GlobalVariable>(V)) { + if (auto GV = dyn_cast<GlobalObject>(V)) { auto I = GlobalLayout.find(GV); if (I == GlobalLayout.end()) return false; @@ -90,6 +94,21 @@ bool BitSetInfo::containsValue( return false; } +void BitSetInfo::print(raw_ostream &OS) const { + OS << "offset " << ByteOffset << " size " << BitSize << " align " + << (1 << AlignLog2); + + if (isAllOnes()) { + OS << " all-ones\n"; + return; + } + + OS << " { "; + for (uint64_t B : Bits) + OS << B << ' '; + OS << "}\n"; +} + BitSetInfo BitSetBuilder::build() { if (Min > Max) Min = 0; @@ -193,34 +212,48 @@ struct LowerBitSets : public ModulePass { Module *M; bool LinkerSubsectionsViaSymbols; + Triple::ArchType Arch; + Triple::ObjectFormatType ObjectFormat; IntegerType *Int1Ty; IntegerType *Int8Ty; IntegerType *Int32Ty; Type *Int32PtrTy; IntegerType *Int64Ty; - Type *IntPtrTy; + IntegerType *IntPtrTy; // The llvm.bitsets named metadata. NamedMDNode *BitSetNM; - // Mapping from bitset mdstrings to the call sites that test them. - DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites; + // Mapping from bitset identifiers to the call sites that test them. 
+ DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites; std::vector<ByteArrayInfo> ByteArrayInfos; BitSetInfo - buildBitSet(MDString *BitSet, - const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); + buildBitSet(Metadata *BitSet, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout); ByteArrayInfo *createByteArray(BitSetInfo &BSI); void allocateByteArrays(); Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI, Value *BitOffset); + void lowerBitSetCalls(ArrayRef<Metadata *> BitSets, + Constant *CombinedGlobalAddr, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout); Value * lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, - GlobalVariable *CombinedGlobal, - const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); - void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets, - const std::vector<GlobalVariable *> &Globals); + Constant *CombinedGlobal, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout); + void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets, + ArrayRef<GlobalVariable *> Globals); + unsigned getJumpTableEntrySize(); + Type *getJumpTableEntryType(); + Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest, + unsigned Distance); + void verifyBitSetMDNode(MDNode *Op); + void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, + ArrayRef<Function *> Functions); + void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets, + ArrayRef<GlobalObject *> Globals); bool buildBitSets(); bool eraseBitSetMetadata(); @@ -228,7 +261,7 @@ struct LowerBitSets : public ModulePass { bool runOnModule(Module &M) override; }; -} // namespace +} // anonymous namespace INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets", "Lower bitset metadata", false, false) @@ -244,6 +277,8 @@ bool LowerBitSets::doInitialization(Module &Mod) { Triple TargetTriple(M->getTargetTriple()); LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); + Arch = TargetTriple.getArch(); + 
ObjectFormat = TargetTriple.getObjectFormat(); Int1Ty = Type::getInt1Ty(M->getContext()); Int8Ty = Type::getInt8Ty(M->getContext()); @@ -262,8 +297,8 @@ bool LowerBitSets::doInitialization(Module &Mod) { /// Build a bit set for BitSet using the object layouts in /// GlobalLayout. BitSetInfo LowerBitSets::buildBitSet( - MDString *BitSet, - const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) { + Metadata *BitSet, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { BitSetBuilder BSB; // Compute the byte offset of each element of this bitset. @@ -271,8 +306,11 @@ BitSetInfo LowerBitSets::buildBitSet( for (MDNode *Op : BitSetNM->operands()) { if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) continue; - auto OpGlobal = dyn_cast<GlobalVariable>( - cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); + Constant *OpConst = + cast<ConstantAsMetadata>(Op->getOperand(1))->getValue(); + if (auto GA = dyn_cast<GlobalAlias>(OpConst)) + OpConst = GA->getAliasee(); + auto OpGlobal = dyn_cast<GlobalObject>(OpConst); if (!OpGlobal) continue; uint64_t Offset = @@ -360,9 +398,8 @@ void LowerBitSets::allocateByteArrays() { if (LinkerSubsectionsViaSymbols) { BAI->ByteArray->replaceAllUsesWith(GEP); } else { - GlobalAlias *Alias = - GlobalAlias::create(PointerType::getUnqual(Int8Ty), - GlobalValue::PrivateLinkage, "bits", GEP, M); + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M); BAI->ByteArray->replaceAllUsesWith(Alias); } BAI->ByteArray->eraseFromParent(); @@ -404,7 +441,7 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. 
- ByteArray = GlobalAlias::create(BAI->ByteArray->getType(), + ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); } @@ -421,17 +458,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, /// replace the call with. Value *LowerBitSets::lowerBitSetCall( CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, - GlobalVariable *CombinedGlobal, - const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) { + Constant *CombinedGlobalIntAddr, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { Value *Ptr = CI->getArgOperand(0); const DataLayout &DL = M->getDataLayout(); if (BSI.containsValue(DL, GlobalLayout, Ptr)) - return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext()); + return ConstantInt::getTrue(M->getContext()); - Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy); Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( - GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); + CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); BasicBlock *InitialBB = CI->getParent(); @@ -490,18 +526,19 @@ Value *LowerBitSets::lowerBitSetCall( /// Given a disjoint set of bitsets and globals, layout the globals, build the /// bit sets and lower the llvm.bitset.test calls. -void LowerBitSets::buildBitSetsFromGlobals( - const std::vector<MDString *> &BitSets, - const std::vector<GlobalVariable *> &Globals) { +void LowerBitSets::buildBitSetsFromGlobalVariables( + ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) { // Build a new global with the combined contents of the referenced globals. + // This global is a struct whose even-indexed elements contain the original + // contents of the referenced globals and whose odd-indexed elements contain + // any padding required to align the next element to the next power of 2. 
std::vector<Constant *> GlobalInits; const DataLayout &DL = M->getDataLayout(); for (GlobalVariable *G : Globals) { GlobalInits.push_back(G->getInitializer()); - uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType()); + uint64_t InitSize = DL.getTypeAllocSize(G->getValueType()); - // Compute the amount of padding required to align the next element to the - // next power of 2. + // Compute the amount of padding required. uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize; // Cap at 128 was found experimentally to have a good data/instruction @@ -515,34 +552,20 @@ void LowerBitSets::buildBitSetsFromGlobals( if (!GlobalInits.empty()) GlobalInits.pop_back(); Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits); - auto CombinedGlobal = + auto *CombinedGlobal = new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); - const StructLayout *CombinedGlobalLayout = - DL.getStructLayout(cast<StructType>(NewInit->getType())); + StructType *NewTy = cast<StructType>(NewInit->getType()); + const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy); // Compute the offsets of the original globals within the new global. - DenseMap<GlobalVariable *, uint64_t> GlobalLayout; + DenseMap<GlobalObject *, uint64_t> GlobalLayout; for (unsigned I = 0; I != Globals.size(); ++I) // Multiply by 2 to account for padding elements. GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2); - // For each bitset in this disjoint set... - for (MDString *BS : BitSets) { - // Build the bitset. - BitSetInfo BSI = buildBitSet(BS, GlobalLayout); - - ByteArrayInfo *BAI = 0; - - // Lower each call to llvm.bitset.test for this bitset. 
- for (CallInst *CI : BitSetTestCallSites[BS]) { - ++NumBitSetCallsLowered; - Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); - } - } + lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout); // Build aliases pointing to offsets into the combined global for each // global from which we built the combined global, and replace references @@ -556,9 +579,11 @@ void LowerBitSets::buildBitSetsFromGlobals( if (LinkerSubsectionsViaSymbols) { Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); } else { - GlobalAlias *GAlias = - GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(), - "", CombinedGlobalElemPtr, M); + assert(Globals[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, + Globals[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Globals[I]->getVisibility()); GAlias->takeName(Globals[I]); Globals[I]->replaceAllUsesWith(GAlias); } @@ -566,6 +591,331 @@ void LowerBitSets::buildBitSetsFromGlobals( } } +void LowerBitSets::lowerBitSetCalls( + ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr, + const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) { + Constant *CombinedGlobalIntAddr = + ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy); + + // For each bitset in this disjoint set... + for (Metadata *BS : BitSets) { + // Build the bitset. + BitSetInfo BSI = buildBitSet(BS, GlobalLayout); + DEBUG({ + if (auto BSS = dyn_cast<MDString>(BS)) + dbgs() << BSS->getString() << ": "; + else + dbgs() << "<unnamed>: "; + BSI.print(dbgs()); + }); + + ByteArrayInfo *BAI = nullptr; + + // Lower each call to llvm.bitset.test for this bitset. 
+ for (CallInst *CI : BitSetTestCallSites[BS]) { + ++NumBitSetCallsLowered; + Value *Lowered = + lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout); + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } + } +} + +void LowerBitSets::verifyBitSetMDNode(MDNode *Op) { + if (Op->getNumOperands() != 3) + report_fatal_error( + "All operands of llvm.bitsets metadata must have 3 elements"); + if (!Op->getOperand(1)) + return; + + auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1)); + if (!OpConstMD) + report_fatal_error("Bit set element must be a constant"); + auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue()); + if (!OpGlobal) + return; + + if (OpGlobal->isThreadLocal()) + report_fatal_error("Bit set element may not be thread-local"); + if (OpGlobal->hasSection()) + report_fatal_error("Bit set element may not have an explicit section"); + + if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker()) + report_fatal_error("Bit set global var element must be a definition"); + + auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2)); + if (!OffsetConstMD) + report_fatal_error("Bit set element offset must be a constant"); + auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue()); + if (!OffsetInt) + report_fatal_error("Bit set element offset must be an integer constant"); +} + +static const unsigned kX86JumpTableEntrySize = 8; + +unsigned LowerBitSets::getJumpTableEntrySize() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return kX86JumpTableEntrySize; +} + +// Create a constant representing a jump table entry for the target. This +// consists of an instruction sequence containing a relative branch to Dest. The +// constant will be laid out at address Src+(Len*Distance) where Len is the +// target-specific jump table entry size. 
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest, + unsigned Distance) { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + const unsigned kJmpPCRel32Code = 0xe9; + const unsigned kInt3Code = 0xcc; + + ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code); + + // Build a constant representing the displacement between the constant's + // address and Dest. This will resolve to a PC32 relocation referring to Dest. + Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy); + Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy); + Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt); + ConstantInt *DispOffset = + ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5); + Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset); + OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty); + + ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code); + + Constant *Fields[] = { + Jmp, OffsetedDisp, Int3, Int3, Int3, + }; + return ConstantStruct::getAnon(Fields, /*Packed=*/true); +} + +Type *LowerBitSets::getJumpTableEntryType() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return StructType::get(M->getContext(), + {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty}, + /*Packed=*/true); +} + +/// Given a disjoint set of bitsets and functions, build a jump table for the +/// functions, build the bit sets and lower the llvm.bitset.test calls. 
+void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets, + ArrayRef<Function *> Functions) { + // Unlike the global bitset builder, the function bitset builder cannot + // re-arrange functions in a particular order and base its calculations on the + // layout of the functions' entry points, as we have no idea how large a + // particular function will end up being (the size could even depend on what + // this pass does!) Instead, we build a jump table, which is a block of code + // consisting of one branch instruction for each of the functions in the bit + // set that branches to the target function, and redirect any taken function + // addresses to the corresponding jump table entry. In the object file's + // symbol table, the symbols for the target functions also refer to the jump + // table entries, so that addresses taken outside the module will pass any + // verification done inside the module. + // + // In more concrete terms, suppose we have three functions f, g, h which are + // members of a single bitset, and a function foo that returns their + // addresses: + // + // f: + // mov 0, %eax + // ret + // + // g: + // mov 1, %eax + // ret + // + // h: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // To create a jump table for these functions, we instruct the LLVM code + // generator to output a jump table in the .text section. This is done by + // representing the instructions in the jump table as an LLVM constant and + // placing them in a global variable in the .text section. 
The end result will + // (conceptually) look like this: + // + // f: + // jmp .Ltmp0 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // g: + // jmp .Ltmp1 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // h: + // jmp .Ltmp2 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // .Ltmp0: + // mov 0, %eax + // ret + // + // .Ltmp1: + // mov 1, %eax + // ret + // + // .Ltmp2: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // Because the addresses of f, g, h are evenly spaced at a power of 2, in the + // normal case the check can be carried out using the same kind of simple + // arithmetic that we normally use for globals. + + assert(!Functions.empty()); + + // Build a simple layout based on the regular layout of jump tables. + DenseMap<GlobalObject *, uint64_t> GlobalLayout; + unsigned EntrySize = getJumpTableEntrySize(); + for (unsigned I = 0; I != Functions.size(); ++I) + GlobalLayout[Functions[I]] = I * EntrySize; + + // Create a constant to hold the jump table. + ArrayType *JumpTableType = + ArrayType::get(getJumpTableEntryType(), Functions.size()); + auto JumpTable = new GlobalVariable(*M, JumpTableType, + /*isConstant=*/true, + GlobalValue::PrivateLinkage, nullptr); + JumpTable->setSection(ObjectFormat == Triple::MachO + ? "__TEXT,__text,regular,pure_instructions" + : ".text"); + lowerBitSetCalls(BitSets, JumpTable, GlobalLayout); + + // Build aliases pointing to offsets into the jump table, and replace + // references to the original functions with references to the aliases. 
+ for (unsigned I = 0; I != Functions.size(); ++I) { + Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( + ConstantExpr::getGetElementPtr( + JumpTableType, JumpTable, + ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, I)}), + Functions[I]->getType()); + if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) { + Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + assert(Functions[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0, + Functions[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Functions[I]->getVisibility()); + GAlias->takeName(Functions[I]); + Functions[I]->replaceAllUsesWith(GAlias); + } + if (!Functions[I]->isDeclarationForLinker()) + Functions[I]->setLinkage(GlobalValue::PrivateLinkage); + } + + // Build and set the jump table's initializer. + std::vector<Constant *> JumpTableEntries; + for (unsigned I = 0; I != Functions.size(); ++I) + JumpTableEntries.push_back( + createJumpTableEntry(JumpTable, Functions[I], I)); + JumpTable->setInitializer( + ConstantArray::get(JumpTableType, JumpTableEntries)); +} + +void LowerBitSets::buildBitSetsFromDisjointSet( + ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) { + llvm::DenseMap<Metadata *, uint64_t> BitSetIndices; + llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices; + for (unsigned I = 0; I != BitSets.size(); ++I) + BitSetIndices[BitSets[I]] = I; + for (unsigned I = 0; I != Globals.size(); ++I) + GlobalIndices[Globals[I]] = I; + + // For each bitset, build a set of indices that refer to globals referenced by + // the bitset. 
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size()); + if (BitSetNM) { + for (MDNode *Op : BitSetNM->operands()) { + // Op = { bitset name, global, offset } + if (!Op->getOperand(1)) + continue; + auto I = BitSetIndices.find(Op->getOperand(0)); + if (I == BitSetIndices.end()) + continue; + + auto OpGlobal = dyn_cast<GlobalObject>( + cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); + if (!OpGlobal) + continue; + BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]); + } + } + + // Order the sets of indices by size. The GlobalLayoutBuilder works best + // when given small index sets first. + std::stable_sort( + BitSetMembers.begin(), BitSetMembers.end(), + [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) { + return O1.size() < O2.size(); + }); + + // Create a GlobalLayoutBuilder and provide it with index sets as layout + // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as + // close together as possible. + GlobalLayoutBuilder GLB(Globals.size()); + for (auto &&MemSet : BitSetMembers) + GLB.addFragment(MemSet); + + // Build the bitsets from this disjoint set. + if (Globals.empty() || isa<GlobalVariable>(Globals[0])) { + // Build a vector of global variables with the computed layout. + std::vector<GlobalVariable *> OrderedGVs(Globals.size()); + auto OGI = OrderedGVs.begin(); + for (auto &&F : GLB.Fragments) { + for (auto &&Offset : F) { + auto GV = dyn_cast<GlobalVariable>(Globals[Offset]); + if (!GV) + report_fatal_error( + "Bit set may not contain both global variables and functions"); + *OGI++ = GV; + } + } + + buildBitSetsFromGlobalVariables(BitSets, OrderedGVs); + } else { + // Build a vector of functions with the computed layout. 
+ std::vector<Function *> OrderedFns(Globals.size()); + auto OFI = OrderedFns.begin(); + for (auto &&F : GLB.Fragments) { + for (auto &&Offset : F) { + auto Fn = dyn_cast<Function>(Globals[Offset]); + if (!Fn) + report_fatal_error( + "Bit set may not contain both global variables and functions"); + *OFI++ = Fn; + } + } + + buildBitSetsFromFunctions(BitSets, OrderedFns); + } +} + /// Lower all bit sets in this module. bool LowerBitSets::buildBitSets() { Function *BitSetTestFunc = @@ -576,24 +926,36 @@ bool LowerBitSets::buildBitSets() { // Equivalence class set containing bitsets and the globals they reference. // This is used to partition the set of bitsets in the module into disjoint // sets. - typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>> + typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>> GlobalClassesTy; GlobalClassesTy GlobalClasses; + // Verify the bitset metadata and build a mapping from bitset identifiers to + // their last observed index in BitSetNM. This will be used later to + // deterministically order the list of bitset identifiers. + llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices; + if (BitSetNM) { + for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) { + MDNode *Op = BitSetNM->getOperand(I); + verifyBitSetMDNode(Op); + BitSetIdIndices[Op->getOperand(0)] = I; + } + } + for (const Use &U : BitSetTestFunc->uses()) { auto CI = cast<CallInst>(U.getUser()); auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1)); - if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata())) + if (!BitSetMDVal) report_fatal_error( - "Second argument of llvm.bitset.test must be metadata string"); - auto BitSet = cast<MDString>(BitSetMDVal->getMetadata()); + "Second argument of llvm.bitset.test must be metadata"); + auto BitSet = BitSetMDVal->getMetadata(); // Add the call site to the list of call sites for this bit set.
We also use // BitSetTestCallSites to keep track of whether we have seen this bit set // before. If we have, we don't need to re-add the referenced globals to the // equivalence class. - std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator, + std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, bool> Ins = BitSetTestCallSites.insert( std::make_pair(BitSet, std::vector<CallInst *>())); @@ -608,31 +970,16 @@ bool LowerBitSets::buildBitSets() { if (!BitSetNM) continue; - // Verify the bitset metadata and add the referenced globals to the bitset's - // equivalence class. + // Add the referenced globals to the bitset's equivalence class. for (MDNode *Op : BitSetNM->operands()) { - if (Op->getNumOperands() != 3) - report_fatal_error( - "All operands of llvm.bitsets metadata must have 3 elements"); - if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) continue; - auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1)); - if (!OpConstMD) - report_fatal_error("Bit set element must be a constant"); - auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue()); + auto OpGlobal = dyn_cast<GlobalObject>( + cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); if (!OpGlobal) continue; - auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2)); - if (!OffsetConstMD) - report_fatal_error("Bit set element offset must be a constant"); - auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue()); - if (!OffsetInt) - report_fatal_error( - "Bit set element offset must be an integer constant"); - CurSet = GlobalClasses.unionSets( CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal))); } @@ -641,79 +988,51 @@ bool LowerBitSets::buildBitSets() { if (GlobalClasses.empty()) return false; - // For each disjoint set we found... + // Build a list of disjoint sets ordered by their maximum BitSetNM index + // for determinism. 
+ std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets; for (GlobalClassesTy::iterator I = GlobalClasses.begin(), E = GlobalClasses.end(); I != E; ++I) { if (!I->isLeader()) continue; - ++NumBitSetDisjointSets; - // Build the list of bitsets and referenced globals in this disjoint set. - std::vector<MDString *> BitSets; - std::vector<GlobalVariable *> Globals; - llvm::DenseMap<MDString *, uint64_t> BitSetIndices; - llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices; + unsigned MaxIndex = 0; for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); MI != GlobalClasses.member_end(); ++MI) { - if ((*MI).is<MDString *>()) { - BitSetIndices[MI->get<MDString *>()] = BitSets.size(); - BitSets.push_back(MI->get<MDString *>()); - } else { - GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size(); - Globals.push_back(MI->get<GlobalVariable *>()); - } + if ((*MI).is<Metadata *>()) + MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]); } + Sets.emplace_back(I, MaxIndex); + } + std::sort(Sets.begin(), Sets.end(), + [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1, + const std::pair<GlobalClassesTy::iterator, unsigned> &S2) { + return S1.second < S2.second; + }); - // For each bitset, build a set of indices that refer to globals referenced - // by the bitset. - std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size()); - if (BitSetNM) { - for (MDNode *Op : BitSetNM->operands()) { - // Op = { bitset name, global, offset } - if (!Op->getOperand(1)) - continue; - auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0))); - if (I == BitSetIndices.end()) - continue; - - auto OpGlobal = dyn_cast<GlobalVariable>( - cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); - if (!OpGlobal) - continue; - BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]); - } + // For each disjoint set we found... + for (const auto &S : Sets) { + // Build the list of bitsets in this disjoint set. 
+ std::vector<Metadata *> BitSets; + std::vector<GlobalObject *> Globals; + for (GlobalClassesTy::member_iterator MI = + GlobalClasses.member_begin(S.first); + MI != GlobalClasses.member_end(); ++MI) { + if ((*MI).is<Metadata *>()) + BitSets.push_back(MI->get<Metadata *>()); + else + Globals.push_back(MI->get<GlobalObject *>()); } - // Order the sets of indices by size. The GlobalLayoutBuilder works best - // when given small index sets first. - std::stable_sort( - BitSetMembers.begin(), BitSetMembers.end(), - [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) { - return O1.size() < O2.size(); - }); - - // Create a GlobalLayoutBuilder and provide it with index sets as layout - // fragments. The GlobalLayoutBuilder tries to lay out members of fragments - // as close together as possible. - GlobalLayoutBuilder GLB(Globals.size()); - for (auto &&MemSet : BitSetMembers) - GLB.addFragment(MemSet); - - // Build a vector of globals with the computed layout. - std::vector<GlobalVariable *> OrderedGlobals(Globals.size()); - auto OGI = OrderedGlobals.begin(); - for (auto &&F : GLB.Fragments) - for (auto &&Offset : F) - *OGI++ = Globals[Offset]; - - // Order bitsets by name for determinism. - std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) { - return S1->getString() < S2->getString(); + // Order bitsets by BitSetNM index for determinism. This ordering is stable + // as there is a one-to-one mapping between metadata and indices. + std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) { + return BitSetIdIndices[M1] < BitSetIdIndices[M2]; }); - // Build the bitsets from this disjoint set. - buildBitSetsFromGlobals(BitSets, OrderedGlobals); + // Lower the bitsets in this disjoint set. 
+ buildBitSetsFromDisjointSet(BitSets, Globals); } allocateByteArrays(); diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 2e3519e..8a209a1 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -27,6 +27,14 @@ // -- We define Function* container class with custom "operator<" (FunctionPtr). // -- "FunctionPtr" instances are stored in std::set collection, so every // std::set::insert operation will give you result in log(N) time. +// +// As an optimization, a hash of the function structure is calculated first, and +// two functions are only compared if they have the same hash. This hash is +// cheap to compute, and has the property that if function F == G according to +// the comparison function, then hash(F) == hash(G). This consistency property +// is critical to ensuring all possible merging opportunities are exploited. +// Collisions in the hash affect the speed of the pass but not the correctness +// or determinism of the resulting transformation. // // When a match is found the functions are folded. 
If both functions are // overridable, we move the functionality into a new internal function and @@ -87,6 +95,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Hashing.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -97,12 +106,14 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <vector> + using namespace llvm; #define DEBUG_TYPE "mergefunc" @@ -121,21 +132,64 @@ static cl::opt<unsigned> NumFunctionsForSanityCheck( namespace { +/// GlobalNumberState assigns an integer to each global value in the program, +/// which is used by the comparison routine to order references to globals. This +/// state must be preserved throughout the pass, because Functions and other +/// globals need to maintain their relative order. Globals are assigned a number +/// when they are first visited. This order is deterministic, and so the +/// assigned numbers are as well. When two functions are merged, neither number +/// is updated. If the symbols are weak, this would be incorrect. If they are +/// strong, then one will be replaced at all references to the other, and so +/// direct callsites will now see one or the other symbol, and no update is +/// necessary. Note that if we were guaranteed unique names, we could just +/// compare those, but this would not work for stripped bitcodes or for those +/// few symbols without a name. +class GlobalNumberState { + struct Config : ValueMapConfig<GlobalValue*> { + enum { FollowRAUW = false }; + }; + // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW + // occurs, the mapping does not change. 
Tracking changes is unnecessary, and + // also problematic for weak symbols (which may be overwritten). + typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap; + ValueNumberMap GlobalNumbers; + // The next unused serial number to assign to a global. + uint64_t NextNumber; + public: + GlobalNumberState() : GlobalNumbers(), NextNumber(0) {} + uint64_t getNumber(GlobalValue* Global) { + ValueNumberMap::iterator MapIter; + bool Inserted; + std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber}); + if (Inserted) + NextNumber++; + return MapIter->second; + } + void clear() { + GlobalNumbers.clear(); + } +}; + /// FunctionComparator - Compares two functions to determine whether or not /// they will generate machine code with the same behaviour. DataLayout is /// used if available. The comparator always fails conservatively (erring on the /// side of claiming that two functions are different). class FunctionComparator { public: - FunctionComparator(const Function *F1, const Function *F2) - : FnL(F1), FnR(F2) {} + FunctionComparator(const Function *F1, const Function *F2, + GlobalNumberState* GN) + : FnL(F1), FnR(F2), GlobalNumbers(GN) {} /// Test whether the two functions have equivalent behaviour. int compare(); + /// Hash a function. Equivalent functions will have the same hash, and unequal + /// functions will have different hashes with high probability. + typedef uint64_t FunctionHash; + static FunctionHash functionHash(Function &); private: /// Test whether two basic blocks have equivalent behaviour. - int compare(const BasicBlock *BBL, const BasicBlock *BBR); + int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR); /// Constants comparison. /// Its analog to lexicographical comparison between hypothetical numbers @@ -241,6 +295,10 @@ private: /// If these properties are equal - compare their contents. int cmpConstants(const Constant *L, const Constant *R); + /// Compares two global values by number. 
Uses the GlobalNumberState to + /// identify the same globals across function calls. + int cmpGlobalValues(GlobalValue *L, GlobalValue *R); + /// Assign or look up previously assigned numbers for the two values, and /// return whether the numbers are equal. Numbers are assigned in the order /// visited. @@ -320,8 +378,9 @@ private: /// /// 1. If types are of different kind (different type IDs). /// Return result of type IDs comparison, treating them as numbers. - /// 2. If types are vectors or integers, compare Type* values as numbers. - /// 3. Types has same ID, so check whether they belongs to the next group: + /// 2. If types are integers, check that they have the same width. If they + /// are vectors, check that they have the same count and subtype. + /// 3. Types have the same ID, so check whether they are one of: /// * Void /// * Float /// * Double @@ -330,8 +389,7 @@ private: /// * PPC_FP128 /// * Label /// * Metadata - /// If so - return 0, yes - we can treat these types as equal only because - /// their IDs are same. + /// We can treat these types as equal whenever their IDs are the same. /// 4. If Left and Right are pointers, return result of address space /// comparison (numbers comparison). We can treat pointer types of same /// address space as equal. @@ -343,11 +401,13 @@ private: int cmpTypes(Type *TyL, Type *TyR) const; int cmpNumbers(uint64_t L, uint64_t R) const; - int cmpAPInts(const APInt &L, const APInt &R) const; int cmpAPFloats(const APFloat &L, const APFloat &R) const; - int cmpStrings(StringRef L, StringRef R) const; + int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; + int cmpMem(StringRef L, StringRef R) const; int cmpAttrs(const AttributeSet L, const AttributeSet R) const; + int cmpRangeMetadata(const MDNode* L, const MDNode* R) const; + int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const; // The two functions undergoing comparison.
const Function *FnL, *FnR; @@ -386,30 +446,30 @@ private: /// could be operands from further BBs we didn't scan yet. /// So it's impossible to use dominance properties in general. DenseMap<const Value*, int> sn_mapL, sn_mapR; + + // The global state we will use + GlobalNumberState* GlobalNumbers; }; class FunctionNode { mutable AssertingVH<Function> F; - + FunctionComparator::FunctionHash Hash; public: - FunctionNode(Function *F) : F(F) {} + // Note the hash is recalculated potentially multiple times, but it is cheap. + FunctionNode(Function *F) + : F(F), Hash(FunctionComparator::functionHash(*F)) {} Function *getFunc() const { return F; } + FunctionComparator::FunctionHash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. void replaceBy(Function *G) const { - assert(!(*this < FunctionNode(G)) && !(FunctionNode(G) < *this) && - "The two functions must be equal"); - F = G; } - void release() { F = 0; } - bool operator<(const FunctionNode &RHS) const { - return (FunctionComparator(F, RHS.getFunc()).compare()) == -1; - } + void release() { F = nullptr; } }; -} +} // end anonymous namespace int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { if (L < R) return -1; @@ -426,13 +486,25 @@ int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { } int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { - if (int Res = cmpNumbers((uint64_t)&L.getSemantics(), - (uint64_t)&R.getSemantics())) + // Floats are ordered first by semantics (i.e. float, double, half, etc.), + // then by value interpreted as a bitstring (aka APInt). 
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics(); + if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL), + APFloat::semanticsPrecision(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL), + APFloat::semanticsMaxExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL), + APFloat::semanticsMinExponent(SR))) + return Res; + if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL), + APFloat::semanticsSizeInBits(SR))) return Res; return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt()); } -int FunctionComparator::cmpStrings(StringRef L, StringRef R) const { +int FunctionComparator::cmpMem(StringRef L, StringRef R) const { // Prevent heavy comparison, compare sizes first. if (int Res = cmpNumbers(L.size(), R.size())) return Res; @@ -466,6 +538,59 @@ int FunctionComparator::cmpAttrs(const AttributeSet L, return 0; } +int FunctionComparator::cmpRangeMetadata(const MDNode* L, + const MDNode* R) const { + if (L == R) + return 0; + if (!L) + return -1; + if (!R) + return 1; + // Range metadata is a sequence of numbers. Make sure they are the same + // sequence. + // TODO: Note that as this is metadata, it is possible to drop and/or merge + // this data when considering functions to merge. Thus this comparison would + // return 0 (i.e. equivalent), but merging would become more complicated + // because the ranges would need to be unioned. It is not likely that + // functions differ ONLY in this metadata if they are actually the same + // function semantically. 
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) + return Res; + for (size_t I = 0; I < L->getNumOperands(); ++I) { + ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); + ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); + if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue())) + return Res; + } + return 0; +} + +int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, + const Instruction *R) const { + ImmutableCallSite LCS(L); + ImmutableCallSite RCS(R); + + assert(LCS && RCS && "Must be calls or invokes!"); + assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!"); + + if (int Res = + cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles())) + return Res; + + for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) { + auto OBL = LCS.getOperandBundleAt(i); + auto OBR = RCS.getOperandBundleAt(i); + + if (int Res = OBL.getTagName().compare(OBR.getTagName())) + return Res; + + if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size())) + return Res; + } + + return 0; +} + /// Constants comparison: /// 1. Check whether type of L constant could be losslessly bitcasted to R /// type. 
@@ -500,9 +625,9 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { unsigned TyLWidth = 0; unsigned TyRWidth = 0; - if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL)) + if (auto *VecTyL = dyn_cast<VectorType>(TyL)) TyLWidth = VecTyL->getBitWidth(); - if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR)) + if (auto *VecTyR = dyn_cast<VectorType>(TyR)) TyRWidth = VecTyR->getBitWidth(); if (TyLWidth != TyRWidth) @@ -538,11 +663,29 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { if (!L->isNullValue() && R->isNullValue()) return -1; + auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L)); + auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R)); + if (GlobalValueL && GlobalValueR) { + return cmpGlobalValues(GlobalValueL, GlobalValueR); + } + if (int Res = cmpNumbers(L->getValueID(), R->getValueID())) return Res; + if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) { + const auto *SeqR = cast<ConstantDataSequential>(R); + // This handles ConstantDataArray and ConstantDataVector. Note that we + // compare the two raw data arrays, which might differ depending on the host + // endianness. This isn't a problem though, because the endianness of a module + // will affect the order of the constants, but this order is the same + // for a given input module and host platform.
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues()); + } + switch (L->getValueID()) { - case Value::UndefValueVal: return TypesRes; + case Value::UndefValueVal: + case Value::ConstantTokenNoneVal: + return TypesRes; case Value::ConstantIntVal: { const APInt &LInt = cast<ConstantInt>(L)->getValue(); const APInt &RInt = cast<ConstantInt>(R)->getValue(); @@ -609,19 +752,55 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { } return 0; } - case Value::FunctionVal: - case Value::GlobalVariableVal: - case Value::GlobalAliasVal: - default: // Unknown constant, cast L and R pointers to numbers and compare. - return cmpNumbers((uint64_t)L, (uint64_t)R); + case Value::BlockAddressVal: { + const BlockAddress *LBA = cast<BlockAddress>(L); + const BlockAddress *RBA = cast<BlockAddress>(R); + if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction())) + return Res; + if (LBA->getFunction() == RBA->getFunction()) { + // They are BBs in the same function. Order by which comes first in the + // BB order of the function. This order is deterministic. + Function* F = LBA->getFunction(); + BasicBlock *LBB = LBA->getBasicBlock(); + BasicBlock *RBB = RBA->getBasicBlock(); + if (LBB == RBB) + return 0; + for(BasicBlock &BB : F->getBasicBlockList()) { + if (&BB == LBB) { + assert(&BB != RBB); + return -1; + } + if (&BB == RBB) + return 1; + } + llvm_unreachable("Basic Block Address does not point to a basic block in " + "its function."); + return -1; + } else { + // cmpValues said the functions are the same. So because they aren't + // literally the same pointer, they must respectively be the left and + // right functions. + assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR); + // cmpValues will tell us if these are equivalent BasicBlocks, in the + // context of their respective functions. + return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock()); + } } + default: // Unknown constant, abort. 
+ DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n"); + llvm_unreachable("Constant ValueID not recognized."); + return -1; + } +} + +int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) { + return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R)); } /// cmpType - compares two types, /// defines total ordering among the types set. /// See method declaration comments for more details. int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { - PointerType *PTyL = dyn_cast<PointerType>(TyL); PointerType *PTyR = dyn_cast<PointerType>(TyR); @@ -642,10 +821,15 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { llvm_unreachable("Unknown type!"); // Fall through in Release mode. case Type::IntegerTyID: - case Type::VectorTyID: - // TyL == TyR would have returned true earlier. - return cmpNumbers((uint64_t)TyL, (uint64_t)TyR); - + return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(), + cast<IntegerType>(TyR)->getBitWidth()); + case Type::VectorTyID: { + VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR); + if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements())) + return Res; + return cmpTypes(VTyL->getElementType(), VTyR->getElementType()); + } + // TyL == TyR would have returned true earlier, because types are uniqued. 
case Type::VoidTyID: case Type::FloatTyID: case Type::DoubleTyID: @@ -654,6 +838,7 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::PPC_FP128TyID: case Type::LabelTyID: case Type::MetadataTyID: + case Type::TokenTyID: return 0; case Type::PointerTyID: { @@ -759,8 +944,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) return Res; - return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); + return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), + cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); } if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { if (int Res = @@ -783,20 +968,24 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes())) return Res; - return cmpNumbers( - (uint64_t)CI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); + if (int Res = cmpOperandBundlesSchema(CI, R)) + return Res; + return cmpRangeMetadata( + CI->getMetadata(LLVMContext::MD_range), + cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); } - if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) { - if (int Res = cmpNumbers(CI->getCallingConv(), + if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) { + if (int Res = cmpNumbers(II->getCallingConv(), cast<InvokeInst>(R)->getCallingConv())) return Res; if (int Res = - cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes())) + cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes())) + return Res; + if (int Res = cmpOperandBundlesSchema(II, R)) return Res; - return cmpNumbers( - (uint64_t)CI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); + return cmpRangeMetadata( + II->getMetadata(LLVMContext::MD_range), + 
cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); } if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { ArrayRef<unsigned> LIndices = IVI->getIndices(); @@ -876,9 +1065,8 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, if (GEPL->accumulateConstantOffset(DL, OffsetL) && GEPR->accumulateConstantOffset(DL, OffsetR)) return cmpAPInts(OffsetL, OffsetR); - - if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), - (uint64_t)GEPR->getPointerOperand()->getType())) + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) return Res; if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) @@ -892,6 +1080,28 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, return 0; } +int FunctionComparator::cmpInlineAsm(const InlineAsm *L, + const InlineAsm *R) const { + // InlineAsm's are uniqued. If they are the same pointer, obviously they are + // the same, otherwise compare the fields. + if (L == R) + return 0; + if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) + return Res; + if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) + return Res; + if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) + return Res; + if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) + return Res; + if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) + return Res; + if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) + return Res; + llvm_unreachable("InlineAsm blocks were not uniqued."); + return 0; +} + /// Compare two values used by the two functions under pair-wise comparison. If /// this is the first time the values are seen, they're added to the mapping so /// that we will detect mismatches on next use. 
@@ -926,7 +1136,7 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); if (InlineAsmL && InlineAsmR) - return cmpNumbers((uint64_t)L, (uint64_t)R); + return cmpInlineAsm(InlineAsmL, InlineAsmR); if (InlineAsmL) return 1; if (InlineAsmR) @@ -938,12 +1148,13 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { return cmpNumbers(LeftSN.first->second, RightSN.first->second); } // Test whether two basic blocks have equivalent behaviour. -int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { +int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, + const BasicBlock *BBR) { BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); do { - if (int Res = cmpValues(InstL, InstR)) + if (int Res = cmpValues(&*InstL, &*InstR)) return Res; const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL); @@ -961,7 +1172,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { if (int Res = cmpGEPs(GEPL, GEPR)) return Res; } else { - if (int Res = cmpOperations(InstL, InstR)) + if (int Res = cmpOperations(&*InstL, &*InstR)) return Res; assert(InstL->getNumOperands() == InstR->getNumOperands()); @@ -970,11 +1181,8 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { Value *OpR = InstR->getOperand(i); if (int Res = cmpValues(OpL, OpR)) return Res; - if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID())) - return Res; - // TODO: Already checked in cmpOperation - if (int Res = cmpTypes(OpL->getType(), OpR->getType())) - return Res; + // cmpValues should ensure this is true. + assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); } } @@ -990,7 +1198,6 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { // Test whether the two functions have equivalent behaviour. 
int FunctionComparator::compare() { - sn_mapL.clear(); sn_mapR.clear(); @@ -1001,7 +1208,7 @@ int FunctionComparator::compare() { return Res; if (FnL->hasGC()) { - if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC())) + if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) return Res; } @@ -1009,7 +1216,7 @@ int FunctionComparator::compare() { return Res; if (FnL->hasSection()) { - if (int Res = cmpStrings(FnL->getSection(), FnR->getSection())) + if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) return Res; } @@ -1033,7 +1240,7 @@ int FunctionComparator::compare() { ArgRI = FnR->arg_begin(), ArgLE = FnL->arg_end(); ArgLI != ArgLE; ++ArgLI, ++ArgRI) { - if (cmpValues(ArgLI, ArgRI) != 0) + if (cmpValues(&*ArgLI, &*ArgRI) != 0) llvm_unreachable("Arguments repeat!"); } @@ -1055,7 +1262,7 @@ int FunctionComparator::compare() { if (int Res = cmpValues(BBL, BBR)) return Res; - if (int Res = compare(BBL, BBR)) + if (int Res = cmpBasicBlocks(BBL, BBR)) return Res; const TerminatorInst *TermL = BBL->getTerminator(); @@ -1074,6 +1281,68 @@ int FunctionComparator::compare() { } namespace { +// Accumulate the hash of a sequence of 64-bit integers. This is similar to a +// hash of a sequence of 64bit ints, but the entire input does not need to be +// available at once. This interface is necessary for functionHash because it +// needs to accumulate the hash as the structure of the function is traversed +// without saving these values to an intermediate buffer. This form of hashing +// is not often needed, as usually the object to hash is just read from a +// buffer. +class HashAccumulator64 { + uint64_t Hash; +public: + // Initialize to random constant, so the state isn't zero. + HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { + Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + } + // No finishing is required, because the entire hash value is used. 
+ uint64_t getHash() { return Hash; } +}; +} // end anonymous namespace + +// A function hash is calculated by considering only the number of arguments and +// whether a function is varargs, the order of basic blocks (given by the +// successors of each basic block in depth first order), and the order of +// opcodes of each instruction within each of these basic blocks. This mirrors +// the strategy compare() uses to compare functions by walking the BBs in depth +// first order and comparing each instruction in sequence. Because this hash +// does not look at the operands, it is insensitive to things such as the +// target of calls and the constants used in the function, which makes it useful +// when possibly merging functions which are the same modulo constants and call +// targets. +FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { + HashAccumulator64 H; + H.add(F.isVarArg()); + H.add(F.arg_size()); + + SmallVector<const BasicBlock *, 8> BBs; + SmallSet<const BasicBlock *, 16> VisitedBBs; + + // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), + // accumulating the hash of the function "structure." (BB and opcode sequence) + BBs.push_back(&F.getEntryBlock()); + VisitedBBs.insert(BBs[0]); + while (!BBs.empty()) { + const BasicBlock *BB = BBs.pop_back_val(); + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the opcodes + H.add(45798); + for (auto &Inst : *BB) { + H.add(Inst.getOpcode()); + } + const TerminatorInst *Term = BB->getTerminator(); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(Term->getSuccessor(i)).second) + continue; + BBs.push_back(Term->getSuccessor(i)); + } + } + return H.getHash(); +} + + +namespace { /// MergeFunctions finds functions which will generate identical machine code, /// by considering all pointer types to be equivalent. 
Once identified, @@ -1084,14 +1353,31 @@ class MergeFunctions : public ModulePass { public: static char ID; MergeFunctions() - : ModulePass(ID), HasGlobalAliases(false) { + : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(), + HasGlobalAliases(false) { initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; private: - typedef std::set<FunctionNode> FnTreeType; + // The function comparison operator is provided here so that FunctionNodes do + // not need to become larger with another pointer. + class FunctionNodeCmp { + GlobalNumberState* GlobalNumbers; + public: + FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {} + bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const { + // Order first by hashes, then full function comparison. + if (LHS.getHash() != RHS.getHash()) + return LHS.getHash() < RHS.getHash(); + FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers); + return FCmp.compare() == -1; + } + }; + typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType; + + GlobalNumberState GlobalNumbers; /// A work queue of functions that may have been modified and should be /// analyzed again. @@ -1133,17 +1419,23 @@ private: void writeAlias(Function *F, Function *G); /// Replace function F with function G in the function tree. - void replaceFunctionInTree(FnTreeType::iterator &IterToF, Function *G); + void replaceFunctionInTree(const FunctionNode &FN, Function *G); /// The set of all distinct functions. Use the insert() and remove() methods - /// to modify it. + /// to modify it. The map allows efficient lookup and deferring of Functions. FnTreeType FnTree; + // Map functions to the iterators of the FunctionNode which contains them + // in the FnTree. This must be updated carefully whenever the FnTree is + // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid + // dangling iterators into FnTree. 
The invariant that preserves this is that + // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree. + ValueMap<Function*, FnTreeType::iterator> FNodesInTree; /// Whether or not the target supports global aliases. bool HasGlobalAliases; }; -} // end anonymous namespace +} // end anonymous namespace char MergeFunctions::ID = 0; INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) @@ -1166,8 +1458,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { Function *F1 = cast<Function>(*I); Function *F2 = cast<Function>(*J); - int Res1 = FunctionComparator(F1, F2).compare(); - int Res2 = FunctionComparator(F2, F1).compare(); + int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); + int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare(); // If F1 <= F2, then F2 >= F1, otherwise report failure. if (Res1 != -Res2) { @@ -1188,8 +1480,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { continue; Function *F3 = cast<Function>(*K); - int Res3 = FunctionComparator(F1, F3).compare(); - int Res4 = FunctionComparator(F2, F3).compare(); + int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare(); + int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare(); bool Transitive = true; @@ -1227,11 +1519,33 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) - Deferred.push_back(WeakVH(I)); + // All functions in the module, ordered by hash. Functions with a unique + // hash value are easily eliminated. 
+ std::vector<std::pair<FunctionComparator::FunctionHash, Function *>> + HashedFuncs; + for (Function &Func : M) { + if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) { + HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func}); + } } + std::stable_sort( + HashedFuncs.begin(), HashedFuncs.end(), + [](const std::pair<FunctionComparator::FunctionHash, Function *> &a, + const std::pair<FunctionComparator::FunctionHash, Function *> &b) { + return a.first < b.first; + }); + + auto S = HashedFuncs.begin(); + for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) { + // If the hash value matches the previous value or the next one, we must + // consider merging it. Otherwise it is dropped and never considered again. + if ((I != S && std::prev(I)->first == I->first) || + (std::next(I) != IE && std::next(I)->first == I->first) ) { + Deferred.push_back(WeakVH(I->second)); + } + } + do { std::vector<WeakVH> Worklist; Deferred.swap(Worklist); @@ -1270,6 +1584,7 @@ bool MergeFunctions::runOnModule(Module &M) { } while (!Deferred.empty()); FnTree.clear(); + GlobalNumbers.clear(); return Changed; } @@ -1282,6 +1597,32 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { ++UI; CallSite CS(U->getUser()); if (CS && CS.isCallee(U)) { + // Transfer the called function's attributes to the call site. Due to the + // bitcast we will 'lose' ABI changing attributes because the 'called + // function' is no longer a Function* but the bitcast. Code that looks up + // the attributes from the called function will fail. + + // FIXME: This is not actually true, at least not anymore. The callsite + // will always have the same ABI affecting attributes as the callee, + // because otherwise the original input has UB. Note that Old and New + // always have matching ABI, so no attributes need to be changed. 
+ // Transferring other attributes may help other optimizations, but that + // should be done uniformly and not in this ad-hoc way. + auto &Context = New->getContext(); + auto NewFuncAttrs = New->getAttributes(); + auto CallSiteAttrs = CS.getAttributes(); + + CallSiteAttrs = CallSiteAttrs.addAttributes( + Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes()); + + for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) { + AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx); + if (Attrs.getNumSlots()) + CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs); + } + + CS.setAttributes(CallSiteAttrs); + remove(CS.getInstruction()->getParent()->getParent()); U->set(BitcastNew); } @@ -1352,15 +1693,15 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { SmallVector<Value *, 16> Args; unsigned i = 0; FunctionType *FFTy = F->getFunctionType(); - for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); - AI != AE; ++AI) { - Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i))); + for (Argument & AI : NewG->args()) { + Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i))); ++i; } CallInst *CI = Builder.CreateCall(F, Args); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); + CI->setAttributes(F->getAttributes()); if (NewG->getReturnType()->isVoidTy()) { Builder.CreateRetVoid(); } else { @@ -1379,8 +1720,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { // Replace G with an alias to F and delete G. 
void MergeFunctions::writeAlias(Function *F, Function *G) { - PointerType *PTy = G->getType(); - auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F); + auto *GA = GlobalAlias::create(G->getLinkage(), "", F); F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); GA->takeName(G); GA->setVisibility(G->getVisibility()); @@ -1425,19 +1765,24 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { ++NumFunctionsMerged; } -/// Replace function F for function G in the map. -void MergeFunctions::replaceFunctionInTree(FnTreeType::iterator &IterToF, +/// Replace function F by function G. +void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN, Function *G) { - Function *F = IterToF->getFunc(); - - // A total order is already guaranteed otherwise because we process strong - // functions before weak functions. - assert(((F->mayBeOverridden() && G->mayBeOverridden()) || - (!F->mayBeOverridden() && !G->mayBeOverridden())) && - "Only change functions if both are strong or both are weak"); - (void)F; - - IterToF->replaceBy(G); + Function *F = FN.getFunc(); + assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 && + "The two functions must be equal"); + + auto I = FNodesInTree.find(F); + assert(I != FNodesInTree.end() && "F should be in FNodesInTree"); + assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G"); + + FnTreeType::iterator IterToFNInFnTree = I->second; + assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree."); + // Remove F -> FN and insert G -> FN + FNodesInTree.erase(I); + FNodesInTree.insert({G, IterToFNInFnTree}); + // Replace F with G in FN, which is stored inside the FnTree. 
+ FN.replaceBy(G); } // Insert a ComparableFunction into the FnTree, or merge it away if equal to one @@ -1447,6 +1792,8 @@ bool MergeFunctions::insert(Function *NewFunction) { FnTree.insert(FunctionNode(NewFunction)); if (Result.second) { + assert(FNodesInTree.count(NewFunction) == 0); + FNodesInTree.insert({NewFunction, Result.first}); DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); return false; } @@ -1476,7 +1823,7 @@ bool MergeFunctions::insert(Function *NewFunction) { if (OldF.getFunc()->getName() > NewFunction->getName()) { // Swap the two functions. Function *F = OldF.getFunc(); - replaceFunctionInTree(Result.first, NewFunction); + replaceFunctionInTree(*Result.first, NewFunction); NewFunction = F; assert(OldF.getFunc() != F && "Must have swapped the functions."); } @@ -1495,18 +1842,13 @@ bool MergeFunctions::insert(Function *NewFunction) { // Remove a function from FnTree. If it was already in FnTree, add // it to Deferred so that we'll look at it in the next round. void MergeFunctions::remove(Function *F) { - // We need to make sure we remove F, not a function "equal" to F per the - // function equality comparator. - FnTreeType::iterator found = FnTree.find(FunctionNode(F)); - size_t Erased = 0; - if (found != FnTree.end() && found->getFunc() == F) { - Erased = 1; - FnTree.erase(found); - } - - if (Erased) { - DEBUG(dbgs() << "Removed " << F->getName() - << " from set and deferred it.\n"); + auto I = FNodesInTree.find(F); + if (I != FNodesInTree.end()) { + DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n"); + FnTree.erase(I->second); + // I->second has been invalidated, remove it from the FNodesInTree map to + // preserve the invariant. 
+ FNodesInTree.erase(I); Deferred.emplace_back(F); } } @@ -1516,6 +1858,8 @@ void MergeFunctions::remove(Function *F) { void MergeFunctions::removeUsers(Value *V) { std::vector<Value *> Worklist; Worklist.push_back(V); + SmallSet<Value*, 8> Visited; + Visited.insert(V); while (!Worklist.empty()) { Value *V = Worklist.back(); Worklist.pop_back(); @@ -1526,8 +1870,10 @@ void MergeFunctions::removeUsers(Value *V) { } else if (isa<GlobalValue>(U)) { // do nothing } else if (Constant *C = dyn_cast<Constant>(U)) { - for (User *UU : C->users()) - Worklist.push_back(UU); + for (User *UU : C->users()) { + if (Visited.insert(UU).second) + Worklist.push_back(UU); + } } } } diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp index 4a7cb7b..0c5c84b 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -50,7 +50,7 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } Function* PartialInliner::unswitchFunction(Function* F) { // First, verify that this function is an unswitching candidate... - BasicBlock* entryBlock = F->begin(); + BasicBlock *entryBlock = &F->front(); BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); if (!BR || BR->isUnconditional()) return nullptr; @@ -89,18 +89,18 @@ Function* PartialInliner::unswitchFunction(Function* F) { // of which will go outside. 
BasicBlock* preReturn = newReturnBlock; newReturnBlock = newReturnBlock->splitBasicBlock( - newReturnBlock->getFirstNonPHI()); + newReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = preReturn->begin(); - BasicBlock::iterator Ins = newReturnBlock->begin(); + Instruction *Ins = &newReturnBlock->front(); while (I != preReturn->end()) { PHINode* OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; - - PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); + + PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI(); - - retPhi->addIncoming(I, preReturn); + + retPhi->addIncoming(&*I, preReturn); retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), newEntryBlock); OldPhi->removeIncomingValue(newEntryBlock); @@ -116,8 +116,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { FE = duplicateFunction->end(); FI != FE; ++FI) if (&*FI != newEntryBlock && &*FI != newReturnBlock && &*FI != newNonReturnBlock) - toExtract.push_back(FI); - + toExtract.push_back(&*FI); + // The CodeExtractor needs a dominator tree. 
DominatorTree DT; DT.recalculate(*duplicateFunction); diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 909baae..9876efa 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -12,19 +12,26 @@ // //===----------------------------------------------------------------------===// - #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Verifier.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Vectorize.h" @@ -89,11 +96,21 @@ static cl::opt<bool> EnableLoopDistribute( "enable-loop-distribute", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass")); +static cl::opt<bool> EnableNonLTOGlobalsModRef( + "enable-non-lto-gmr", cl::init(true), cl::Hidden, + cl::desc( + "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); + +static cl::opt<bool> EnableLoopLoadElim( + "enable-loop-load-elim", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopLoadElimination Pass")); + 
PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; + FunctionIndex = nullptr; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -143,10 +160,9 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses( // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. if (UseCFLAA) - PM.add(createCFLAliasAnalysisPass()); - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createScopedNoAliasAAPass()); - PM.add(createBasicAliasAnalysisPass()); + PM.add(createCFLAAWrapperPass()); + PM.add(createTypeBasedAAWrapperPass()); + PM.add(createScopedNoAliasAAWrapperPass()); } void PassManagerBuilder::populateFunctionPassManager( @@ -172,6 +188,9 @@ void PassManagerBuilder::populateFunctionPassManager( void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + // If all optimizations are disabled, just run the always-inline pass and, // if enabled, the function merging pass. if (OptLevel == 0) { @@ -201,10 +220,15 @@ void PassManagerBuilder::populateModulePassManager( addInitialAliasAnalysisPasses(MPM); if (!DisableUnitAtATime) { + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars + MPM.add(createPromoteMemoryToRegisterPass()); MPM.add(createDeadArgEliminationPass()); // Dead argument elimination @@ -213,6 +237,12 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } + if (EnableNonLTOGlobalsModRef) + // We add a module alias analysis pass here. 
In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + // Start of CallGraph SCC passes. if (!DisableUnitAtATime) MPM.add(createPruneEHPass()); // Remove dead EH info @@ -245,6 +275,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. @@ -315,9 +346,42 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE later, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead + // and saves running remaining passes on the eliminated functions. + MPM.add(createEliminateAvailableExternallyPass()); + } + + if (EnableNonLTOGlobalsModRef) + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. 
By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). + MPM.add(createGlobalsAAWrapperPass()); + if (RunFloat2Int) MPM.add(createFloat2IntPass()); + addExtensionsToPM(EP_VectorizerStart, MPM); + // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. @@ -329,6 +393,12 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopDistributePass()); MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); + + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + if (EnableLoopLoadElim) + MPM.add(createLoopLoadEliminationPass()); + // FIXME: Because of #pragma vectorize enable, the passes below are always // inserted in the pipeline, even when the vectorizer doesn't run (ex. when // on -O1 and no #pragma is found). Would be good to have these two passes @@ -402,17 +472,6 @@ void PassManagerBuilder::populateModulePassManager( // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. 
if (OptLevel > 1) { - if (!PrepareForLTO) { - // Remove avail extern fns and globals definitions if we aren't - // compiling an object file for later LTO. For LTO we want to preserve - // these so they are eligible for inlining at link-time. Note if they - // are unreferenced they will be removed by GlobalDCE below, so - // this only impacts referenced available externally globals. - // Eventually they will be suppressed during codegen, but eliminating - // here enables more opportunity for GlobalDCE as it may make - // globals referenced by available external functions dead. - MPM.add(createEliminateAvailableExternallyPass()); - } MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. MPM.add(createConstantMergePass()); // Merge dup global constants } @@ -428,13 +487,25 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); + if (FunctionIndex) + PM.add(createFunctionImportPass(FunctionIndex)); + + // Allow forcing function attributes as a debugging and tuning aid. + PM.add(createForceFunctionAttrsLegacyPass()); + + // Infer attributes about declarations if possible. + PM.add(createInferFunctionAttrsLegacyPass()); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. PM.add(createIPSCCPPass()); // Now that we internalized some globals, see if we can hack on them! + PM.add(createFunctionAttrsPass()); // Add norecurse if possible. PM.add(createGlobalOptimizerPass()); + // Promote any localized global vars. + PM.add(createPromoteMemoryToRegisterPass()); // Linking modules together can lead to duplicated global constants, only // keep one copy of each constant. 
@@ -481,7 +552,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createFunctionAttrsPass()); // Add nocapture. - PM.add(createGlobalsModRefPass()); // IP alias analysis. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. if (EnableMLSM) @@ -500,6 +571,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createLoopVectorizePass(true, LoopVectorize)); + // Now that we've optimized loops (in particular loop induction variables), + // we may have exposed more scalar opportunities. Run parts of the scalar + // optimizer again at this point. + PM.add(createInstructionCombiningPass()); // Initial cleanup + PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createSCCPPass()); // Propagate exposed constants + PM.add(createInstructionCombiningPass()); // Clean up again + PM.add(createBitTrackingDCEPass()); + // More scalar chains could be vectorized due to more alias information if (RunSLPAfterLoopVectorization) if (SLPVectorize) @@ -524,6 +604,9 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( // Delete basic blocks, which optimization passes may have killed. PM.add(createCFGSimplificationPass()); + // Drop bodies of available externally objects to improve GlobalDCE. + PM.add(createEliminateAvailableExternallyPass()); + // Now that we have optimized the program, discard unreachable functions. PM.add(createGlobalDCEPass()); @@ -543,6 +626,10 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (OptLevel > 1) addLTOOptimizationPasses(PM); + // Create a function that performs CFI checks for cross-DSO calls with targets + // in the current module. + PM.add(createCrossDSOCFIPass()); + // Lower bit sets to globals. 
This pass supports Clang's control flow // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI // is enabled. The pass does nothing if CFI is disabled. diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp index b2f1010..3af4afb 100644 --- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -21,7 +21,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -153,21 +153,16 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { // If the SCC doesn't unwind or doesn't throw, note this fact. if (!SCCMightUnwind || !SCCMightReturn) for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - AttrBuilder NewAttributes; - - if (!SCCMightUnwind) - NewAttributes.addAttribute(Attribute::NoUnwind); - if (!SCCMightReturn) - NewAttributes.addAttribute(Attribute::NoReturn); - Function *F = (*I)->getFunction(); - const AttributeSet &PAL = F->getAttributes().getFnAttributes(); - const AttributeSet &NPAL = AttributeSet::get( - F->getContext(), AttributeSet::FunctionIndex, NewAttributes); - if (PAL != NPAL) { + if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) { + F->addFnAttr(Attribute::NoUnwind); + MadeChange = true; + } + + if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) { + F->addFnAttr(Attribute::NoReturn); MadeChange = true; - F->addAttributes(AttributeSet::FunctionIndex, NPAL); } } @@ -191,9 +186,13 @@ bool PruneEH::SimplifyFunction(Function *F) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) { - SmallVector<Value*, 8> 
Args(II->op_begin(), II->op_end() - 3); + SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end()); + SmallVector<OperandBundleDef, 1> OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); Call->takeName(II); Call->setCallingConv(II->getCallingConv()); Call->setAttributes(II->getAttributes()); @@ -233,7 +232,7 @@ bool PruneEH::SimplifyFunction(Function *F) { // Remove the uncond branch and add an unreachable. BB->getInstList().pop_back(); - new UnreachableInst(BB->getContext(), BB); + new UnreachableInst(BB->getContext(), &*BB); DeleteBasicBlock(New); // Delete the new BB. MadeChange = true; diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp new file mode 100644 index 0000000..928d92e --- /dev/null +++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -0,0 +1,1265 @@ +//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SampleProfileLoader transformation. This pass +// reads a profile file generated by a sampling profiler (e.g. Linux Perf - +// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the +// profile information in the given profile. +// +// This pass generates branch weight annotations on the IR: +// +// - prof: Represents branch weights. This annotation is added to branches +// to indicate the weights of each edge coming out of the branch. +// The weight of each edge is the weight of the target block for +// that edge. 
The weight of a block B is computed as the maximum +// number of samples found in B. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <cctype> + +using namespace llvm; +using namespace sampleprof; + +#define DEBUG_TYPE "sample-profile" + +// Command line option to specify the file to read samples from. This is +// mainly used for debugging. 
+static cl::opt<std::string> SampleProfileFile( + "sample-profile-file", cl::init(""), cl::value_desc("filename"), + cl::desc("Profile file loaded by -sample-profile"), cl::Hidden); +static cl::opt<unsigned> SampleProfileMaxPropagateIterations( + "sample-profile-max-propagate-iterations", cl::init(100), + cl::desc("Maximum number of iterations to go through when propagating " + "sample block/edge weights through the CFG.")); +static cl::opt<unsigned> SampleProfileRecordCoverage( + "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), + cl::desc("Emit a warning if less than N% of records in the input profile " + "are matched to the IR.")); +static cl::opt<unsigned> SampleProfileSampleCoverage( + "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), + cl::desc("Emit a warning if less than N% of samples in the input profile " + "are matched to the IR.")); +static cl::opt<double> SampleProfileHotThreshold( + "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"), + cl::desc("Inlined functions that account for more than N% of all samples " + "collected in the parent function, will be inlined again.")); +static cl::opt<double> SampleProfileGlobalHotThreshold( + "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"), + cl::desc("Top-level functions that account for more than N% of all samples " + "collected in the profile, will be marked as hot for the inliner " + "to consider.")); +static cl::opt<double> SampleProfileGlobalColdThreshold( + "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"), + cl::desc("Top-level functions that account for less than N% of all samples " + "collected in the profile, will be marked as cold for the inliner " + "to consider.")); + +namespace { +typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap; +typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap; +typedef std::pair<const BasicBlock *, const BasicBlock *> 
Edge; +typedef DenseMap<Edge, uint64_t> EdgeWeightMap; +typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>> + BlockEdgeMap; + +/// \brief Sample profile pass. +/// +/// This pass reads profile data from the file specified by +/// -sample-profile-file and annotates every affected function with the +/// profile information found in that file. +class SampleProfileLoader : public ModulePass { +public: + // Class identification, replacement for typeinfo + static char ID; + + SampleProfileLoader(StringRef Name = SampleProfileFile) + : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(), + Samples(nullptr), Filename(Name), ProfileIsValid(false), + TotalCollectedSamples(0) { + initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); + } + + bool doInitialization(Module &M) override; + + void dump() { Reader->dump(); } + + const char *getPassName() const override { return "Sample profile pass"; } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + +protected: + bool runOnFunction(Function &F); + unsigned getFunctionLoc(Function &F); + bool emitAnnotations(Function &F); + ErrorOr<uint64_t> getInstWeight(const Instruction &I) const; + ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const; + const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const; + const FunctionSamples *findFunctionSamples(const Instruction &I) const; + bool inlineHotFunctions(Function &F); + bool emitInlineHints(Function &F); + void printEdgeWeight(raw_ostream &OS, Edge E); + void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; + void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); + bool computeBlockWeights(Function &F); + void findEquivalenceClasses(Function &F); + void findEquivalencesFor(BasicBlock *BB1, + SmallVector<BasicBlock *, 8> Descendants, + DominatorTreeBase<BasicBlock> *DomTree); + void propagateWeights(Function 
&F); + uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); + void buildEdges(Function &F); + bool propagateThroughEdges(Function &F); + void computeDominanceAndLoopInfo(Function &F); + unsigned getOffset(unsigned L, unsigned H) const; + void clearFunctionData(); + + /// \brief Map basic blocks to their computed weights. + /// + /// The weight of a basic block is defined to be the maximum + /// of all the instruction weights in that block. + BlockWeightMap BlockWeights; + + /// \brief Map edges to their computed weights. + /// + /// Edge weights are computed by propagating basic block weights in + /// SampleProfile::propagateWeights. + EdgeWeightMap EdgeWeights; + + /// \brief Set of visited blocks during propagation. + SmallPtrSet<const BasicBlock *, 128> VisitedBlocks; + + /// \brief Set of visited edges during propagation. + SmallSet<Edge, 128> VisitedEdges; + + /// \brief Equivalence classes for block weights. + /// + /// Two blocks BB1 and BB2 are in the same equivalence class if they + /// dominate and post-dominate each other, and they are in the same loop + /// nest. When this happens, the two blocks are guaranteed to execute + /// the same number of times. + EquivalenceClassMap EquivalenceClass; + + /// \brief Dominance, post-dominance and loop information. + std::unique_ptr<DominatorTree> DT; + std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT; + std::unique_ptr<LoopInfo> LI; + + /// \brief Predecessors for each basic block in the CFG. + BlockEdgeMap Predecessors; + + /// \brief Successors for each basic block in the CFG. + BlockEdgeMap Successors; + + /// \brief Profile reader object. + std::unique_ptr<SampleProfileReader> Reader; + + /// \brief Samples collected for the body of this function. + FunctionSamples *Samples; + + /// \brief Name of the profile file to load. + StringRef Filename; + + /// \brief Flag indicating whether the profile input loaded successfully. 
+ bool ProfileIsValid; + + /// \brief Total number of samples collected in this profile. + /// + /// This is the sum of all the samples collected in all the functions executed + /// at runtime. + uint64_t TotalCollectedSamples; +}; + +class SampleCoverageTracker { +public: + SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {} + + bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, + uint32_t Discriminator, uint64_t Samples); + unsigned computeCoverage(unsigned Used, unsigned Total) const; + unsigned countUsedRecords(const FunctionSamples *FS) const; + unsigned countBodyRecords(const FunctionSamples *FS) const; + uint64_t getTotalUsedSamples() const { return TotalUsedSamples; } + uint64_t countBodySamples(const FunctionSamples *FS) const; + void clear() { + SampleCoverage.clear(); + TotalUsedSamples = 0; + } + +private: + typedef std::map<LineLocation, unsigned> BodySampleCoverageMap; + typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap> + FunctionSamplesCoverageMap; + + /// Coverage map for sampling records. + /// + /// This map keeps a record of sampling records that have been matched to + /// an IR instruction. This is used to detect some form of staleness in + /// profiles (see flag -sample-profile-check-coverage). + /// + /// Each entry in the map corresponds to a FunctionSamples instance. This is + /// another map that counts how many times the sample record at the + /// given location has been used. + FunctionSamplesCoverageMap SampleCoverage; + + /// Number of samples used from the profile. + /// + /// When a sampling record is used for the first time, the samples from + /// that record are added to this accumulator. Coverage is later computed + /// based on the total number of samples available in this function and + /// its callsites. + /// + /// Note that this accumulator tracks samples used from a single function + /// and all the inlined callsites. 
Strictly, we should have a map of counters + /// keyed by FunctionSamples pointers, but these stats are cleared after + /// every function, so we just need to keep a single counter. + uint64_t TotalUsedSamples; +}; + +SampleCoverageTracker CoverageTracker; + +/// Return true if the given callsite is hot wrt to its caller. +/// +/// Functions that were inlined in the original binary will be represented +/// in the inline stack in the sample profile. If the profile shows that +/// the original inline decision was "good" (i.e., the callsite is executed +/// frequently), then we will recreate the inline decision and apply the +/// profile from the inlined callsite. +/// +/// To decide whether an inlined callsite is hot, we compute the fraction +/// of samples used by the callsite with respect to the total number of samples +/// collected in the caller. +/// +/// If that fraction is larger than the default given by +/// SampleProfileHotThreshold, the callsite will be inlined again. +bool callsiteIsHot(const FunctionSamples *CallerFS, + const FunctionSamples *CallsiteFS) { + if (!CallsiteFS) + return false; // The callsite was not inlined in the original binary. + + uint64_t ParentTotalSamples = CallerFS->getTotalSamples(); + if (ParentTotalSamples == 0) + return false; // Avoid division by zero. + + uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); + if (CallsiteTotalSamples == 0) + return false; // Callsite is trivially cold. + + double PercentSamples = + (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0; + return PercentSamples >= SampleProfileHotThreshold; +} + +} + +/// Mark as used the sample record for the given function samples at +/// (LineOffset, Discriminator). +/// +/// \returns true if this is the first time we mark the given record. 
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS, + uint32_t LineOffset, + uint32_t Discriminator, + uint64_t Samples) { + LineLocation Loc(LineOffset, Discriminator); + unsigned &Count = SampleCoverage[FS][Loc]; + bool FirstTime = (++Count == 1); + if (FirstTime) + TotalUsedSamples += Samples; + return FirstTime; +} + +/// Return the number of sample records that were applied from this profile. +/// +/// This count does not include records from cold inlined callsites. +unsigned +SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const { + auto I = SampleCoverage.find(FS); + + // The size of the coverage map for FS represents the number of records + // that were marked used at least once. + unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0; + + // If there are inlined callsites in this function, count the samples found + // in the respective bodies. However, do not bother counting callees with 0 + // total samples, these are callees that were never invoked at runtime. + for (const auto &I : FS->getCallsiteSamples()) { + const FunctionSamples *CalleeSamples = &I.second; + if (callsiteIsHot(FS, CalleeSamples)) + Count += countUsedRecords(CalleeSamples); + } + + return Count; +} + +/// Return the number of sample records in the body of this profile. +/// +/// This count does not include records from cold inlined callsites. +unsigned +SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const { + unsigned Count = FS->getBodySamples().size(); + + // Only count records in hot callsites. + for (const auto &I : FS->getCallsiteSamples()) { + const FunctionSamples *CalleeSamples = &I.second; + if (callsiteIsHot(FS, CalleeSamples)) + Count += countBodyRecords(CalleeSamples); + } + + return Count; +} + +/// Return the number of samples collected in the body of this profile. +/// +/// This count does not include samples from cold inlined callsites. 
+uint64_t +SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const { + uint64_t Total = 0; + for (const auto &I : FS->getBodySamples()) + Total += I.second.getSamples(); + + // Only count samples in hot callsites. + for (const auto &I : FS->getCallsiteSamples()) { + const FunctionSamples *CalleeSamples = &I.second; + if (callsiteIsHot(FS, CalleeSamples)) + Total += countBodySamples(CalleeSamples); + } + + return Total; +} + +/// Return the fraction of sample records used in this profile. +/// +/// The returned value is an unsigned integer in the range 0-100 indicating +/// the percentage of sample records that were used while applying this +/// profile to the associated function. +unsigned SampleCoverageTracker::computeCoverage(unsigned Used, + unsigned Total) const { + assert(Used <= Total && + "number of used records cannot exceed the total number of records"); + return Total > 0 ? Used * 100 / Total : 100; +} + +/// Clear all the per-function data used to load samples and propagate weights. +void SampleProfileLoader::clearFunctionData() { + BlockWeights.clear(); + EdgeWeights.clear(); + VisitedBlocks.clear(); + VisitedEdges.clear(); + EquivalenceClass.clear(); + DT = nullptr; + PDT = nullptr; + LI = nullptr; + Predecessors.clear(); + Successors.clear(); + CoverageTracker.clear(); +} + +/// \brief Returns the offset of lineno \p L to head_lineno \p H +/// +/// \param L Lineno +/// \param H Header lineno of the function +/// +/// \returns offset to the header lineno. 16 bits are used to represent offset. +/// We assume that a single function will not exceed 65535 LOC. +unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const { + return (L - H) & 0xffff; +} + +/// \brief Print the weight of edge \p E on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param E Edge to print. 
+void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) { + OS << "weight[" << E.first->getName() << "->" << E.second->getName() + << "]: " << EdgeWeights[E] << "\n"; +} + +/// \brief Print the equivalence class of block \p BB on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param BB Block to print. +void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS, + const BasicBlock *BB) { + const BasicBlock *Equiv = EquivalenceClass[BB]; + OS << "equivalence[" << BB->getName() + << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n"; +} + +/// \brief Print the weight of block \p BB on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param BB Block to print. +void SampleProfileLoader::printBlockWeight(raw_ostream &OS, + const BasicBlock *BB) const { + const auto &I = BlockWeights.find(BB); + uint64_t W = (I == BlockWeights.end() ? 0 : I->second); + OS << "weight[" << BB->getName() << "]: " << W << "\n"; +} + +/// \brief Get the weight for an instruction. +/// +/// The "weight" of an instruction \p Inst is the number of samples +/// collected on that instruction at runtime. To retrieve it, we +/// need to compute the line number of \p Inst relative to the start of its +/// function. We use HeaderLineno to compute the offset. We then +/// look up the samples collected for \p Inst using BodySamples. +/// +/// \param Inst Instruction to query. +/// +/// \returns the weight of \p Inst. 
+ErrorOr<uint64_t> +SampleProfileLoader::getInstWeight(const Instruction &Inst) const { + DebugLoc DLoc = Inst.getDebugLoc(); + if (!DLoc) + return std::error_code(); + + const FunctionSamples *FS = findFunctionSamples(Inst); + if (!FS) + return std::error_code(); + + const DILocation *DIL = DLoc; + unsigned Lineno = DLoc.getLine(); + unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine(); + + uint32_t LineOffset = getOffset(Lineno, HeaderLineno); + uint32_t Discriminator = DIL->getDiscriminator(); + ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator); + if (R) { + bool FirstMark = + CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get()); + if (FirstMark) { + const Function *F = Inst.getParent()->getParent(); + LLVMContext &Ctx = F->getContext(); + emitOptimizationRemark( + Ctx, DEBUG_TYPE, *F, DLoc, + Twine("Applied ") + Twine(*R) + " samples from profile (offset: " + + Twine(LineOffset) + + ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")"); + } + DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":" + << Inst << " (line offset: " << Lineno - HeaderLineno << "." + << DIL->getDiscriminator() << " - weight: " << R.get() + << ")\n"); + } + return R; +} + +/// \brief Compute the weight of a basic block. +/// +/// The weight of basic block \p BB is the maximum weight of all the +/// instructions in BB. +/// +/// \param BB The basic block to query. +/// +/// \returns the weight for \p BB. +ErrorOr<uint64_t> +SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const { + bool Found = false; + uint64_t Weight = 0; + for (auto &I : BB->getInstList()) { + const ErrorOr<uint64_t> &R = getInstWeight(I); + if (R && R.get() >= Weight) { + Weight = R.get(); + Found = true; + } + } + if (Found) + return Weight; + else + return std::error_code(); +} + +/// \brief Compute and store the weights of every basic block. 
+/// +/// This populates the BlockWeights map by computing +/// the weights of every basic block in the CFG. +/// +/// \param F The function to query. +bool SampleProfileLoader::computeBlockWeights(Function &F) { + bool Changed = false; + DEBUG(dbgs() << "Block weights\n"); + for (const auto &BB : F) { + ErrorOr<uint64_t> Weight = getBlockWeight(&BB); + if (Weight) { + BlockWeights[&BB] = Weight.get(); + VisitedBlocks.insert(&BB); + Changed = true; + } + DEBUG(printBlockWeight(dbgs(), &BB)); + } + + return Changed; +} + +/// \brief Get the FunctionSamples for a call instruction. +/// +/// The FunctionSamples of a call instruction \p Inst is the inlined +/// instance in which that call instruction is calling to. It contains +/// all samples that resides in the inlined instance. We first find the +/// inlined instance in which the call instruction is from, then we +/// traverse its children to find the callsite with the matching +/// location and callee function name. +/// +/// \param Inst Call instruction to query. +/// +/// \returns The FunctionSamples pointer to the inlined instance. +const FunctionSamples * +SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const { + const DILocation *DIL = Inst.getDebugLoc(); + if (!DIL) { + return nullptr; + } + DISubprogram *SP = DIL->getScope()->getSubprogram(); + if (!SP) + return nullptr; + + Function *CalleeFunc = Inst.getCalledFunction(); + if (!CalleeFunc) { + return nullptr; + } + + StringRef CalleeName = CalleeFunc->getName(); + const FunctionSamples *FS = findFunctionSamples(Inst); + if (FS == nullptr) + return nullptr; + + return FS->findFunctionSamplesAt( + CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()), + DIL->getDiscriminator(), CalleeName)); +} + +/// \brief Get the FunctionSamples for an instruction. +/// +/// The FunctionSamples of an instruction \p Inst is the inlined instance +/// in which that instruction is coming from. 
We traverse the inline stack +/// of that instruction, and match it with the tree nodes in the profile. +/// +/// \param Inst Instruction to query. +/// +/// \returns the FunctionSamples pointer to the inlined instance. +const FunctionSamples * +SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { + SmallVector<CallsiteLocation, 10> S; + const DILocation *DIL = Inst.getDebugLoc(); + if (!DIL) { + return Samples; + } + StringRef CalleeName; + for (const DILocation *DIL = Inst.getDebugLoc(); DIL; + DIL = DIL->getInlinedAt()) { + DISubprogram *SP = DIL->getScope()->getSubprogram(); + if (!SP) + return nullptr; + if (!CalleeName.empty()) { + S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()), + DIL->getDiscriminator(), CalleeName)); + } + CalleeName = SP->getLinkageName(); + } + if (S.size() == 0) + return Samples; + const FunctionSamples *FS = Samples; + for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { + FS = FS->findFunctionSamplesAt(S[i]); + } + return FS; +} + +/// \brief Emit an inline hint if \p F is globally hot or cold. +/// +/// If \p F consumes a significant fraction of samples (indicated by +/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the +/// inliner to consider the function hot. +/// +/// If \p F consumes a small fraction of samples (indicated by +/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner +/// to consider the function cold. +/// +/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a +/// function globally hot or cold, we should be annotating individual callsites. +/// This is not currently possible, but work on the inliner will eventually +/// provide this ability. See http://reviews.llvm.org/D15003 for details and +/// discussion. +/// +/// \returns True if either attribute was applied to \p F. 
+bool SampleProfileLoader::emitInlineHints(Function &F) { + if (TotalCollectedSamples == 0) + return false; + + uint64_t FunctionSamples = Samples->getTotalSamples(); + double SamplesPercent = + (double)FunctionSamples / (double)TotalCollectedSamples * 100.0; + + // If the function collected more samples than the hot threshold, mark + // it globally hot. + if (SamplesPercent >= SampleProfileGlobalHotThreshold) { + F.addFnAttr(llvm::Attribute::InlineHint); + std::string Msg; + raw_string_ostream S(Msg); + S << "Applied inline hint to globally hot function '" << F.getName() + << "' with " << format("%.2f", SamplesPercent) + << "% of samples (threshold: " + << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)"; + S.flush(); + emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg); + return true; + } + + // If the function collected fewer samples than the cold threshold, mark + // it globally cold. + if (SamplesPercent <= SampleProfileGlobalColdThreshold) { + F.addFnAttr(llvm::Attribute::Cold); + std::string Msg; + raw_string_ostream S(Msg); + S << "Applied cold hint to globally cold function '" << F.getName() + << "' with " << format("%.2f", SamplesPercent) + << "% of samples (threshold: " + << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)"; + S.flush(); + emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg); + return true; + } + + return false; +} + +/// \brief Iteratively inline hot callsites of a function. +/// +/// Iteratively traverse all callsites of the function \p F, and find if +/// the corresponding inlined instance exists and is hot in profile. If +/// it is hot enough, inline the callsites and adds new callsites of the +/// callee into the caller. +/// +/// TODO: investigate the possibility of not invoking InlineFunction directly. +/// +/// \param F function to perform iterative inlining. +/// +/// \returns True if there is any inline happened. 
+bool SampleProfileLoader::inlineHotFunctions(Function &F) { + bool Changed = false; + LLVMContext &Ctx = F.getContext(); + while (true) { + bool LocalChanged = false; + SmallVector<CallInst *, 10> CIS; + for (auto &BB : F) { + for (auto &I : BB.getInstList()) { + CallInst *CI = dyn_cast<CallInst>(&I); + if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI))) + CIS.push_back(CI); + } + } + for (auto CI : CIS) { + InlineFunctionInfo IFI; + Function *CalledFunction = CI->getCalledFunction(); + DebugLoc DLoc = CI->getDebugLoc(); + uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples(); + if (InlineFunction(CI, IFI)) { + LocalChanged = true; + emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc, + Twine("inlined hot callee '") + + CalledFunction->getName() + "' with " + + Twine(NumSamples) + " samples into '" + + F.getName() + "'"); + } + } + if (LocalChanged) { + Changed = true; + } else { + break; + } + } + return Changed; +} + +/// \brief Find equivalence classes for the given block. +/// +/// This finds all the blocks that are guaranteed to execute the same +/// number of times as \p BB1. To do this, it traverses all the +/// descendants of \p BB1 in the dominator or post-dominator tree. +/// +/// A block BB2 will be in the same equivalence class as \p BB1 if +/// the following holds: +/// +/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2 +/// is a descendant of \p BB1 in the dominator tree, then BB2 should +/// dominate BB1 in the post-dominator tree. +/// +/// 2- Both BB2 and \p BB1 must be in the same loop. +/// +/// For every block BB2 that meets those two requirements, we set BB2's +/// equivalence class to \p BB1. +/// +/// \param BB1 Block to check. +/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree. +/// \param DomTree Opposite dominator tree. If \p Descendants is filled +/// with blocks from \p BB1's dominator tree, then +/// this is the post-dominator tree, and vice versa. 
+void SampleProfileLoader::findEquivalencesFor( + BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants, + DominatorTreeBase<BasicBlock> *DomTree) { + const BasicBlock *EC = EquivalenceClass[BB1]; + uint64_t Weight = BlockWeights[EC]; + for (const auto *BB2 : Descendants) { + bool IsDomParent = DomTree->dominates(BB2, BB1); + bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2); + if (BB1 != BB2 && IsDomParent && IsInSameLoop) { + EquivalenceClass[BB2] = EC; + + // If BB2 is heavier than BB1, make BB2 have the same weight + // as BB1. + // + // Note that we don't worry about the opposite situation here + // (when BB2 is lighter than BB1). We will deal with this + // during the propagation phase. Right now, we just want to + // make sure that BB1 has the largest weight of all the + // members of its equivalence set. + Weight = std::max(Weight, BlockWeights[BB2]); + } + } + BlockWeights[EC] = Weight; +} + +/// \brief Find equivalence classes. +/// +/// Since samples may be missing from blocks, we can fill in the gaps by setting +/// the weights of all the blocks in the same equivalence class to the same +/// weight. To compute the concept of equivalence, we use dominance and loop +/// information. Two blocks B1 and B2 are in the same equivalence class if B1 +/// dominates B2, B2 post-dominates B1 and both are in the same loop. +/// +/// \param F The function to query. +void SampleProfileLoader::findEquivalenceClasses(Function &F) { + SmallVector<BasicBlock *, 8> DominatedBBs; + DEBUG(dbgs() << "\nBlock equivalence classes\n"); + // Find equivalence sets based on dominance and post-dominance information. + for (auto &BB : F) { + BasicBlock *BB1 = &BB; + + // Compute BB1's equivalence class once. + if (EquivalenceClass.count(BB1)) { + DEBUG(printBlockEquivalence(dbgs(), BB1)); + continue; + } + + // By default, blocks are in their own equivalence class. + EquivalenceClass[BB1] = BB1; + + // Traverse all the blocks dominated by BB1. 
We are looking for + // every basic block BB2 such that: + // + // 1- BB1 dominates BB2. + // 2- BB2 post-dominates BB1. + // 3- BB1 and BB2 are in the same loop nest. + // + // If all those conditions hold, it means that BB2 is executed + // as many times as BB1, so they are placed in the same equivalence + // class by making BB2's equivalence class be BB1. + DominatedBBs.clear(); + DT->getDescendants(BB1, DominatedBBs); + findEquivalencesFor(BB1, DominatedBBs, PDT.get()); + + DEBUG(printBlockEquivalence(dbgs(), BB1)); + } + + // Assign weights to equivalence classes. + // + // All the basic blocks in the same equivalence class will execute + // the same number of times. Since we know that the head block in + // each equivalence class has the largest weight, assign that weight + // to all the blocks in that equivalence class. + DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n"); + for (auto &BI : F) { + const BasicBlock *BB = &BI; + const BasicBlock *EquivBB = EquivalenceClass[BB]; + if (BB != EquivBB) + BlockWeights[BB] = BlockWeights[EquivBB]; + DEBUG(printBlockWeight(dbgs(), BB)); + } +} + +/// \brief Visit the given edge to decide if it has a valid weight. +/// +/// If \p E has not been visited before, we copy to \p UnknownEdge +/// and increment the count of unknown edges. +/// +/// \param E Edge to visit. +/// \param NumUnknownEdges Current number of unknown edges. +/// \param UnknownEdge Set if E has not been visited before. +/// +/// \returns E's weight, if known. Otherwise, return 0. +uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges, + Edge *UnknownEdge) { + if (!VisitedEdges.count(E)) { + (*NumUnknownEdges)++; + *UnknownEdge = E; + return 0; + } + + return EdgeWeights[E]; +} + +/// \brief Propagate weights through incoming/outgoing edges. +/// +/// If the weight of a basic block is known, and there is only one edge +/// with an unknown weight, we can calculate the weight of that edge. 
+/// +/// Similarly, if all the edges have a known count, we can calculate the +/// count of the basic block, if needed. +/// +/// \param F Function to process. +/// +/// \returns True if new weights were assigned to edges or blocks. +bool SampleProfileLoader::propagateThroughEdges(Function &F) { + bool Changed = false; + DEBUG(dbgs() << "\nPropagation through edges\n"); + for (const auto &BI : F) { + const BasicBlock *BB = &BI; + const BasicBlock *EC = EquivalenceClass[BB]; + + // Visit all the predecessor and successor edges to determine + // which ones have a weight assigned already. Note that it doesn't + // matter that we only keep track of a single unknown edge. The + // only case we are interested in handling is when only a single + // edge is unknown (see setEdgeOrBlockWeight). + for (unsigned i = 0; i < 2; i++) { + uint64_t TotalWeight = 0; + unsigned NumUnknownEdges = 0; + Edge UnknownEdge, SelfReferentialEdge; + + if (i == 0) { + // First, visit all predecessor edges. + for (auto *Pred : Predecessors[BB]) { + Edge E = std::make_pair(Pred, BB); + TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge); + if (E.first == E.second) + SelfReferentialEdge = E; + } + } else { + // On the second round, visit all successor edges. + for (auto *Succ : Successors[BB]) { + Edge E = std::make_pair(BB, Succ); + TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge); + } + } + + // After visiting all the edges, there are three cases that we + // can handle immediately: + // + // - All the edge weights are known (i.e., NumUnknownEdges == 0). + // In this case, we simply check that the sum of all the edges + // is the same as BB's weight. If not, we change BB's weight + // to match. Additionally, if BB had not been visited before, + // we mark it visited. + // + // - Only one edge is unknown and BB has already been visited. + // In this case, we can compute the weight of the edge by + // subtracting the total block weight from all the known + // edge weights. 
If the edges weight more than BB, then the + // edge of the last remaining edge is set to zero. + // + // - There exists a self-referential edge and the weight of BB is + // known. In this case, this edge can be based on BB's weight. + // We add up all the other known edges and set the weight on + // the self-referential edge as we did in the previous case. + // + // In any other case, we must continue iterating. Eventually, + // all edges will get a weight, or iteration will stop when + // it reaches SampleProfileMaxPropagateIterations. + if (NumUnknownEdges <= 1) { + uint64_t &BBWeight = BlockWeights[EC]; + if (NumUnknownEdges == 0) { + // If we already know the weight of all edges, the weight of the + // basic block can be computed. It should be no larger than the sum + // of all edge weights. + if (TotalWeight > BBWeight) { + BBWeight = TotalWeight; + Changed = true; + DEBUG(dbgs() << "All edge weights for " << BB->getName() + << " known. Set weight for block: "; + printBlockWeight(dbgs(), BB);); + } + if (VisitedBlocks.insert(EC).second) + Changed = true; + } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) { + // If there is a single unknown edge and the block has been + // visited, then we can compute E's weight. + if (BBWeight >= TotalWeight) + EdgeWeights[UnknownEdge] = BBWeight - TotalWeight; + else + EdgeWeights[UnknownEdge] = 0; + VisitedEdges.insert(UnknownEdge); + Changed = true; + DEBUG(dbgs() << "Set weight for edge: "; + printEdgeWeight(dbgs(), UnknownEdge)); + } + } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) { + uint64_t &BBWeight = BlockWeights[BB]; + // We have a self-referential edge and the weight of BB is known. 
+ if (BBWeight >= TotalWeight) + EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight; + else + EdgeWeights[SelfReferentialEdge] = 0; + VisitedEdges.insert(SelfReferentialEdge); + Changed = true; + DEBUG(dbgs() << "Set self-referential edge weight to: "; + printEdgeWeight(dbgs(), SelfReferentialEdge)); + } + } + } + + return Changed; +} + +/// \brief Build in/out edge lists for each basic block in the CFG. +/// +/// We are interested in unique edges. If a block B1 has multiple +/// edges to another block B2, we only add a single B1->B2 edge. +void SampleProfileLoader::buildEdges(Function &F) { + for (auto &BI : F) { + BasicBlock *B1 = &BI; + + // Add predecessors for B1. + SmallPtrSet<BasicBlock *, 16> Visited; + if (!Predecessors[B1].empty()) + llvm_unreachable("Found a stale predecessors list in a basic block."); + for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) { + BasicBlock *B2 = *PI; + if (Visited.insert(B2).second) + Predecessors[B1].push_back(B2); + } + + // Add successors for B1. + Visited.clear(); + if (!Successors[B1].empty()) + llvm_unreachable("Found a stale successors list in a basic block."); + for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) { + BasicBlock *B2 = *SI; + if (Visited.insert(B2).second) + Successors[B1].push_back(B2); + } + } +} + +/// \brief Propagate weights into edges +/// +/// The following rules are applied to every block BB in the CFG: +/// +/// - If BB has a single predecessor/successor, then the weight +/// of that edge is the weight of the block. +/// +/// - If all incoming or outgoing edges are known except one, and the +/// weight of the block is already known, the weight of the unknown +/// edge will be the weight of the block minus the sum of all the known +/// edges. If the sum of all the known edges is larger than BB's weight, +/// we set the unknown edge weight to zero. 
+/// +/// - If there is a self-referential edge, and the weight of the block is +/// known, the weight for that edge is set to the weight of the block +/// minus the weight of the other incoming edges to that block (if +/// known). +void SampleProfileLoader::propagateWeights(Function &F) { + bool Changed = true; + unsigned I = 0; + + // Add an entry count to the function using the samples gathered + // at the function entry. + F.setEntryCount(Samples->getHeadSamples()); + + // Before propagation starts, build, for each block, a list of + // unique predecessors and successors. This is necessary to handle + // identical edges in multiway branches. Since we visit all blocks and all + // edges of the CFG, it is cleaner to build these lists once at the start + // of the pass. + buildEdges(F); + + // Propagate until we converge or we go past the iteration limit. + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F); + } + + // Generate MD_prof metadata for every branch instruction using the + // edge weights computed during propagation. + DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n"); + LLVMContext &Ctx = F.getContext(); + MDBuilder MDB(Ctx); + for (auto &BI : F) { + BasicBlock *BB = &BI; + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 1) + continue; + if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) + continue; + + DEBUG(dbgs() << "\nGetting weights for branch at line " + << TI->getDebugLoc().getLine() << ".\n"); + SmallVector<uint32_t, 4> Weights; + uint32_t MaxWeight = 0; + DebugLoc MaxDestLoc; + for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { + BasicBlock *Succ = TI->getSuccessor(I); + Edge E = std::make_pair(BB, Succ); + uint64_t Weight = EdgeWeights[E]; + DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E)); + // Use uint32_t saturated arithmetic to adjust the incoming weights, + // if needed. 
Sample counts in profiles are 64-bit unsigned values, + // but internally branch weights are expressed as 32-bit values. + if (Weight > std::numeric_limits<uint32_t>::max()) { + DEBUG(dbgs() << " (saturated due to uint32_t overflow)"); + Weight = std::numeric_limits<uint32_t>::max(); + } + Weights.push_back(static_cast<uint32_t>(Weight)); + if (Weight != 0) { + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc(); + } + } + } + + // Only set weights if there is at least one non-zero weight. + // In any other case, let the analyzer set weights. + if (MaxWeight > 0) { + DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); + TI->setMetadata(llvm::LLVMContext::MD_prof, + MDB.createBranchWeights(Weights)); + DebugLoc BranchLoc = TI->getDebugLoc(); + emitOptimizationRemark( + Ctx, DEBUG_TYPE, F, MaxDestLoc, + Twine("most popular destination for conditional branches at ") + + ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" + + Twine(BranchLoc.getLine()) + ":" + + Twine(BranchLoc.getCol())) + : Twine("<UNKNOWN LOCATION>"))); + } else { + DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n"); + } + } +} + +/// \brief Get the line number for the function header. +/// +/// This looks up function \p F in the current compilation unit and +/// retrieves the line number where the function is defined. This is +/// line 0 for all the samples read from the profile file. Every line +/// number is relative to this line. +/// +/// \param F Function object to query. +/// +/// \returns the line number where \p F is defined. If it returns 0, +/// it means that there is no debug information available for \p F. +unsigned SampleProfileLoader::getFunctionLoc(Function &F) { + if (DISubprogram *S = getDISubprogram(&F)) + return S->getLine(); + + // If the start of \p F is missing, emit a diagnostic to inform the user + // about the missed opportunity. 
+ F.getContext().diagnose(DiagnosticInfoSampleProfile( + "No debug information found in function " + F.getName() + + ": Function profile not used", + DS_Warning)); + return 0; +} + +void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) { + DT.reset(new DominatorTree); + DT->recalculate(F); + + PDT.reset(new DominatorTreeBase<BasicBlock>(true)); + PDT->recalculate(F); + + LI.reset(new LoopInfo); + LI->analyze(*DT); +} + +/// \brief Generate branch weight metadata for all branches in \p F. +/// +/// Branch weights are computed out of instruction samples using a +/// propagation heuristic. Propagation proceeds in 3 phases: +/// +/// 1- Assignment of block weights. All the basic blocks in the function +/// are initial assigned the same weight as their most frequently +/// executed instruction. +/// +/// 2- Creation of equivalence classes. Since samples may be missing from +/// blocks, we can fill in the gaps by setting the weights of all the +/// blocks in the same equivalence class to the same weight. To compute +/// the concept of equivalence, we use dominance and loop information. +/// Two blocks B1 and B2 are in the same equivalence class if B1 +/// dominates B2, B2 post-dominates B1 and both are in the same loop. +/// +/// 3- Propagation of block weights into edges. This uses a simple +/// propagation heuristic. The following rules are applied to every +/// block BB in the CFG: +/// +/// - If BB has a single predecessor/successor, then the weight +/// of that edge is the weight of the block. +/// +/// - If all the edges are known except one, and the weight of the +/// block is already known, the weight of the unknown edge will +/// be the weight of the block minus the sum of all the known +/// edges. If the sum of all the known edges is larger than BB's weight, +/// we set the unknown edge weight to zero. 
+/// +/// - If there is a self-referential edge, and the weight of the block is +/// known, the weight for that edge is set to the weight of the block +/// minus the weight of the other incoming edges to that block (if +/// known). +/// +/// Since this propagation is not guaranteed to finalize for every CFG, we +/// only allow it to proceed for a limited number of iterations (controlled +/// by -sample-profile-max-propagate-iterations). +/// +/// FIXME: Try to replace this propagation heuristic with a scheme +/// that is guaranteed to finalize. A work-list approach similar to +/// the standard value propagation algorithm used by SSA-CCP might +/// work here. +/// +/// Once all the branch weights are computed, we emit the MD_prof +/// metadata on BB using the computed values for each of its branches. +/// +/// \param F The function to query. +/// +/// \returns true if \p F was modified. Returns false, otherwise. +bool SampleProfileLoader::emitAnnotations(Function &F) { + bool Changed = false; + + if (getFunctionLoc(F) == 0) + return false; + + DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() + << ": " << getFunctionLoc(F) << "\n"); + + Changed |= emitInlineHints(F); + + Changed |= inlineHotFunctions(F); + + // Compute basic block weights. + Changed |= computeBlockWeights(F); + + if (Changed) { + // Compute dominance and loop info needed for propagation. + computeDominanceAndLoopInfo(F); + + // Find equivalence classes. + findEquivalenceClasses(F); + + // Propagate weights to all edges. + propagateWeights(F); + } + + // If coverage checking was requested, compute it now. 
+ if (SampleProfileRecordCoverage) { + unsigned Used = CoverageTracker.countUsedRecords(Samples); + unsigned Total = CoverageTracker.countBodyRecords(Samples); + unsigned Coverage = CoverageTracker.computeCoverage(Used, Total); + if (Coverage < SampleProfileRecordCoverage) { + F.getContext().diagnose(DiagnosticInfoSampleProfile( + getDISubprogram(&F)->getFilename(), getFunctionLoc(F), + Twine(Used) + " of " + Twine(Total) + " available profile records (" + + Twine(Coverage) + "%) were applied", + DS_Warning)); + } + } + + if (SampleProfileSampleCoverage) { + uint64_t Used = CoverageTracker.getTotalUsedSamples(); + uint64_t Total = CoverageTracker.countBodySamples(Samples); + unsigned Coverage = CoverageTracker.computeCoverage(Used, Total); + if (Coverage < SampleProfileSampleCoverage) { + F.getContext().diagnose(DiagnosticInfoSampleProfile( + getDISubprogram(&F)->getFilename(), getFunctionLoc(F), + Twine(Used) + " of " + Twine(Total) + " available profile samples (" + + Twine(Coverage) + "%) were applied", + DS_Warning)); + } + } + return Changed; +} + +char SampleProfileLoader::ID = 0; +INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile", + "Sample Profile loader", false, false) +INITIALIZE_PASS_DEPENDENCY(AddDiscriminators) +INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile", + "Sample Profile loader", false, false) + +bool SampleProfileLoader::doInitialization(Module &M) { + auto &Ctx = M.getContext(); + auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx); + if (std::error_code EC = ReaderOrErr.getError()) { + std::string Msg = "Could not open profile: " + EC.message(); + Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); + return false; + } + Reader = std::move(ReaderOrErr.get()); + ProfileIsValid = (Reader->read() == sampleprof_error::success); + return true; +} + +ModulePass *llvm::createSampleProfileLoaderPass() { + return new SampleProfileLoader(SampleProfileFile); +} + +ModulePass 
*llvm::createSampleProfileLoaderPass(StringRef Name) { + return new SampleProfileLoader(Name); +} + +bool SampleProfileLoader::runOnModule(Module &M) { + if (!ProfileIsValid) + return false; + + // Compute the total number of samples collected in this profile. + for (const auto &I : Reader->getProfiles()) + TotalCollectedSamples += I.second.getTotalSamples(); + + bool retval = false; + for (auto &F : M) + if (!F.isDeclaration()) { + clearFunctionData(); + retval |= runOnFunction(F); + } + return retval; +} + +bool SampleProfileLoader::runOnFunction(Function &F) { + Samples = Reader->getSamplesFor(F); + if (!Samples->empty()) + return emitAnnotations(F); + return false; +} diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp index 956991a..c94cc7c 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -7,47 +7,31 @@ // //===----------------------------------------------------------------------===// // -// This pass loops over all of the functions in the input module, looking for +// This pass loops over all of the functions in the input module, looking for // dead declarations and removes them. Dead declarations are declarations of // functions for which no implementation is available (i.e., declarations for // unused library functions). // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/StripDeadPrototypes.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" + using namespace llvm; #define DEBUG_TYPE "strip-dead-prototypes" STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed"); -namespace { - -/// @brief Pass to remove unused function declarations. 
-class StripDeadPrototypesPass : public ModulePass { -public: - static char ID; // Pass identification, replacement for typeid - StripDeadPrototypesPass() : ModulePass(ID) { - initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry()); - } - bool runOnModule(Module &M) override; -}; - -} // end anonymous namespace - -char StripDeadPrototypesPass::ID = 0; -INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes", - "Strip Unused Function Prototypes", false, false) - -bool StripDeadPrototypesPass::runOnModule(Module &M) { +static bool stripDeadPrototypes(Module &M) { bool MadeChange = false; - + // Erase dead function prototypes. for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { - Function *F = I++; + Function *F = &*I++; // Function must be a prototype and unused. if (F->isDeclaration() && F->use_empty()) { F->eraseFromParent(); @@ -59,16 +43,42 @@ bool StripDeadPrototypesPass::runOnModule(Module &M) { // Erase dead global var prototypes. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ) { - GlobalVariable *GV = I++; + GlobalVariable *GV = &*I++; // Global must be a prototype and unused. if (GV->isDeclaration() && GV->use_empty()) GV->eraseFromParent(); } - + // Return an indication of whether we changed anything or not. 
return MadeChange; } +PreservedAnalyses StripDeadPrototypesPass::run(Module &M) { + if (stripDeadPrototypes(M)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + +namespace { + +class StripDeadPrototypesLegacyPass : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + StripDeadPrototypesLegacyPass() : ModulePass(ID) { + initializeStripDeadPrototypesLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override { + return stripDeadPrototypes(M); + } +}; + +} // end anonymous namespace + +char StripDeadPrototypesLegacyPass::ID = 0; +INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes", + "Strip Unused Function Prototypes", false, false) + ModulePass *llvm::createStripDeadPrototypesPass() { - return new StripDeadPrototypesPass(); + return new StripDeadPrototypesLegacyPass(); } diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp index a4f30c5..46f352f 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -211,13 +211,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) + if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0) if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) I->setName(""); // Internal symbols can't participate in linkage } for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) + if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0) if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) I->setName(""); // Internal symbols can't participate in linkage StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); @@ -305,6 +305,12 @@ bool 
StripDeadDebugInfo::runOnModule(Module &M) { SmallVector<Metadata *, 64> LiveSubprograms; DenseSet<const MDNode *> VisitedSet; + std::set<DISubprogram *> LiveSPs; + for (Function &F : M) { + if (DISubprogram *SP = F.getSubprogram()) + LiveSPs.insert(SP); + } + for (DICompileUnit *DIC : F.compile_units()) { // Create our live subprogram list. bool SubprogramChange = false; @@ -314,7 +320,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { continue; // If the function referenced by DISP is not null, the function is live. - if (DISP->getFunction()) + if (LiveSPs.count(DISP)) LiveSubprograms.push_back(DISP); else SubprogramChange = true; |