diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/IPO')
21 files changed, 1779 insertions, 1146 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index df08091..f9de54a 100644 --- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -29,7 +29,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "argpromotion" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" @@ -37,18 +36,22 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <set> using namespace llvm; +#define DEBUG_TYPE "argpromotion" + STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted"); STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted"); @@ -58,29 +61,32 @@ namespace { /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. /// struct ArgPromotion : public CallGraphSCCPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AliasAnalysis>(); CallGraphSCCPass::getAnalysisUsage(AU); } - virtual bool runOnSCC(CallGraphSCC &SCC); + bool runOnSCC(CallGraphSCC &SCC) override; static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) { + : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; + const DataLayout *DL; private: CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; CallGraphNode *DoPromotion(Function *F, SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform); + bool doInitialization(CallGraph &CG) override; /// The maximum number of elements to expand, or 0 for unlimited. unsigned maxElements; + DenseMap<const Function *, DISubprogram> FunctionDIs; }; } @@ -88,7 +94,7 @@ char ArgPromotion::ID = 0; INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_END(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) @@ -99,6 +105,9 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. @@ -123,24 +132,23 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. - if (!F || !F->hasLocalLinkage()) return 0; + if (!F || !F->hasLocalLinkage()) return nullptr; // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) PointerArgs.push_back(I); - if (PointerArgs.empty()) return 0; + if (PointerArgs.empty()) return nullptr; // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. Also see if the function // is self-recursive. bool isSelfRecursive = false; - for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); - UI != E; ++UI) { - CallSite CS(*UI); + for (Use &U : F->uses()) { + CallSite CS(U.getUser()); // Must be a direct call. - if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0; + if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; if (CS.getInstruction()->getParent()->getParent() == F) isSelfRecursive = true; @@ -155,7 +163,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe. + // pass the elements, which is always safe. This does not apply to + // inalloca. if (PtrArg->hasByValAttr()) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { @@ -201,32 +210,32 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // Otherwise, see if we can promote the pointer to its value. - if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr())) + if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr())) ArgsToPromote.insert(PtrArg); } // No promotable pointer arguments. if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) - return 0; + return nullptr; return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); } /// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg, + const DataLayout *DL) { Function *Callee = Arg->getParent(); unsigned ArgNo = Arg->getArgNo(); // Look at all call sites of the function. At this pointer we know we only // have direct callees. - for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end(); - UI != E; ++UI) { - CallSite CS(*UI); + for (User *U : Callee->users()) { + CallSite CS(U); assert(CS && "Should only have direct calls!"); - if (!CS.getArgument(ArgNo)->isDereferenceablePointer()) + if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL)) return false; } return true; @@ -301,7 +310,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, /// This method limits promotion of aggregates to only promote up to three /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. -bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { +bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, + bool isByValOrInAlloca) const { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments @@ -323,6 +333,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // // This set will contain all sets of indices that are loaded in the entry // block, and thus are safe to unconditionally load in the caller. + // + // This optimization is also safe for InAlloca parameters, because it verifies + // that the address isn't captured. GEPIndicesSet SafeToUnconditionallyLoad; // This set contains all the sets of indices that we are planning to promote. @@ -330,7 +343,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByVal || AllCallersPassInValidPointerForArgument(Arg)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -370,17 +383,16 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. SmallVector<LoadInst*, 16> Loads; IndicesVector Operands; - for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); - UI != E; ++UI) { - User *U = *UI; + for (Use &U : Arg->uses()) { + User *UR = U.getUser(); Operands.clear(); - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + if (LoadInst *LI = dyn_cast<LoadInst>(UR)) { // Don't hack volatile/atomic loads if (!LI->isSimple()) return false; Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load. Operands.push_back(0); - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) { if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. @@ -389,7 +401,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? - return isSafeToPromoteArgument(Arg, isByVal); + return isSafeToPromoteArgument(Arg, isByValOrInAlloca); } // Ensure that all of the indices are constants. @@ -401,9 +413,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { return false; // Not a constant operand GEP! // Ensure that the only users of the GEP are load instructions. - for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); - UI != E; ++UI) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + for (User *GEPU : GEP->users()) + if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) { // Don't hack volatile/atomic loads if (!LI->isSimple()) return false; Loads.push_back(LI); @@ -549,16 +560,15 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[I]; - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; - ++UI) { - Instruction *User = cast<Instruction>(*UI); - assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User)); + for (User *U : I->users()) { + Instruction *UI = cast<Instruction>(U); + assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI)); IndicesVector Indices; - Indices.reserve(User->getNumOperands() - 1); + Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. - for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end(); + for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads @@ -566,11 +576,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Indices.clear(); ArgIndices.insert(Indices); LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast<LoadInst>(User)) + if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis - OrigLoad = cast<LoadInst>(User->use_back()); + OrigLoad = cast<LoadInst>(UI->user_back()); OriginalLoads[std::make_pair(I, Indices)] = OrigLoad; } @@ -603,6 +613,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); + // Patch the pointer to LLVM function in debug info descriptor. + auto DI = FunctionDIs.find(F); + if (DI != FunctionDIs.end()) + DI->second.replaceFunction(NF); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); @@ -621,8 +635,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Get the callgraph information that we need to update to reflect our // changes. - CallGraph &CG = getAnalysis<CallGraph>(); - + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + // Get a new callgraph node for NF. CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); @@ -631,7 +645,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // SmallVector<Value*, 16> Args; while (!F->use_empty()) { - CallSite CS(F->use_back()); + CallSite CS(F->user_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttributeSet &CallPAL = CS.getAttributes(); @@ -660,7 +674,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, @@ -740,6 +754,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } + New->setDebugLoc(Call->getDebugLoc()); Args.clear(); AttributesVec.clear(); @@ -788,10 +803,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); - Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); + Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); @@ -807,6 +822,15 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(I); AA.replaceWithNewValue(I, TheAlloca); + + // If the alloca is used in a call, we must clear the tail flag since + // the callee now uses an alloca from the caller. + for (User *U : TheAlloca->users()) { + CallInst *Call = dyn_cast<CallInst>(U); + if (!Call) + continue; + Call->setTailCall(false); + } continue; } @@ -821,7 +845,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ScalarizeTable &ArgIndices = ScalarizedElements[I]; while (!I->use_empty()) { - if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) { + if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); @@ -831,7 +855,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { - GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); + GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); @@ -861,7 +885,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { - LoadInst *L = cast<LoadInst>(GEP->use_back()); + LoadInst *L = cast<LoadInst>(GEP->user_back()); L->replaceAllUsesWith(TheArg); AA.replaceWithNewValue(L, TheArg); L->eraseFromParent(); @@ -892,3 +916,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, return NF_CGN; } + +bool ArgPromotion::doInitialization(CallGraph &CG) { + FunctionDIs = makeSubprogramMap(CG.getModule()); + return CallGraphSCCPass::doInitialization(CG); +} diff --git a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp index 2e32240..6af1043 100644 --- a/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp +++ b/contrib/llvm/lib/Transforms/IPO/BarrierNoopPass.cpp @@ -36,7 +36,7 @@ public: initializeBarrierNoopPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M) { return false; } + bool runOnModule(Module &M) override { return false; } }; } diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp index d94c0f4..23be081 100644 --- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" @@ -31,6 +30,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "constmerge" + STATISTIC(NumMerged, "Number of global constants merged"); namespace { @@ -42,7 +43,7 @@ namespace { // For this pass, process all of the globals in the module, eliminating // duplicate constants. - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; // Return true iff we can determine the alignment of this global variable. bool hasKnownAlignment(GlobalVariable *GV) const; @@ -51,7 +52,7 @@ namespace { // alignment to a concrete value. unsigned getAlignment(GlobalVariable *GV) const; - const DataLayout *TD; + const DataLayout *DL; }; } @@ -66,7 +67,7 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } /// Find values that are marked as llvm.used. static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { - if (LLVMUsed == 0) return; + if (!LLVMUsed) return; ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { @@ -77,8 +78,8 @@ static void FindUsedValues(GlobalVariable *LLVMUsed, } // True if A is better than B. -static bool IsBetterCannonical(const GlobalVariable &A, - const GlobalVariable &B) { +static bool IsBetterCanonical(const GlobalVariable &A, + const GlobalVariable &B) { if (!A.hasLocalLinkage() && B.hasLocalLinkage()) return true; @@ -89,20 +90,21 @@ static bool IsBetterCannonical(const GlobalVariable &A, } bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const { - return TD || GV->getAlignment() != 0; + return DL || GV->getAlignment() != 0; } unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { unsigned Align = GV->getAlignment(); if (Align) return Align; - if (TD) - return TD->getPreferredAlignment(GV); + if (DL) + return DL->getPreferredAlignment(GV); return 0; } bool ConstantMerge::runOnModule(Module &M) { - TD = getAnalysisIfAvailable<DataLayout>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; @@ -160,7 +162,7 @@ bool ConstantMerge::runOnModule(Module &M) { // If this is the first constant we find or if the old one is local, // replace with the current one. If the current is externally visible // it cannot be replace, but can be the canonical constant we merge with. - if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) + if (!Slot || IsBetterCanonical(*GV, *Slot)) Slot = GV; } diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index 911c14e..ac3853d 100644 --- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -17,29 +17,31 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "deadargelim" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <map> #include <set> +#include <tuple> using namespace llvm; +#define DEBUG_TYPE "deadargelim" + STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); STATISTIC(NumArgumentsReplacedWithUndef, @@ -62,12 +64,7 @@ namespace { /// Make RetOrArg comparable, so we can put it into a map. bool operator<(const RetOrArg &O) const { - if (F != O.F) - return F < O.F; - else if (Idx != O.Idx) - return Idx < O.Idx; - else - return IsArg < O.IsArg; + return std::tie(F, Idx, IsArg) < std::tie(O.F, O.Idx, O.IsArg); } /// Make RetOrArg comparable, so we can easily iterate the multimap. @@ -130,8 +127,7 @@ namespace { // As the code generation for module is finished (and DIBuilder is // finalized) we assume that subprogram descriptors won't be changed, and // they are stored in map for short duration anyway. - typedef DenseMap<Function*, DISubprogram> FunctionDIMap; - FunctionDIMap FunctionDIs; + DenseMap<const Function *, DISubprogram> FunctionDIs; protected: // DAH uses this to specify a different ID. @@ -143,17 +139,16 @@ namespace { initializeDAEPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; virtual bool ShouldHackArguments() const { return false; } private: Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses); - Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses, + Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses, unsigned RetValNum = 0); Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); - void CollectFunctionDIs(Module &M); void SurveyFunction(const Function &F); void MarkValue(const RetOrArg &RA, Liveness L, const UseVector &MaybeLiveUses); @@ -178,7 +173,7 @@ namespace { static char ID; DAH() : DAE(ID) {} - virtual bool ShouldHackArguments() const { return true; } + bool ShouldHackArguments() const override { return true; } }; } @@ -193,35 +188,6 @@ INITIALIZE_PASS(DAH, "deadarghaX0r", ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); } ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); } -/// CollectFunctionDIs - Map each function in the module to its debug info -/// descriptor. -void DAE::CollectFunctionDIs(Module &M) { - FunctionDIs.clear(); - - for (Module::named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); I != E; ++I) { - NamedMDNode &NMD = *I; - for (unsigned MDIndex = 0, MDNum = NMD.getNumOperands(); - MDIndex < MDNum; ++MDIndex) { - MDNode *Node = NMD.getOperand(MDIndex); - if (!DIDescriptor(Node).isCompileUnit()) - continue; - DICompileUnit CU(Node); - const DIArray &SPs = CU.getSubprograms(); - for (unsigned SPIndex = 0, SPNum = SPs.getNumElements(); - SPIndex < SPNum; ++SPIndex) { - DISubprogram SP(SPs.getElement(SPIndex)); - assert((!SP || SP.isSubprogram()) && - "A MDNode in subprograms of a CU should be null or a DISubprogram."); - if (!SP) - continue; - if (Function *F = SP.getFunction()) - FunctionDIs[F] = SP; - } - } - } -} - /// DeleteDeadVarargs - If this is an function that takes a ... list, and if /// llvm.vastart is never called, the varargs list is dead for the function. bool DAE::DeleteDeadVarargs(Function &Fn) { @@ -265,7 +231,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // to pass in a smaller number of arguments into the new function. // std::vector<Value*> Args; - for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ) { + for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) { CallSite CS(*I++); if (!CS) continue; @@ -330,7 +296,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { } // Patch the pointer to LLVM function in debug info descriptor. - FunctionDIMap::iterator DI = FunctionDIs.find(&Fn); + auto DI = FunctionDIs.find(&Fn); if (DI != FunctionDIs.end()) DI->second.replaceFunction(NF); @@ -378,7 +344,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) I != E; ++I) { Argument *Arg = I; - if (Arg->use_empty() && !Arg->hasByValAttr()) + if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr()) UnusedArgs.push_back(Arg->getArgNo()); } @@ -387,10 +353,9 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) bool Changed = false; - for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end(); - I != E; ++I) { - CallSite CS(*I); - if (!CS || !CS.isCallee(I)) + for (Use &U : Fn.uses()) { + CallSite CS(U.getUser()); + if (!CS || !CS.isCallee(&U)) continue; // Now go through all unused args and replace them with "undef". @@ -441,9 +406,9 @@ DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) { /// RetValNum is the return value number to use when this use is used in a /// return instruction. This is used in the recursion, you should always leave /// it at 0. -DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, +DAE::Liveness DAE::SurveyUse(const Use *U, UseVector &MaybeLiveUses, unsigned RetValNum) { - const User *V = *U; + const User *V = U->getUser(); if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) { // The value is returned from a function. It's only live when the // function's return value is live. We use RetValNum here, for the case @@ -454,7 +419,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, return MarkIfNotLive(Use, MaybeLiveUses); } if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) { - if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex() + if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex() && IV->hasIndices()) // The use we are examining is inserted into an aggregate. Our liveness // depends on all uses of that aggregate, but if it is used as a return @@ -465,9 +430,8 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, // we don't change RetValNum, but do survey all our uses. Liveness Result = MaybeLive; - for (Value::const_use_iterator I = IV->use_begin(), - E = V->use_end(); I != E; ++I) { - Result = SurveyUse(I, MaybeLiveUses, RetValNum); + for (const Use &UU : IV->uses()) { + Result = SurveyUse(&UU, MaybeLiveUses, RetValNum); if (Result == Live) break; } @@ -490,7 +454,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, return Live; assert(CS.getArgument(ArgNo) - == CS->getOperand(U.getOperandNo()) + == CS->getOperand(U->getOperandNo()) && "Argument is not where we expected it"); // Value passed to a normal call. It's only live when the corresponding @@ -513,9 +477,8 @@ DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) { // Assume it's dead (which will only hold if there are no uses at all..). Liveness Result = MaybeLive; // Check each use. - for (Value::const_use_iterator I = V->use_begin(), - E = V->use_end(); I != E; ++I) { - Result = SurveyUse(I, MaybeLiveUses); + for (const Use &U : V->uses()) { + Result = SurveyUse(&U, MaybeLiveUses); if (Result == Live) break; } @@ -531,6 +494,13 @@ DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) { // well as arguments to functions which have their "address taken". // void DAE::SurveyFunction(const Function &F) { + // Functions with inalloca parameters are expecting args in a particular + // register and memory layout. + if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) { + MarkLive(F); + return; + } + unsigned RetCount = NumRetVals(&F); // Assume all return values are dead typedef SmallVector<Liveness, 5> RetVals; @@ -562,12 +532,11 @@ void DAE::SurveyFunction(const Function &F) { unsigned NumLiveRetVals = 0; Type *STy = dyn_cast<StructType>(F.getReturnType()); // Loop all uses of the function. - for (Value::const_use_iterator I = F.use_begin(), E = F.use_end(); - I != E; ++I) { + for (const Use &U : F.uses()) { // If the function is PASSED IN as an argument, its address has been // taken. - ImmutableCallSite CS(*I); - if (!CS || !CS.isCallee(I)) { + ImmutableCallSite CS(U.getUser()); + if (!CS || !CS.isCallee(&U)) { MarkLive(F); return; } @@ -586,9 +555,8 @@ void DAE::SurveyFunction(const Function &F) { if (NumLiveRetVals != RetCount) { if (STy) { // Check all uses of the return value. - for (Value::const_use_iterator I = TheCall->use_begin(), - E = TheCall->use_end(); I != E; ++I) { - const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I); + for (const User *U : TheCall->users()) { + const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U); if (Ext && Ext->hasIndices()) { // This use uses a part of our return value, survey the uses of // that part and store the results for this index only. @@ -767,7 +735,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Find out the new return value. Type *RetTy = FTy->getReturnType(); - Type *NRetTy = NULL; + Type *NRetTy = nullptr; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index @@ -891,7 +859,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // std::vector<Value*> Args; while (!F->use_empty()) { - CallSite CS(F->use_back()); + CallSite CS(F->user_back()); Instruction *Call = CS.getInstruction(); AttributesVec.clear(); @@ -1053,7 +1021,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Value *RetVal; if (NFTy->getReturnType()->isVoidTy()) { - RetVal = 0; + RetVal = nullptr; } else { assert (RetTy->isStructTy()); // The original return value was a struct, insert @@ -1088,7 +1056,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } // Patch the pointer to LLVM function in debug info descriptor. - FunctionDIMap::iterator DI = FunctionDIs.find(F); + auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) DI->second.replaceFunction(NF); @@ -1102,7 +1070,7 @@ bool DAE::runOnModule(Module &M) { bool Changed = false; // Collect debug info descriptors for functions. - CollectFunctionDIs(M); + FunctionDIs = makeSubprogramMap(M); // First pass: Do a simple check to see if any functions can have their "..." // removed. We can do this if they never call va_start. This loop cannot be diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp index 50fb3e6..40ec9fa 100644 --- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp @@ -27,11 +27,10 @@ using namespace llvm; /// the split module remain valid. static void makeVisible(GlobalValue &GV, bool Delete) { bool Local = GV.hasLocalLinkage(); - if (Local) - GV.setVisibility(GlobalValue::HiddenVisibility); - if (Local || Delete) { GV.setLinkage(GlobalValue::ExternalLinkage); + if (Local) + GV.setVisibility(GlobalValue::HiddenVisibility); return; } @@ -68,7 +67,7 @@ namespace { explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true) : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {} - bool runOnModule(Module &M) { + bool runOnModule(Module &M) override { // Visit the global inline asm. if (!deleteStuff) M.setModuleInlineAsm(""); @@ -95,7 +94,7 @@ namespace { makeVisible(*I, Delete); if (Delete) - I->setInitializer(0); + I->setInitializer(nullptr); } // Visit the Functions. @@ -134,7 +133,7 @@ namespace { } else { Declaration = new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, - 0, CurI->getName()); + nullptr, CurI->getName()); } CurI->replaceAllUsesWith(Declaration); diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 60e5f06..8174df9 100644 --- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "functionattrs" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" @@ -29,12 +28,14 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; +#define DEBUG_TYPE "functionattrs" + STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); @@ -46,12 +47,12 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions"); namespace { struct FunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID), AA(0) { + FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) { initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); } // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(CallGraphSCC &SCC); + bool runOnSCC(CallGraphSCC &SCC) override; // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool AddReadAttrs(const CallGraphSCC &SCC); @@ -120,7 +121,7 @@ namespace { // call declarations. bool annotateLibraryCalls(const CallGraphSCC &SCC); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addRequired<TargetLibraryInfo>(); @@ -137,7 +138,7 @@ char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) @@ -160,7 +161,7 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - may write memory. Just give up. return false; @@ -319,7 +320,7 @@ namespace { ArgumentGraphNode SyntheticRoot; public: - ArgumentGraph() { SyntheticRoot.Definition = 0; } + ArgumentGraph() { SyntheticRoot.Definition = nullptr; } typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator; @@ -342,9 +343,9 @@ namespace { ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes) : Captured(false), SCCNodes(SCCNodes) {} - void tooManyUses() { Captured = true; } + void tooManyUses() override { Captured = true; } - bool captured(Use *U) { + bool captured(const Use *U) override { CallSite CS(U->getUser()); if (!CS.getInstruction()) { Captured = true; return true; } @@ -414,17 +415,19 @@ determinePointerReadAttrs(Argument *A, SmallSet<Use*, 32> Visited; int Count = 0; + // inalloca arguments are always clobbered by the call. + if (A->hasInAllocaAttr()) + return Attribute::None; + bool IsRead = false; // We don't need to track IsWritten. If A is written to, return immediately. - for (Value::use_iterator UI = A->use_begin(), UE = A->use_end(); - UI != UE; ++UI) { + for (Use &U : A->uses()) { if (Count++ >= 20) return Attribute::None; - Use *U = &UI.getUse(); - Visited.insert(U); - Worklist.push_back(U); + Visited.insert(&U); + Worklist.push_back(&U); } while (!Worklist.empty()) { @@ -437,25 +440,38 @@ determinePointerReadAttrs(Argument *A, case Instruction::GetElementPtr: case Instruction::PHI: case Instruction::Select: + case Instruction::AddrSpaceCast: // The original value is not read/written via this if the new value isn't. - for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - Use *U = &UI.getUse(); - if (Visited.insert(U)) - Worklist.push_back(U); - } + for (Use &UU : I->uses()) + if (Visited.insert(&UU)) + Worklist.push_back(&UU); break; case Instruction::Call: case Instruction::Invoke: { + bool Captures = true; + + if (I->getType()->isVoidTy()) + Captures = false; + + auto AddUsersToWorklistIfCapturing = [&] { + if (Captures) + for (Use &UU : I->uses()) + if (Visited.insert(&UU)) + Worklist.push_back(&UU); + }; + CallSite CS(I); - if (CS.doesNotAccessMemory()) + if (CS.doesNotAccessMemory()) { + AddUsersToWorklistIfCapturing(); continue; + } Function *F = CS.getCalledFunction(); if (!F) { if (CS.onlyReadsMemory()) { IsRead = true; + AddUsersToWorklistIfCapturing(); continue; } return Attribute::None; @@ -470,6 +486,7 @@ determinePointerReadAttrs(Argument *A, "More params than args in non-varargs call."); return Attribute::None; } + Captures &= !CS.doesNotCapture(A - B); if (SCCNodes.count(AI)) continue; if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B)) @@ -478,6 +495,7 @@ determinePointerReadAttrs(Argument *A, IsRead = true; } } + AddUsersToWorklistIfCapturing(); break; } @@ -521,7 +539,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - only a problem for arguments that we pass to it. continue; @@ -599,9 +617,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // made. If the definition doesn't have a 'nocapture' attribute by now, it // captures. - for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG), E = scc_end(&AG); - I != E; ++I) { - std::vector<ArgumentGraphNode*> &ArgumentSCC = *I; + for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) { + const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; if (ArgumentSCC.size() == 1) { if (!ArgumentSCC[0]->Definition) continue; // synthetic root node @@ -617,8 +634,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { } bool SCCCaptured = false; - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { ArgumentGraphNode *Node = *I; if (Node->Uses.empty()) { if (!Node->Definition->hasNoCaptureAttr()) @@ -630,13 +647,12 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Argument*, 8> ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { ArgumentSCCNodes.insert((*I)->Definition); } - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(), UE = N->Uses.end(); UI != UE; ++UI) { @@ -723,6 +739,7 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, // Extend the analysis by looking upwards. case Instruction::BitCast: case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: FlowsToReturn.insert(RVI->getOperand(0)); continue; case Instruction::Select: { @@ -775,7 +792,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - skip it; return false; @@ -1649,6 +1666,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); + break; default: // Didn't mark any attributes. return false; @@ -1667,7 +1685,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F != 0 && F->isDeclaration()) + if (F && F->isDeclaration()) MadeChange |= inferPrototypeAttributes(*F); } diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp index 901295d..7e7a4c0 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -15,15 +15,18 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globaldce" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "globaldce" + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); @@ -38,7 +41,7 @@ namespace { // run - Do the GlobalDCE pass on the specified module, optionally updating // the specified callgraph to reflect the changes. // - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; private: SmallPtrSet<GlobalValue*, 32> AliveGlobals; @@ -53,6 +56,15 @@ namespace { }; } +/// Returns true if F contains only a single "ret" instruction. +static bool isEmptyFunction(Function *F) { + BasicBlock &Entry = F->getEntryBlock(); + if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front())) + return false; + ReturnInst &RI = cast<ReturnInst>(Entry.front()); + return RI.getReturnValue() == nullptr; +} + char GlobalDCE::ID = 0; INITIALIZE_PASS(GlobalDCE, "globaldce", "Dead Global Elimination", false, false) @@ -61,14 +73,23 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } bool GlobalDCE::runOnModule(Module &M) { bool Changed = false; - + + // Remove empty functions from the global ctors list. + Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + + typedef std::multimap<const Comdat *, GlobalValue *> ComdatGVPairsTy; + ComdatGVPairsTy ComdatGVPairs; + // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); // Functions with external linkage are needed if they have a body - if (!I->isDiscardableIfUnused() && - !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) - GlobalIsNeeded(I); + if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { + if (!I->isDiscardableIfUnused()) + GlobalIsNeeded(I); + else if (const Comdat *C = I->getComdat()) + ComdatGVPairs.insert(std::make_pair(C, I)); + } } for (Module::global_iterator I = M.global_begin(), E = M.global_end(); @@ -76,17 +97,38 @@ bool GlobalDCE::runOnModule(Module &M) { Changed |= RemoveUnusedGlobalValue(*I); // Externally visible & appending globals are needed, if they have an // initializer. - if (!I->isDiscardableIfUnused() && - !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) - GlobalIsNeeded(I); + if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { + if (!I->isDiscardableIfUnused()) + GlobalIsNeeded(I); + else if (const Comdat *C = I->getComdat()) + ComdatGVPairs.insert(std::make_pair(C, I)); + } } for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); // Externally visible aliases are needed. - if (!I->isDiscardableIfUnused()) + if (!I->isDiscardableIfUnused()) { GlobalIsNeeded(I); + } else if (const Comdat *C = I->getComdat()) { + ComdatGVPairs.insert(std::make_pair(C, I)); + } + } + + for (ComdatGVPairsTy::iterator I = ComdatGVPairs.begin(), + E = ComdatGVPairs.end(); + I != E;) { + ComdatGVPairsTy::iterator UB = ComdatGVPairs.upper_bound(I->first); + bool CanDiscard = std::all_of(I, UB, [](ComdatGVPairsTy::value_type Pair) { + return Pair.second->isDiscardableIfUnused(); + }); + if (!CanDiscard) { + std::for_each(I, UB, [this](ComdatGVPairsTy::value_type Pair) { + GlobalIsNeeded(Pair.second); + }); + } + I = UB; } // Now that all globals which are needed are in the AliveGlobals set, we loop @@ -99,7 +141,7 @@ bool GlobalDCE::runOnModule(Module &M) { I != E; ++I) if (!AliveGlobals.count(I)) { DeadGlobalVars.push_back(I); // Keep track of dead globals - I->setInitializer(0); + I->setInitializer(nullptr); } // The second pass drops the bodies of functions which are dead... @@ -117,7 +159,7 @@ bool GlobalDCE::runOnModule(Module &M) { ++I) if (!AliveGlobals.count(I)) { DeadAliases.push_back(I); - I->setAliasee(0); + I->setAliasee(nullptr); } if (!DeadFunctions.empty()) { diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 2ea89a1..c1d0d3bc 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -13,37 +13,41 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globalopt" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> +#include <deque> using namespace llvm; +#define DEBUG_TYPE "globalopt" + STATISTIC(NumMarked , "Number of globals marked constant"); STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); @@ -63,7 +67,7 @@ STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); namespace { struct GlobalOpt : public ModulePass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfo>(); } static char ID; // Pass identification, replacement for typeid @@ -71,20 +75,18 @@ namespace { initializeGlobalOptPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; private: - GlobalVariable *FindGlobalCtors(Module &M); bool OptimizeFunctions(Module &M); bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); - bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); - DataLayout *TD; + const DataLayout *DL; TargetLibraryInfo *TLI; }; } @@ -196,7 +198,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; // Constants can't be pointers to dynamically allocated memory. - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); + for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end(); UI != E;) { User *U = *UI++; if (StoreInst *SI = dyn_cast<StoreInst>(U)) { @@ -266,13 +268,14 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change. static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - DataLayout *TD, TargetLibraryInfo *TLI) { + const DataLayout *DL, + TargetLibraryInfo *TLI) { bool Changed = false; // Note that we need to use a weak value handle for the worklist items. When // we delete a constant array, we may also be holding pointer to one of its // elements (or an element of one of its elements if we're dealing with an // array of arrays) in the worklist. - SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end()); + SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end()); while (!WorkList.empty()) { Value *UV = WorkList.pop_back_val(); if (!UV) @@ -293,14 +296,15 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Changed = true; } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { if (CE->getOpcode() == Instruction::GetElementPtr) { - Constant *SubInit = 0; + Constant *SubInit = nullptr; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI); - } else if (CE->getOpcode() == Instruction::BitCast && - CE->getType()->isPointerTy()) { + Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); + } else if ((CE->getOpcode() == Instruction::BitCast && + CE->getType()->isPointerTy()) || + CE->getOpcode() == Instruction::AddrSpaceCast) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI); + Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); } if (CE->use_empty()) { @@ -311,10 +315,10 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // Do not transform "gepinst (gep constexpr (GV))" here, because forming // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold // and will invalidate our notion of what Init is. - Constant *SubInit = 0; + Constant *SubInit = nullptr; if (!isa<ConstantExpr>(GEP->getOperand(0))) { ConstantExpr *CE = - dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI)); + dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -324,7 +328,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) SubInit = Constant::getNullValue(GEP->getType()->getElementType()); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -341,7 +345,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // us, and if they are all dead, nuke them without remorse. if (isSafeToDestroyConstant(C)) { C->destroyConstant(); - CleanupConstantGlobalUsers(V, Init, TD, TLI); + CleanupConstantGlobalUsers(V, Init, DL, TLI); return true; } } @@ -368,15 +372,14 @@ static bool isSafeSROAElementUse(Value *V) { // Otherwise, it must be a GEP. GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); - if (GEPI == 0) return false; + if (!GEPI) return false; if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || !cast<Constant>(GEPI->getOperand(1))->isNullValue()) return false; - for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end(); - I != E; ++I) - if (!isSafeSROAElementUse(*I)) + for (User *U : GEPI->users()) + if (!isSafeSROAElementUse(U)) return false; return true; } @@ -442,9 +445,10 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { } } - for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) - if (!isSafeSROAElementUse(*I)) + for (User *UU : U->users()) + if (!isSafeSROAElementUse(UU)) return false; + return true; } @@ -452,11 +456,10 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { /// is safe for us to perform this transformation. /// static bool GlobalUsersSafeToSRA(GlobalValue *GV) { - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); - UI != E; ++UI) { - if (!IsUserOfGlobalSafeForSRA(*UI, GV)) + for (User *U : GV->users()) + if (!IsUserOfGlobalSafeForSRA(U, GV)) return false; - } + return true; } @@ -466,10 +469,10 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { /// behavior of the program in a more fine-grained way. We have determined that /// this transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. -static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) { +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) - return 0; + return nullptr; assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); @@ -481,11 +484,11 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) { // Get the alignment of the global, either explicit or target-specific. unsigned StartAlignment = GV->getAlignment(); if (StartAlignment == 0) - StartAlignment = TD.getABITypeAlignment(GV->getType()); + StartAlignment = DL.getABITypeAlignment(GV->getType()); if (StructType *STy = dyn_cast<StructType>(Ty)) { NewGlobals.reserve(STy->getNumElements()); - const StructLayout &Layout = *TD.getStructLayout(STy); + const StructLayout &Layout = *DL.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Constant *In = Init->getAggregateElement(i); assert(In && "Couldn't get element of initializer?"); @@ -502,7 +505,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) { // propagate info to each field. uint64_t FieldOffset = Layout.getElementOffset(i); unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); - if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) + if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i))) NGV->setAlignment(NewAlign); } } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) { @@ -513,11 +516,11 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) { NumElements = cast<VectorType>(STy)->getNumElements(); if (NumElements > 16 && GV->hasNUsesOrMore(16)) - return 0; // It's not worth it. + return nullptr; // It's not worth it. NewGlobals.reserve(NumElements); - uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType()); - unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); + uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType()); + unsigned EltAlign = DL.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = Init->getAggregateElement(i); assert(In && "Couldn't get element of initializer?"); @@ -540,7 +543,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) { } if (NewGlobals.empty()) - return 0; + return nullptr; DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); @@ -549,7 +552,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) { // Loop over all of the uses of the global, replacing the constantexpr geps, // with smaller constantexpr geps or direct references. while (!GV->use_empty()) { - User *GEP = GV->use_back(); + User *GEP = GV->user_back(); assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); @@ -602,7 +605,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) { if (FirstGlobal == i) ++FirstGlobal; } - return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; + return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; } /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified @@ -610,10 +613,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) { /// phi nodes we've seen to avoid reprocessing them. static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSet<const PHINode*, 8> &PHIs) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; - ++UI) { - const User *U = *UI; - + for (const User *U : V->users()) if (isa<LoadInst>(U)) { // Will trap. } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { @@ -641,13 +641,13 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; } else if (isa<ICmpInst>(U) && - isa<ConstantPointerNull>(UI->getOperand(1))) { + isa<ConstantPointerNull>(U->getOperand(1))) { // Ignore icmp X, null } else { //cerr << "NONTRAPPING USE: " << *U; return false; } - } + return true; } @@ -655,10 +655,7 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, /// from GV will trap if the loaded value is null. Note that this also permits /// comparisons of the loaded value against null, as a special case. static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { - for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); - UI != E; ++UI) { - const User *U = *UI; - + for (const User *U : GV->users()) if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { SmallPtrSet<const PHINode*, 8> PHIs; if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) @@ -670,13 +667,12 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; return false; } - } return true; } static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { bool Changed = false; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { + for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) { Instruction *I = cast<Instruction>(*UI++); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { LI->setOperand(0, NewV); @@ -702,7 +698,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { if (PassedAsArg) { // Being passed as an argument also. Be careful to not invalidate UI! - UI = V->use_begin(); + UI = V->user_begin(); } } } else if (CastInst *CI = dyn_cast<CastInst>(I)) { @@ -742,7 +738,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null optimize away the load. static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - DataLayout *TD, + const DataLayout *DL, TargetLibraryInfo *TLI) { bool Changed = false; @@ -751,7 +747,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, bool AllNonStoreUsesGone = true; // Replace all uses of loads with uses of uses of the stored value. - for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ + for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){ User *GlobalUser = *GUI++; if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); @@ -791,7 +787,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, Changed |= CleanupPointerRootUsers(GV, TLI); } else { Changed = true; - CleanupConstantGlobalUsers(GV, 0, TD, TLI); + CleanupConstantGlobalUsers(GV, nullptr, DL, TLI); } if (GV->use_empty()) { DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); @@ -805,11 +801,11 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V, - DataLayout *TD, TargetLibraryInfo *TLI) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) +static void ConstantPropUsersOf(Value *V, const DataLayout *DL, + TargetLibraryInfo *TLI) { + for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) if (Instruction *I = dyn_cast<Instruction>(*UI++)) - if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) { + if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) { I->replaceAllUsesWith(NewC); // Advance UI to the next non-I use to avoid invalidating it! @@ -829,7 +825,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, ConstantInt *NElements, - DataLayout *TD, + const DataLayout *DL, TargetLibraryInfo *TLI) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); @@ -853,9 +849,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update // other users to use the global as well. - BitCastInst *TheBC = 0; + BitCastInst *TheBC = nullptr; while (!CI->use_empty()) { - Instruction *User = cast<Instruction>(CI->use_back()); + Instruction *User = cast<Instruction>(CI->user_back()); if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { if (BCI->getType() == NewGV->getType()) { BCI->replaceAllUsesWith(NewGV); @@ -864,7 +860,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, BCI->setOperand(0, NewGV); } } else { - if (TheBC == 0) + if (!TheBC) TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); User->replaceUsesOfWith(CI, TheBC); } @@ -886,7 +882,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Loop over all uses of GV, processing them in turn. while (!GV->use_empty()) { - if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) { + if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { // The global is initialized when the store to it occurs. new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, SI->getOrdering(), SI->getSynchScope(), SI); @@ -894,15 +890,15 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, continue; } - LoadInst *LI = cast<LoadInst>(GV->use_back()); + LoadInst *LI = cast<LoadInst>(GV->user_back()); while (!LI->use_empty()) { - Use &LoadUse = LI->use_begin().getUse(); - if (!isa<ICmpInst>(LoadUse.getUser())) { + Use &LoadUse = *LI->use_begin(); + ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); + if (!ICI) { LoadUse = RepValue; continue; } - ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); // Replace the cmp X, 0 with a use of the bool value. // Sink the load to where the compare was, if atomic rules allow us to. Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0, @@ -936,7 +932,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // If the initialization boolean was used, insert it, otherwise delete it. if (!InitBoolUsed) { while (!InitBool->use_empty()) // Delete initializations - cast<StoreInst>(InitBool->use_back())->eraseFromParent(); + cast<StoreInst>(InitBool->user_back())->eraseFromParent(); delete InitBool; } else GV->getParent()->getGlobalList().insert(GV, InitBool); @@ -948,9 +944,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // To further other optimizations, loop over all users of NewGV and try to // constant prop them. This will promote GEP instructions with constant // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV, TD, TLI); + ConstantPropUsersOf(NewGV, DL, TLI); if (RepValue != NewGV) - ConstantPropUsersOf(RepValue, TD, TLI); + ConstantPropUsersOf(RepValue, DL, TLI); return NewGV; } @@ -962,9 +958,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, const GlobalVariable *GV, SmallPtrSet<const PHINode*, 8> &PHIs) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - const Instruction *Inst = cast<Instruction>(*UI); + for (const User *U : V->users()) { + const Instruction *Inst = cast<Instruction>(U); if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { continue; // Fine, ignore. @@ -1011,7 +1006,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { while (!Alloc->use_empty()) { - Instruction *U = cast<Instruction>(*Alloc->use_begin()); + Instruction *U = cast<Instruction>(*Alloc->user_begin()); Instruction *InsertPt = U; if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // If this is the store of the allocation into the global, remove it. @@ -1022,7 +1017,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, } else if (PHINode *PN = dyn_cast<PHINode>(U)) { // Insert the load in the corresponding predecessor, not right before the // PHI. - InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator(); + InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator(); } else if (isa<BitCastInst>(U)) { // Must be bitcast between the malloc and store to initialize the global. ReplaceUsesOfMallocWithGlobal(U, GV); @@ -1032,7 +1027,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, // If this is a "GEP bitcast" and the user is a store to the global, then // just process it as a bitcast. if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) - if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back())) + if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back())) if (SI->getOperand(1) == GV) { // Must be bitcast GEP between the malloc and store to initialize // the global. @@ -1056,19 +1051,18 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { // We permit two users of the load: setcc comparing against the null // pointer, and a getelementptr of a specific form. - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; - ++UI) { - const Instruction *User = cast<Instruction>(*UI); + for (const User *U : V->users()) { + const Instruction *UI = cast<Instruction>(U); // Comparison against null is ok. - if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) return false; continue; } // getelementptr is also ok, but only a simple form. - if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { + if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) { // Must index into the array and into the struct. if (GEPI->getNumOperands() < 3) return false; @@ -1077,7 +1071,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, continue; } - if (const PHINode *PN = dyn_cast<PHINode>(User)) { + if (const PHINode *PN = dyn_cast<PHINode>(UI)) { if (!LoadUsingPHIsPerLoad.insert(PN)) // This means some phi nodes are dependent on each other. // Avoid infinite looping! @@ -1108,9 +1102,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; - for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); - UI != E; ++UI) - if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + for (const User *U : GV->users()) + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; @@ -1178,10 +1171,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. - StructType *ST = cast<StructType>(PN->getType()->getPointerElementType()); + PointerType *PTy = cast<PointerType>(PN->getType()); + StructType *ST = cast<StructType>(PTy->getElementType()); + + unsigned AS = PTy->getAddressSpace(); PHINode *NewPN = - PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS), PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); Result = NewPN; @@ -1249,7 +1245,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // If this is the first time we've seen this PHI, recursively process all // users. - for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { + for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast<Instruction>(*UI++); RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } @@ -1262,8 +1258,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { - for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); - UI != E; ) { + for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { Instruction *User = cast<Instruction>(*UI++); RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } @@ -1277,7 +1272,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value *NElems, DataLayout *TD, + Value *NElems, const DataLayout *DL, const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); Type *MAT = getMallocAllocatedType(CI, TLI); @@ -1294,9 +1289,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, std::vector<Value*> FieldGlobals; std::vector<Value*> FieldMallocs; + unsigned AS = GV->getType()->getPointerAddressSpace(); for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ Type *FieldTy = STy->getElementType(FieldNo); - PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + PointerType *PFieldTy = PointerType::get(FieldTy, AS); GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), @@ -1306,13 +1302,13 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); - unsigned TypeSize = TD->getTypeAllocSize(FieldTy); + unsigned TypeSize = DL->getTypeAllocSize(FieldTy); if (StructType *ST = dyn_cast<StructType>(FieldTy)) - TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - Type *IntPtrTy = TD->getIntPtrType(CI->getType()); + TypeSize = DL->getStructLayout(ST)->getSizeInBytes(); + Type *IntPtrTy = DL->getIntPtrType(CI->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, 0, + NElems, nullptr, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); @@ -1394,7 +1390,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Okay, the malloc site is completely handled. All of the uses of GV are now // loads, and all uses of those loads are simple. Rewrite them to use loads // of the per-field globals instead. - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { + for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { Instruction *User = cast<Instruction>(*UI++); if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1469,9 +1465,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, Type *AllocTy, AtomicOrdering Ordering, Module::global_iterator &GVI, - DataLayout *TD, + const DataLayout *DL, TargetLibraryInfo *TLI) { - if (!TD) + if (!DL) return false; // If this is a malloc of an abstract type, don't touch it. @@ -1501,7 +1497,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. - Value *NElems = getMallocArraySize(CI, TD, TLI, true); + Value *NElems = getMallocArraySize(CI, DL, TLI, true); if (!NElems) return false; @@ -1509,8 +1505,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. - if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI); + if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); return true; } @@ -1539,13 +1535,13 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { - Type *IntPtrTy = TD->getIntPtrType(CI->getType()); - unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); + Type *IntPtrTy = DL->getIntPtrType(CI->getType()); + unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, - 0, CI->getName()); + nullptr, CI->getName()); Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); @@ -1555,8 +1551,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CI = cast<CallInst>(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true), - TD, TLI); + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), + DL, TLI); return true; } @@ -1568,7 +1564,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, AtomicOrdering Ordering, Module::global_iterator &GVI, - DataLayout *TD, TargetLibraryInfo *TLI) { + const DataLayout *DL, + TargetLibraryInfo *TLI) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1583,13 +1580,13 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. - if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { Type *MallocType = getMallocAllocatedType(CI, TLI); if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI, - TD, TLI)) + DL, TLI)) return true; } } @@ -1616,11 +1613,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // Walk the use list of the global seeing if all the uses are load or store. // If there is anything else, bail out. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ - User *U = *I; + for (User *U : GV->users()) if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) return false; - } DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); @@ -1645,7 +1640,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { IsOneZero = InitVal->isNullValue() && CI->isOne(); while (!GV->use_empty()) { - Instruction *UI = cast<Instruction>(GV->use_back()); + Instruction *UI = cast<Instruction>(GV->user_back()); if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { // Change the store into a boolean store. bool StoringOther = SI->getOperand(0) == OtherVal; @@ -1705,9 +1700,6 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { /// possible. If we make a change, return true. bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, Module::global_iterator &GVI) { - if (!GV->isDiscardableIfUnused()) - return false; - // Do more involved optimizations if the global is internal. GV->removeDeadConstantUsers(); @@ -1746,7 +1738,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // and this function is main (which we know is not recursive), we replace // the global with a local alloca in this function. // - // NOTE: It doesn't make sense to promote non single-value types since we + // NOTE: It doesn't make sense to promote non-single-value types since we // are just replacing static memory to stack memory. // // If the global is in different address space, don't bring it to stack. @@ -1761,7 +1753,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ->getEntryBlock().begin()); Type *ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment - AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr, + GV->getName(), &FirstI); if (!isa<UndefValue>(GV->getInitializer())) new StoreInst(GV->getInitializer(), Alloca, &FirstI); @@ -1783,7 +1776,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, } else { // Delete any stores we can find to the global. We may not be able to // make it completely dead though. - Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI); + Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); } // If the global is dead now, delete it. @@ -1799,7 +1792,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->setConstant(true); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); // If the global is dead now, just nuke it. if (GV->use_empty()) { @@ -1812,11 +1805,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ++NumMarked; return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (DataLayout *TD = getAnalysisIfAvailable<DataLayout>()) - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { + if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) { + const DataLayout &DL = DLP->getDataLayout(); + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { GVI = FirstNewGV; // Don't skip the newly produced globals! return true; } + } } else if (GS.StoredType == GlobalStatus::StoredOnce) { // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the @@ -1828,7 +1823,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); if (GV->use_empty()) { DEBUG(dbgs() << " *** Substituting initializer allowed us to " @@ -1845,7 +1840,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI, - TD, TLI)) + DL, TLI)) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a @@ -1866,11 +1861,11 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified /// function, changing them to FastCC. static void ChangeCalleesToFastCall(Function *F) { - for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ - if (isa<BlockAddress>(*UI)) + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) continue; - CallSite User(cast<Instruction>(*UI)); - User.setCallingConv(CallingConv::Fast); + CallSite CS(cast<Instruction>(U)); + CS.setCallingConv(CallingConv::Fast); } } @@ -1889,21 +1884,31 @@ static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) { static void RemoveNestAttribute(Function *F) { F->setAttributes(StripNest(F->getContext(), F->getAttributes())); - for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ - if (isa<BlockAddress>(*UI)) + for (User *U : F->users()) { + if (isa<BlockAddress>(U)) continue; - CallSite User(cast<Instruction>(*UI)); - User.setAttributes(StripNest(F->getContext(), User.getAttributes())); + CallSite CS(cast<Instruction>(U)); + CS.setAttributes(StripNest(F->getContext(), CS.getAttributes())); } } +/// Return true if this is a calling convention that we'd like to change. The +/// idea here is that we don't want to mess with the convention if the user +/// explicitly requested something with performance implications like coldcc, +/// GHC, or anyregcc. +static bool isProfitableToMakeFastCC(Function *F) { + CallingConv::ID CC = F->getCallingConv(); + // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? + return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; +} + bool GlobalOpt::OptimizeFunctions(Module &M) { bool Changed = false; // Optimize functions. for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { Function *F = FI++; // Functions without names cannot be referenced outside this module. - if (!F->hasName() && !F->isDeclaration()) + if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); F->removeDeadConstantUsers(); if (F->isDefTriviallyDead()) { @@ -1911,11 +1916,11 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { Changed = true; ++NumFnDeleted; } else if (F->hasLocalLinkage()) { - if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && + if (isProfitableToMakeFastCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { - // If this function has C calling conventions, is not a varargs - // function, and is only called directly, promote it to use the Fast - // calling convention. + // If this function has a calling convention worth changing, is not a + // varargs function, and is only called directly, promote it to use the + // Fast calling convention. F->setCallingConv(CallingConv::Fast); ChangeCalleesToFastCall(F); ++NumFastCallFns; @@ -1937,139 +1942,41 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { bool GlobalOpt::OptimizeGlobalVars(Module &M) { bool Changed = false; + + SmallSet<const Comdat *, 8> NotDiscardableComdats; + for (const GlobalVariable &GV : M.globals()) + if (const Comdat *C = GV.getComdat()) + if (!GV.isDiscardableIfUnused()) + NotDiscardableComdats.insert(C); + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; // Global variables without names cannot be referenced outside this module. - if (!GV->hasName() && !GV->isDeclaration()) + if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) GV->setLinkage(GlobalValue::InternalLinkage); // Simplify the initializer. if (GV->hasInitializer()) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { - Constant *New = ConstantFoldConstantExpression(CE, TD, TLI); + Constant *New = ConstantFoldConstantExpression(CE, DL, TLI); if (New && New != CE) GV->setInitializer(New); } - Changed |= ProcessGlobal(GV, GVI); - } - return Changed; -} - -/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all -/// initializers have an init priority of 65535. -GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { - GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); - if (GV == 0) return 0; - - // Verify that the initializer is simple enough for us to handle. We are - // only allowed to optimize the initializer if it is unique. - if (!GV->hasUniqueInitializer()) return 0; - - if (isa<ConstantAggregateZero>(GV->getInitializer())) - return GV; - ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); - - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - if (isa<ConstantAggregateZero>(*i)) - continue; - ConstantStruct *CS = cast<ConstantStruct>(*i); - if (isa<ConstantPointerNull>(CS->getOperand(1))) - continue; - - // Must have a function or null ptr. - if (!isa<Function>(CS->getOperand(1))) - return 0; - - // Init priority must be standard. - ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); - if (CI->getZExtValue() != 65535) - return 0; - } - - return GV; -} - -/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, -/// return a list of the functions and null terminator as a vector. -static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { - if (GV->getInitializer()->isNullValue()) - return std::vector<Function*>(); - ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); - std::vector<Function*> Result; - Result.reserve(CA->getNumOperands()); - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - ConstantStruct *CS = cast<ConstantStruct>(*i); - Result.push_back(dyn_cast<Function>(CS->getOperand(1))); - } - return Result; -} - -/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the -/// specified array, returning the new global to use. -static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, - const std::vector<Function*> &Ctors) { - // If we made a change, reassemble the initializer list. - Constant *CSVals[2]; - CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); - CSVals[1] = 0; - - StructType *StructTy = - cast<StructType>(GCL->getType()->getElementType()->getArrayElementType()); - - // Create the new init list. - std::vector<Constant*> CAList; - for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { - if (Ctors[i]) { - CSVals[1] = Ctors[i]; - } else { - Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), - false); - PointerType *PFTy = PointerType::getUnqual(FTy); - CSVals[1] = Constant::getNullValue(PFTy); - CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 0x7fffffff); + if (GV->isDiscardableIfUnused()) { + if (const Comdat *C = GV->getComdat()) + if (NotDiscardableComdats.count(C)) + continue; + Changed |= ProcessGlobal(GV, GVI); } - CAList.push_back(ConstantStruct::get(StructTy, CSVals)); - } - - // Create the array initializer. - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, - CAList.size()), CAList); - - // If we didn't change the number of elements, don't create a new GV. - if (CA->getType() == GCL->getInitializer()->getType()) { - GCL->setInitializer(CA); - return GCL; - } - - // Create the new global and insert it next to the existing list. - GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), - GCL->getLinkage(), CA, "", - GCL->getThreadLocalMode()); - GCL->getParent()->getGlobalList().insert(GCL, NGV); - NGV->takeName(GCL); - - // Nuke the old list, replacing any uses with the new one. - if (!GCL->use_empty()) { - Constant *V = NGV; - if (V->getType() != GCL->getType()) - V = ConstantExpr::getBitCast(V, GCL->getType()); - GCL->replaceAllUsesWith(V); } - GCL->eraseFromParent(); - - if (Ctors.size()) - return NGV; - else - return 0; + return Changed; } - static inline bool isSimpleEnoughValueToCommit(Constant *C, SmallPtrSet<Constant*, 8> &SimpleConstants, - const DataLayout *TD); + const DataLayout *DL); /// isSimpleEnoughValueToCommit - Return true if the specified constant can be @@ -2082,11 +1989,14 @@ isSimpleEnoughValueToCommit(Constant *C, /// time. static bool isSimpleEnoughValueToCommitHelper(Constant *C, SmallPtrSet<Constant*, 8> &SimpleConstants, - const DataLayout *TD) { - // Simple integer, undef, constant aggregate zero, global addresses, etc are - // all supported. - if (C->getNumOperands() == 0 || isa<BlockAddress>(C) || - isa<GlobalValue>(C)) + const DataLayout *DL) { + // Simple global addresses are supported, do not allow dllimport or + // thread-local globals. + if (auto *GV = dyn_cast<GlobalValue>(C)) + return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); + + // Simple integer, undef, constant aggregate zero, etc are all supported. + if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) return true; // Aggregate values are safe if all their elements are. @@ -2094,7 +2004,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, isa<ConstantVector>(C)) { for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { Constant *Op = cast<Constant>(C->getOperand(i)); - if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD)) + if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, DL)) return false; } return true; @@ -2107,29 +2017,29 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, switch (CE->getOpcode()) { case Instruction::BitCast: // Bitcast is fine if the casted value is fine. - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); case Instruction::IntToPtr: case Instruction::PtrToInt: // int <=> ptr is fine if the int type is the same size as the // pointer type. - if (!TD || TD->getTypeSizeInBits(CE->getType()) != - TD->getTypeSizeInBits(CE->getOperand(0)->getType())) + if (!DL || DL->getTypeSizeInBits(CE->getType()) != + DL->getTypeSizeInBits(CE->getOperand(0)->getType())) return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); // GEP is fine if it is simple + constant offset. case Instruction::GetElementPtr: for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i) if (!isa<ConstantInt>(CE->getOperand(i))) return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); case Instruction::Add: // We allow simple+cst. if (!isa<ConstantInt>(CE->getOperand(1))) return false; - return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD); + return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); } return false; } @@ -2137,11 +2047,11 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, static inline bool isSimpleEnoughValueToCommit(Constant *C, SmallPtrSet<Constant*, 8> &SimpleConstants, - const DataLayout *TD) { + const DataLayout *DL) { // If we already checked this constant, we win. if (!SimpleConstants.insert(C)) return true; // Check the constant. - return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD); + return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); } @@ -2157,8 +2067,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. + // Do not allow weak/*_odr/linkonce linkage or external globals. return GV->hasUniqueInitializer(); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -2173,7 +2082,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; // The first index must be zero. - ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin())); + ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin())); if (!CI || !CI->isZero()) return false; // The remaining indices must be compile-time known integers within the @@ -2268,24 +2177,18 @@ namespace { /// Once an evaluation call fails, the evaluation object should not be reused. class Evaluator { public: - Evaluator(const DataLayout *TD, const TargetLibraryInfo *TLI) - : TD(TD), TLI(TLI) { - ValueStack.push_back(new DenseMap<Value*, Constant*>); + Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI) + : DL(DL), TLI(TLI) { + ValueStack.emplace_back(); } ~Evaluator() { - DeleteContainerPointers(ValueStack); - while (!AllocaTmps.empty()) { - GlobalVariable *Tmp = AllocaTmps.back(); - AllocaTmps.pop_back(); - + for (auto &Tmp : AllocaTmps) // If there are still users of the alloca, the program is doing something // silly, e.g. storing the address of the alloca somewhere and using it // later. Since this is undefined, we'll just make it be null. if (!Tmp->use_empty()) Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); - delete Tmp; - } } /// EvaluateFunction - Evaluate a call to function F, returning true if @@ -2301,13 +2204,13 @@ public: Constant *getVal(Value *V) { if (Constant *CV = dyn_cast<Constant>(V)) return CV; - Constant *R = ValueStack.back()->lookup(V); + Constant *R = ValueStack.back().lookup(V); assert(R && "Reference to an uncomputed value!"); return R; } void setVal(Value *V, Constant *C) { - ValueStack.back()->operator[](V) = C; + ValueStack.back()[V] = C; } const DenseMap<Constant*, Constant*> &getMutatedMemory() const { @@ -2322,9 +2225,9 @@ private: Constant *ComputeLoadResult(Constant *P); /// ValueStack - As we compute SSA register values, we store their contents - /// here. The back of the vector contains the current function and the stack + /// here. The back of the deque contains the current function and the stack /// contains the values in the calling frames. - SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack; + std::deque<DenseMap<Value*, Constant*>> ValueStack; /// CallStack - This is used to detect recursion. In pathological situations /// we could hit exponential behavior, but at least there is nothing @@ -2339,7 +2242,7 @@ private: /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable /// to represent its body. This vector is needed so we can delete the /// temporary globals when we are done. - SmallVector<GlobalVariable*, 32> AllocaTmps; + SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps; /// Invariants - These global variables have been marked invariant by the /// static constructor. @@ -2349,7 +2252,7 @@ private: /// simple enough to live in a static initializer of a global. SmallPtrSet<Constant*, 8> SimpleConstants; - const DataLayout *TD; + const DataLayout *DL; const TargetLibraryInfo *TLI; }; @@ -2368,7 +2271,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); - return 0; + return nullptr; } // Handle a constantexpr getelementptr. @@ -2380,7 +2283,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); } - return 0; // don't know how to evaluate. + return nullptr; // don't know how to evaluate. } /// EvaluateBlock - Evaluate all instructions in block BB, returning true if @@ -2390,7 +2293,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. while (1) { - Constant *InstResult = 0; + Constant *InstResult = nullptr; DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); @@ -2402,7 +2305,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Constant *Ptr = getVal(SI->getOperand(1)); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); - Ptr = ConstantFoldConstantExpression(CE, TD, TLI); + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } if (!isSimpleEnoughPointerToCommit(Ptr)) { @@ -2415,7 +2318,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. - if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) { + if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val << "\n"); return false; @@ -2447,7 +2350,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) - Ptr = ConstantFoldConstantExpression(CE, TD, TLI); + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); // If we can't improve the situation by introspecting NewTy, // we have to give up. @@ -2511,12 +2414,12 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Constant *Ptr = getVal(LI->getOperand(0)); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { - Ptr = ConstantFoldConstantExpression(CE, TD, TLI); + Ptr = ConstantFoldConstantExpression(CE, DL, TLI); DEBUG(dbgs() << "Found a constant pointer expression, constant " "folding: " << *Ptr << "\n"); } InstResult = ComputeLoadResult(Ptr); - if (InstResult == 0) { + if (!InstResult) { DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." "\n"); return false; // Could not evaluate load. @@ -2529,11 +2432,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; // Cannot handle array allocs. } Type *Ty = AI->getType()->getElementType(); - AllocaTmps.push_back(new GlobalVariable(Ty, false, - GlobalValue::InternalLinkage, - UndefValue::get(Ty), - AI->getName())); - InstResult = AllocaTmps.back(); + AllocaTmps.push_back( + make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, + UndefValue::get(Ty), AI->getName())); + InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { CallSite CS(CurInst); @@ -2580,7 +2482,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // We don't insert an entry into Values, as it doesn't have a // meaningful return value. if (!II->use_empty()) { - DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n"); + DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); return false; } ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); @@ -2588,9 +2490,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = cast<PointerType>(GV->getType())->getElementType(); - if (TD && !Size->isAllOnesValue() && + if (DL && !Size->isAllOnesValue() && Size->getValue().getLimitedValue() >= - TD->getTypeStoreSize(ElemTy)) { + DL->getTypeStoreSize(ElemTy)) { Invariants.insert(GV); DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV << "\n"); @@ -2635,17 +2537,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; } - Constant *RetVal = 0; + Constant *RetVal = nullptr; // Execute the call, if successful, use the return value. - ValueStack.push_back(new DenseMap<Value*, Constant*>); + ValueStack.emplace_back(); if (!EvaluateFunction(Callee, RetVal, Formals)) { DEBUG(dbgs() << "Failed to evaluate function.\n"); return false; } - delete ValueStack.pop_back_val(); + ValueStack.pop_back(); InstResult = RetVal; - if (InstResult != NULL) { + if (InstResult) { DEBUG(dbgs() << "Successfully evaluated function. Result: " << InstResult << "\n\n"); } else { @@ -2677,7 +2579,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, else return false; // Cannot determine. } else if (isa<ReturnInst>(CurInst)) { - NextBB = 0; + NextBB = nullptr; } else { // invoke, unwind, resume, unreachable. DEBUG(dbgs() << "Can not handle terminator."); @@ -2696,7 +2598,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (!CurInst->use_empty()) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) - InstResult = ConstantFoldConstantExpression(CE, TD, TLI); + InstResult = ConstantFoldConstantExpression(CE, DL, TLI); setVal(CurInst, InstResult); } @@ -2742,13 +2644,13 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, BasicBlock::iterator CurInst = CurBB->begin(); while (1) { - BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings. + BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); if (!EvaluateBlock(CurInst, NextBB)) return false; - if (NextBB == 0) { + if (!NextBB) { // Successfully running until there's no next block means that we found // the return. Fill it the return value and pop the call stack. ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator()); @@ -2767,7 +2669,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, // Okay, we have never been in this block before. Check to see if there // are any PHI nodes. If so, evaluate them with information about where // we came from. - PHINode *PN = 0; + PHINode *PN = nullptr; for (CurInst = NextBB->begin(); (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB))); @@ -2779,15 +2681,17 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD, +static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, const TargetLibraryInfo *TLI) { // Call the function. - Evaluator Eval(TD, TLI); + Evaluator Eval(DL, TLI); Constant *RetValDummy; bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, SmallVector<Constant*, 0>()); if (EvalSuccess) { + ++NumCtorsEvaluated; + // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" << F->getName() << "' to " << Eval.getMutatedMemory().size() @@ -2805,46 +2709,6 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD, return EvalSuccess; } -/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. -/// Return true if anything changed. -bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { - std::vector<Function*> Ctors = ParseGlobalCtors(GCL); - bool MadeChange = false; - if (Ctors.empty()) return false; - - // Loop over global ctors, optimizing them when we can. - for (unsigned i = 0; i != Ctors.size(); ++i) { - Function *F = Ctors[i]; - // Found a null terminator in the middle of the list, prune off the rest of - // the list. - if (F == 0) { - if (i != Ctors.size()-1) { - Ctors.resize(i+1); - MadeChange = true; - } - break; - } - DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); - - // We cannot simplify external ctor functions. - if (F->empty()) continue; - - // If we can evaluate the ctor at compile time, do. - if (EvaluateStaticConstructor(F, TD, TLI)) { - Ctors.erase(Ctors.begin()+i); - MadeChange = true; - --i; - ++NumCtorsEvaluated; - continue; - } - } - - if (!MadeChange) return false; - - GCL = InstallGlobalCtors(GCL, Ctors); - return true; -} - static int compareNames(Constant *const *A, Constant *const *B) { return (*A)->getName().compare((*B)->getName()); } @@ -2856,12 +2720,14 @@ static void setUsedInitializer(GlobalVariable &V, return; } - SmallVector<llvm::Constant *, 8> UsedArray; - PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext()); + // Type of pointer to the array of pointers. + PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); + SmallVector<llvm::Constant *, 8> UsedArray; for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end(); I != E; ++I) { - Constant *Cast = llvm::ConstantExpr::getBitCast(*I, Int8PtrTy); + Constant *Cast + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(*I, Int8PtrTy); UsedArray.push_back(Cast); } // Sort to get deterministic order. @@ -2992,14 +2858,19 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { I != E;) { Module::alias_iterator J = I++; // Aliases without names cannot be referenced outside this module. - if (!J->hasName() && !J->isDeclaration()) + if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) J->setLinkage(GlobalValue::InternalLinkage); // If the aliasee may change at link time, nothing can be done - bail out. if (J->mayBeOverridden()) continue; Constant *Aliasee = J->getAliasee(); - GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); + GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); + // We can't trivially replace the alias with the aliasee if the aliasee is + // non-trivial in some way. + // TODO: Try to handle non-zero GEPs of local aliasees. + if (!Target) + continue; Target->removeDeadConstantUsers(); // Make all users of the alias use the aliasee instead. @@ -3007,7 +2878,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { if (!hasUsesToReplace(*J, Used, RenameTarget)) continue; - J->replaceAllUsesWith(Aliasee); + J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType())); ++NumAliasesResolved; Changed = true; @@ -3015,7 +2886,8 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { // Give the aliasee the name, linkage and other attributes of the alias. Target->takeName(J); Target->setLinkage(J->getLinkage()); - Target->GlobalValue::copyAttributesFrom(J); + Target->setVisibility(J->getVisibility()); + Target->setDLLStorageClass(J->getDLLStorageClass()); if (Used.usedErase(J)) Used.usedInsert(Target); @@ -3038,12 +2910,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::cxa_atexit)) - return 0; + return nullptr; Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit)); if (!Fn) - return 0; + return nullptr; FunctionType *FTy = Fn->getFunctionType(); @@ -3054,7 +2926,7 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { !FTy->getParamType(0)->isPointerTy() || !FTy->getParamType(1)->isPointerTy() || !FTy->getParamType(2)->isPointerTy()) - return 0; + return nullptr; return Fn; } @@ -3122,8 +2994,8 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { // and remove them. bool Changed = false; - for (Function::use_iterator I = CXAAtExitFn->use_begin(), - E = CXAAtExitFn->use_end(); I != E;) { + for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end(); + I != E;) { // We're only interested in calls. Theoretically, we could handle invoke // instructions as well, but neither llvm-gcc nor clang generate invokes // to __cxa_atexit. @@ -3155,12 +3027,10 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - TD = getAnalysisIfAvailable<DataLayout>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); - // Try to find the llvm.globalctors list. - GlobalVariable *GlobalCtors = FindGlobalCtors(M); - bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -3169,8 +3039,9 @@ bool GlobalOpt::runOnModule(Module &M) { LocalChange |= OptimizeFunctions(M); // Optimize global_ctors list. - if (GlobalCtors) - LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); + LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { + return EvaluateStaticConstructor(F, DL, TLI); + }); // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M); diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp index 4ac1dfc..af541d1 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -15,18 +15,19 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ipconstprop" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" using namespace llvm; +#define DEBUG_TYPE "ipconstprop" + STATISTIC(NumArgumentsProped, "Number of args turned into constants"); STATISTIC(NumReturnValProped, "Number of return values turned into constants"); @@ -39,7 +40,7 @@ namespace { initializeIPCPPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; private: bool PropagateConstantsIntoArguments(Function &F); bool PropagateConstantReturn(Function &F); @@ -86,18 +87,18 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { ArgumentConstants.resize(F.arg_size()); unsigned NumNonconstant = 0; - for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { - User *U = *UI; + for (Use &U : F.uses()) { + User *UR = U.getUser(); // Ignore blockaddress uses. - if (isa<BlockAddress>(U)) continue; + if (isa<BlockAddress>(UR)) continue; // Used by a non-instruction, or not the callee of a function, do not // transform. - if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) + if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR)) return false; - CallSite CS(cast<Instruction>(U)); - if (!CS.isCallee(UI)) + CallSite CS(cast<Instruction>(UR)); + if (!CS.isCallee(&U)) return false; // Check out all of the potentially constant arguments. Note that we don't @@ -112,7 +113,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { continue; Constant *C = dyn_cast<Constant>(*AI); - if (C && ArgumentConstants[i].first == 0) { + if (C && ArgumentConstants[i].first == nullptr) { ArgumentConstants[i].first = C; // First constant seen. } else if (C && ArgumentConstants[i].first == C) { // Still the constant value we think it is. @@ -135,11 +136,11 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { // Do we have a constant argument? if (ArgumentConstants[i].second || AI->use_empty() || - (AI->hasByValAttr() && !F.onlyReadsMemory())) + AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory())) continue; Value *V = ArgumentConstants[i].first; - if (V == 0) V = UndefValue::get(AI->getType()); + if (!V) V = UndefValue::get(AI->getType()); AI->replaceAllUsesWith(V); ++NumArgumentsProped; MadeChange = true; @@ -209,8 +210,8 @@ bool IPCP::PropagateConstantReturn(Function &F) { } // Different or no known return value? Don't propagate this return // value. - RetVals[i] = 0; - // All values non constant? Stop looking. + RetVals[i] = nullptr; + // All values non-constant? Stop looking. if (++NumNonConstant == RetVals.size()) return false; } @@ -220,13 +221,13 @@ bool IPCP::PropagateConstantReturn(Function &F) { // over all users, replacing any uses of the return value with the returned // constant. bool MadeChange = false; - for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { - CallSite CS(*UI); + for (Use &U : F.uses()) { + CallSite CS(U.getUser()); Instruction* Call = CS.getInstruction(); // Not a call instruction or a call instruction that's not calling F // directly? - if (!Call || !CS.isCallee(UI)) + if (!Call || !CS.isCallee(&U)) continue; // Call result not used? @@ -235,7 +236,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { MadeChange = true; - if (STy == 0) { + if (!STy) { Value* New = RetVals[0]; if (Argument *A = dyn_cast<Argument>(New)) // Was an argument returned? Then find the corresponding argument in @@ -244,9 +245,8 @@ bool IPCP::PropagateConstantReturn(Function &F) { Call->replaceAllUsesWith(New); continue; } - - for (Value::use_iterator I = Call->use_begin(), E = Call->use_end(); - I != E;) { + + for (auto I = Call->user_begin(), E = Call->user_end(); I != E;) { Instruction *Ins = cast<Instruction>(*I); // Increment now, so we can remove the use diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp index 5d563d8..b4d31d8 100644 --- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp +++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp @@ -44,6 +44,7 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripDebugDeclarePass(Registry); initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); + initializeBarrierNoopPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp index 437597e..624cb90 100644 --- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp +++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp @@ -12,22 +12,23 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CallSite.h" #include "llvm/Transforms/IPO/InlinerPass.h" using namespace llvm; +#define DEBUG_TYPE "inline" + namespace { /// \brief Inliner pass which only handles "always inline" functions. @@ -36,24 +37,25 @@ class AlwaysInliner : public Inliner { public: // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) { + AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), + ICA(nullptr) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } AlwaysInliner(bool InsertLifetime) - : Inliner(ID, -2000000000, InsertLifetime), ICA(0) { + : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } static char ID; // Pass identification, replacement for typeid - virtual InlineCost getInlineCost(CallSite CS); + InlineCost getInlineCost(CallSite CS) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnSCC(CallGraphSCC &SCC); + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnSCC(CallGraphSCC &SCC) override; using llvm::Pass::doFinalization; - virtual bool doFinalization(CallGraph &CG) { + bool doFinalization(CallGraph &CG) override { return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true); } }; @@ -63,7 +65,7 @@ public: char AlwaysInliner::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) -INITIALIZE_PASS_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) @@ -93,8 +95,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { // that are viable for inlining. FIXME: We shouldn't even get here for // declarations. if (Callee && !Callee->isDeclaration() && - Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::AlwaysInline) && + CS.hasFnAttr(Attribute::AlwaysInline) && ICA->isInlineViable(*Callee)) return InlineCost::getAlways(); diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp index 57379a3..d189756 100644 --- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -11,21 +11,22 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CallSite.h" #include "llvm/Transforms/IPO/InlinerPass.h" using namespace llvm; +#define DEBUG_TYPE "inline" + namespace { /// \brief Actual inliner pass implementation. @@ -37,31 +38,42 @@ class SimpleInliner : public Inliner { InlineCostAnalysis *ICA; public: - SimpleInliner() : Inliner(ID), ICA(0) { + SimpleInliner() : Inliner(ID), ICA(nullptr) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } SimpleInliner(int Threshold) - : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) { + : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallSite CS) { + InlineCost getInlineCost(CallSite CS) override { return ICA->getInlineCost(CS, getInlineThreshold(CS)); } - virtual bool runOnSCC(CallGraphSCC &SCC); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnSCC(CallGraphSCC &SCC) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; }; +static int computeThresholdFromOptLevels(unsigned OptLevel, + unsigned SizeOptLevel) { + if (OptLevel > 2) + return 275; + if (SizeOptLevel == 1) // -Os + return 75; + if (SizeOptLevel == 2) // -Oz + return 25; + return 225; +} + } // end anonymous namespace char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) -INITIALIZE_PASS_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) INITIALIZE_PASS_END(SimpleInliner, "inline", "Function Integration/Inlining", false, false) @@ -72,6 +84,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) { return new SimpleInliner(Threshold); } +Pass *llvm::createFunctionInliningPass(unsigned OptLevel, + unsigned SizeOptLevel) { + return new SimpleInliner( + computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); +} + bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { ICA = &getAnalysis<InlineCostAnalysis>(); return Inliner::runOnSCC(SCC); diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp index d75d6ca..9087ab2 100644 --- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp @@ -13,17 +13,17 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -32,6 +32,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "inline" + STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); @@ -50,6 +52,13 @@ static cl::opt<int> HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint")); +// We instroduce this threshold to help performance of instrumentation based +// PGO before we actually hook up inliner with analysis passes such as BPI and +// BFI. +static cl::opt<int> +ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225), + cl::desc("Threshold for inlining functions with cold attribute")); + // Threshold to use when optsize is specified (and there is no -inline-limit). const int OptSizeThreshold = 75; @@ -117,7 +126,7 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime, - const DataLayout *TD) { + const DataLayout *DL) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -176,7 +185,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // canonicalized to be an allocation *of* an array), or allocations whose // type is not itself an array (because we're afraid of pessimizing SRoA). ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); - if (ATy == 0 || AI->isArrayAllocation()) + if (!ATy || AI->isArrayAllocation()) continue; // Get the list of all available allocas for this array type. @@ -196,7 +205,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // If we don't have data layout information, and only one alloca is using // the target default, then we can't safely merge them because we can't // pick the greater alignment. - if (!TD && (!Align1 || !Align2) && Align1 != Align2) + if (!DL && (!Align1 || !Align2) && Align1 != Align2) continue; // The available alloca has to be in the right function, not in some other @@ -218,8 +227,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, if (Align1 != Align2) { if (!Align1 || !Align2) { - assert(TD && "DataLayout required to compare default alignments"); - unsigned TypeAlign = TD->getABITypeAlignment(AI->getAllocatedType()); + assert(DL && "DataLayout required to compare default alignments"); + unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType()); Align1 = Align1 ? Align1 : TypeAlign; Align2 = Align2 ? Align2 : TypeAlign; @@ -232,7 +241,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; - IFI.StaticAllocas[AllocaNo] = 0; + IFI.StaticAllocas[AllocaNo] = nullptr; break; } @@ -277,9 +286,28 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { Attribute::MinSize)) thres = HintThreshold; + // Listen to the cold attribute when it would decrease the threshold. + bool ColdCallee = Callee && !Callee->isDeclaration() && + Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Cold); + // Command line argument for InlineLimit will override the default + // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, + // do not use the default cold threshold even if it is smaller. + if ((InlineLimit.getNumOccurrences() == 0 || + ColdThreshold.getNumOccurrences() > 0) && ColdCallee && + ColdThreshold < thres) + thres = ColdThreshold; + return thres; } +static void emitAnalysis(CallSite CS, const Twine &Msg) { + Function *Caller = CS.getCaller(); + LLVMContext &Ctx = Caller->getContext(); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); +} + /// shouldInline - Return true if the inliner should attempt to inline /// at the given CallSite. bool Inliner::shouldInline(CallSite CS) { @@ -288,12 +316,16 @@ bool Inliner::shouldInline(CallSite CS) { if (IC.isAlways()) { DEBUG(dbgs() << " Inlining: cost=always" << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) + + " should always be inlined (cost=always)"); return true; } if (IC.isNever()) { DEBUG(dbgs() << " NOT Inlining: cost=never" << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " should never be inlined (cost=never)")); return false; } @@ -302,6 +334,10 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " too costly to inline (cost=") + + Twine(IC.getCost()) + ", threshold=" + + Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } @@ -330,9 +366,8 @@ bool Inliner::shouldInline(CallSite CS) { bool callerWillBeRemoved = Caller->hasLocalLinkage(); // This bool tracks what happens if we DO inline C into B. bool inliningPreventsSomeOuterInline = false; - for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); - I != E; ++I) { - CallSite CS2(*I); + for (User *U : Caller->users()) { + CallSite CS2(U); // If this isn't a call to Caller (it could be some other sort // of reference) skip it. Such references will prevent the caller @@ -363,13 +398,18 @@ bool Inliner::shouldInline(CallSite CS) { // one is set very low by getInlineCost, in anticipation that Caller will // be removed entirely. We did not account for this above unless there // is only one caller of Caller. - if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) + if (callerWillBeRemoved && !Caller->use_empty()) TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) { DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); + emitAnalysis( + CS, Twine("Not inlining. Cost of inlining " + + CS.getCalledFunction()->getName() + + " increases the cost of inlining " + + CS.getCaller()->getName() + " in other contexts")); return false; } } @@ -377,6 +417,10 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << '\n'); + emitAnalysis( + CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") + + CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) + + " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")"); return true; } @@ -395,8 +439,9 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, } bool Inliner::runOnSCC(CallGraphSCC &SCC) { - CallGraph &CG = getAnalysis<CallGraph>(); - const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); SmallPtrSet<Function*, 8> SCCFunctions; @@ -456,7 +501,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, TD); + InlineFunctionInfo InlineInfo(&CG, DL); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -485,7 +530,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { ++NumCallsDeleted; } else { // We can only inline direct calls to non-declarations. - if (Callee == 0 || Callee->isDeclaration()) continue; + if (!Callee || Callee->isDeclaration()) continue; // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee @@ -497,18 +542,37 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; - + LLVMContext &CallerCtx = Caller->getContext(); + + // Get DebugLoc to report. CS will be invalid after Inliner. + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + // If the policy determines that we should inline this function, // try to do so. - if (!shouldInline(CS)) + if (!shouldInline(CS)) { + emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + + " will not be inlined into " + + Caller->getName())); continue; + } // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime, TD)) + InlineHistoryID, InsertLifetime, DL)) { + emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + + " will not be inlined into " + + Caller->getName())); continue; + } ++NumInlined; - + + // Report the inline decision. + emitOptimizationRemark( + CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + " inlined into " + Caller->getName())); + // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. if (!InlineInfo.InlinedCalls.empty()) { diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp index 64e2ced..c970a1a 100644 --- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "internalize" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "internalize" + STATISTIC(NumAliases , "Number of aliases internalized"); STATISTIC(NumFunctions, "Number of functions internalized"); STATISTIC(NumGlobals , "Number of global vars internalized"); @@ -59,11 +60,11 @@ namespace { explicit InternalizePass(); explicit InternalizePass(ArrayRef<const char *> ExportList); void LoadFile(const char *Filename); - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addPreserved<CallGraph>(); + AU.addPreserved<CallGraphWrapperPass>(); } }; } // end anonymous namespace @@ -72,8 +73,7 @@ char InternalizePass::ID = 0; INITIALIZE_PASS(InternalizePass, "internalize", "Internalize Global Symbols", false, false) -InternalizePass::InternalizePass() - : ModulePass(ID) { +InternalizePass::InternalizePass() : ModulePass(ID) { initializeInternalizePassPass(*PassRegistry::getPassRegistry()); if (!APIFile.empty()) // If a filename is specified, use it. LoadFile(APIFile.c_str()); @@ -81,7 +81,7 @@ InternalizePass::InternalizePass() } InternalizePass::InternalizePass(ArrayRef<const char *> ExportList) - : ModulePass(ID){ + : ModulePass(ID) { initializeInternalizePassPass(*PassRegistry::getPassRegistry()); for(ArrayRef<const char *>::const_iterator itr = ExportList.begin(); itr != ExportList.end(); itr++) { @@ -115,6 +115,10 @@ static bool shouldInternalize(const GlobalValue &GV, if (GV.hasAvailableExternallyLinkage()) return false; + // Assume that dllexported symbols are referenced elsewhere + if (GV.hasDLLExportStorageClass()) + return false; + // Already has internal linkage if (GV.hasLocalLinkage()) return false; @@ -127,8 +131,9 @@ static bool shouldInternalize(const GlobalValue &GV, } bool InternalizePass::runOnModule(Module &M) { - CallGraph *CG = getAnalysisIfAvailable<CallGraph>(); - CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; + CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>(); + CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; + CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; bool Changed = false; SmallPtrSet<GlobalValue *, 8> Used; @@ -150,11 +155,11 @@ bool InternalizePass::runOnModule(Module &M) { } // Mark all functions not in the api as internal. - // FIXME: maybe use private linkage? for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); if (ExternalNode) @@ -186,12 +191,12 @@ bool InternalizePass::runOnModule(Module &M) { // Mark all global variables with initializers that are not in the api as // internal as well. - // FIXME: maybe use private linkage? for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumGlobals; @@ -204,6 +209,7 @@ bool InternalizePass::runOnModule(Module &M) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumAliases; @@ -213,9 +219,7 @@ bool InternalizePass::runOnModule(Module &M) { return Changed; } -ModulePass *llvm::createInternalizePass() { - return new InternalizePass(); -} +ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) { return new InternalizePass(ExportList); diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp index 8282a8e..20414aa 100644 --- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -14,11 +14,10 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-extract" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" @@ -30,6 +29,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "loop-extract" + STATISTIC(NumExtracted, "Number of loops extracted"); namespace { @@ -42,12 +43,12 @@ namespace { initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredID(BreakCriticalEdgesID); AU.addRequiredID(LoopSimplifyID); - AU.addRequired<DominatorTree>(); + AU.addRequired<DominatorTreeWrapperPass>(); } }; } @@ -57,7 +58,7 @@ INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract", "Extract loops into new functions", false, false) INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(LoopExtractor, "loop-extract", "Extract loops into new functions", false, false) @@ -79,6 +80,9 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + // Only visit top-level loops. if (L->getParentLoop()) return false; @@ -87,7 +91,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (!L->isLoopSimplifyForm()) return false; - DominatorTree &DT = getAnalysis<DominatorTree>(); + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); bool Changed = false; // If there is more than one top-level loop in this function, extract all of @@ -133,7 +137,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (NumLoops == 0) return Changed; --NumLoops; CodeExtractor Extractor(DT, *L); - if (Extractor.extractCodeRegion() != 0) { + if (Extractor.extractCodeRegion() != nullptr) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. @@ -177,7 +181,7 @@ namespace { LoadFile(BlockFile.c_str()); } - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; }; } @@ -238,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) { if (!Split) continue; SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs); + SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs); } } diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 3861421..2fb0ddb 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -9,13 +9,24 @@ // // This pass looks for equivalent functions that are mergable and folds them. // -// A hash is computed from the function, based on its type and number of -// basic blocks. +// Order relation is defined on set of functions. It was made through +// special function comparison procedure that returns +// 0 when functions are equal, +// -1 when Left function is less than right function, and +// 1 for opposite case. We need total-ordering, so we need to maintain +// four properties on the functions set: +// a <= a (reflexivity) +// if a <= b and b <= a then a = b (antisymmetry) +// if a <= b and b <= c then a <= c (transitivity). +// for all a and b: a <= b or b <= a (totality). // -// Once all hashes are computed, we perform an expensive equality comparison -// on each function pair. This takes n^2/2 comparisons per bucket, so it's -// important that the hash function be high quality. The equality comparison -// iterates through each instruction in each basic block. +// Comparison iterates through each instruction in each basic block. +// Functions are kept on binary tree. For each new function F we perform +// lookup in binary tree. +// In practice it works the following way: +// -- We define Function* container class with custom "operator<" (FunctionPtr). +// -- "FunctionPtr" instances are stored in std::set collection, so every +// std::set::insert operation will give you result in log(N) time. // // When a match is found the functions are folded. If both functions are // overridable, we move the functionality into a new internal function and @@ -31,9 +42,6 @@ // the object they belong to. However, as long as it's only used for a lookup // and call, this is irrelevant, and we'd like to fold such functions. // -// * switch from n^2 pair-wise comparisons to an n-way comparison for each -// bucket. -// // * be smarter about bitcasts. // // In order to fold functions, we will sometimes add either bitcast instructions @@ -41,15 +49,45 @@ // analysis since the two functions differ where one has a bitcast and the // other doesn't. We should learn to look through bitcasts. // +// * Compare complex types with pointer types inside. +// * Compare cross-reference cases. +// * Compare complex expressions. +// +// All the three issues above could be described as ability to prove that +// fA == fB == fC == fE == fF == fG in example below: +// +// void fA() { +// fB(); +// } +// void fB() { +// fA(); +// } +// +// void fE() { +// fF(); +// } +// void fF() { +// fG(); +// } +// void fG() { +// fE(); +// } +// +// Simplest cross-reference case (fA <--> fB) was implemented in previous +// versions of MergeFunctions, though it presented only in two function pairs +// in test-suite (that counts >50k functions) +// Though possibility to detect complex cross-referencing (e.g.: A->B->C->D->A) +// could cover much more cases. +// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRBuilder.h" @@ -58,103 +96,28 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include <vector> using namespace llvm; +#define DEBUG_TYPE "mergefunc" + STATISTIC(NumFunctionsMerged, "Number of functions merged"); STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); -/// Returns the type id for a type to be hashed. We turn pointer types into -/// integers here because the actual compare logic below considers pointers and -/// integers of the same size as equal. -static Type::TypeID getTypeIDForHash(Type *Ty) { - if (Ty->isPointerTy()) - return Type::IntegerTyID; - return Ty->getTypeID(); -} - -/// Creates a hash-code for the function which is the same for any two -/// functions that will compare equal, without looking at the instructions -/// inside the function. -static unsigned profileFunction(const Function *F) { - FunctionType *FTy = F->getFunctionType(); - - FoldingSetNodeID ID; - ID.AddInteger(F->size()); - ID.AddInteger(F->getCallingConv()); - ID.AddBoolean(F->hasGC()); - ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); - return ID.ComputeHash(); -} - -namespace { - -/// ComparableFunction - A struct that pairs together functions with a -/// DataLayout so that we can keep them together as elements in the DenseSet. -class ComparableFunction { -public: - static const ComparableFunction EmptyKey; - static const ComparableFunction TombstoneKey; - static DataLayout * const LookupOnly; - - ComparableFunction(Function *Func, DataLayout *TD) - : Func(Func), Hash(profileFunction(Func)), TD(TD) {} - - Function *getFunc() const { return Func; } - unsigned getHash() const { return Hash; } - DataLayout *getTD() const { return TD; } - - // Drops AssertingVH reference to the function. Outside of debug mode, this - // does nothing. - void release() { - assert(Func && - "Attempted to release function twice, or release empty/tombstone!"); - Func = NULL; - } - -private: - explicit ComparableFunction(unsigned Hash) - : Func(NULL), Hash(Hash), TD(NULL) {} - - AssertingVH<Function> Func; - unsigned Hash; - DataLayout *TD; -}; - -const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); -const ComparableFunction ComparableFunction::TombstoneKey = - ComparableFunction(1); -DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1); - -} - -namespace llvm { - template <> - struct DenseMapInfo<ComparableFunction> { - static ComparableFunction getEmptyKey() { - return ComparableFunction::EmptyKey; - } - static ComparableFunction getTombstoneKey() { - return ComparableFunction::TombstoneKey; - } - static unsigned getHashValue(const ComparableFunction &CF) { - return CF.getHash(); - } - static bool isEqual(const ComparableFunction &LHS, - const ComparableFunction &RHS); - }; -} +static cl::opt<unsigned> NumFunctionsForSanityCheck( + "mergefunc-sanity", + cl::desc("How many functions in module could be used for " + "MergeFunctions pass sanity check. " + "'0' disables this check. Works only with '-debug' key."), + cl::init(0), cl::Hidden); namespace { @@ -164,75 +127,518 @@ namespace { /// side of claiming that two functions are different). class FunctionComparator { public: - FunctionComparator(const DataLayout *TD, const Function *F1, + FunctionComparator(const DataLayout *DL, const Function *F1, const Function *F2) - : F1(F1), F2(F2), TD(TD) {} + : FnL(F1), FnR(F2), DL(DL) {} /// Test whether the two functions have equivalent behaviour. - bool compare(); + int compare(); private: /// Test whether two basic blocks have equivalent behaviour. - bool compare(const BasicBlock *BB1, const BasicBlock *BB2); + int compare(const BasicBlock *BBL, const BasicBlock *BBR); + + /// Constants comparison. + /// Its analog to lexicographical comparison between hypothetical numbers + /// of next format: + /// <bitcastability-trait><raw-bit-contents> + /// + /// 1. Bitcastability. + /// Check whether L's type could be losslessly bitcasted to R's type. + /// On this stage method, in case when lossless bitcast is not possible + /// method returns -1 or 1, thus also defining which type is greater in + /// context of bitcastability. + /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight + /// to the contents comparison. + /// If types differ, remember types comparison result and check + /// whether we still can bitcast types. + /// Stage 1: Types that satisfies isFirstClassType conditions are always + /// greater then others. + /// Stage 2: Vector is greater then non-vector. + /// If both types are vectors, then vector with greater bitwidth is + /// greater. + /// If both types are vectors with the same bitwidth, then types + /// are bitcastable, and we can skip other stages, and go to contents + /// comparison. + /// Stage 3: Pointer types are greater than non-pointers. If both types are + /// pointers of the same address space - go to contents comparison. + /// Different address spaces: pointer with greater address space is + /// greater. + /// Stage 4: Types are neither vectors, nor pointers. And they differ. + /// We don't know how to bitcast them. So, we better don't do it, + /// and return types comparison result (so it determines the + /// relationship among constants we don't know how to bitcast). + /// + /// Just for clearance, let's see how the set of constants could look + /// on single dimension axis: + /// + /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] + /// Where: NFCT - Not a FirstClassType + /// FCT - FirstClassTyp: + /// + /// 2. Compare raw contents. + /// It ignores types on this stage and only compares bits from L and R. + /// Returns 0, if L and R has equivalent contents. + /// -1 or 1 if values are different. + /// Pretty trivial: + /// 2.1. If contents are numbers, compare numbers. + /// Ints with greater bitwidth are greater. Ints with same bitwidths + /// compared by their contents. + /// 2.2. "And so on". Just to avoid discrepancies with comments + /// perhaps it would be better to read the implementation itself. + /// 3. And again about overall picture. Let's look back at how the ordered set + /// of constants will look like: + /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] + /// + /// Now look, what could be inside [FCT, "others"], for example: + /// [FCT, "others"] = + /// [ + /// [double 0.1], [double 1.23], + /// [i32 1], [i32 2], + /// { double 1.0 }, ; StructTyID, NumElements = 1 + /// { i32 1 }, ; StructTyID, NumElements = 1 + /// { double 1, i32 1 }, ; StructTyID, NumElements = 2 + /// { i32 1, double 1 } ; StructTyID, NumElements = 2 + /// ] + /// + /// Let's explain the order. Float numbers will be less than integers, just + /// because of cmpType terms: FloatTyID < IntegerTyID. + /// Floats (with same fltSemantics) are sorted according to their value. + /// Then you can see integers, and they are, like a floats, + /// could be easy sorted among each others. + /// The structures. Structures are grouped at the tail, again because of their + /// TypeID: StructTyID > IntegerTyID > FloatTyID. + /// Structures with greater number of elements are greater. Structures with + /// greater elements going first are greater. + /// The same logic with vectors, arrays and other possible complex types. + /// + /// Bitcastable constants. + /// Let's assume, that some constant, belongs to some group of + /// "so-called-equal" values with different types, and at the same time + /// belongs to another group of constants with equal types + /// and "really" equal values. + /// + /// Now, prove that this is impossible: + /// + /// If constant A with type TyA is bitcastable to B with type TyB, then: + /// 1. All constants with equal types to TyA, are bitcastable to B. Since + /// those should be vectors (if TyA is vector), pointers + /// (if TyA is pointer), or else (if TyA equal to TyB), those types should + /// be equal to TyB. + /// 2. All constants with non-equal, but bitcastable types to TyA, are + /// bitcastable to B. + /// Once again, just because we allow it to vectors and pointers only. + /// This statement could be expanded as below: + /// 2.1. All vectors with equal bitwidth to vector A, has equal bitwidth to + /// vector B, and thus bitcastable to B as well. + /// 2.2. All pointers of the same address space, no matter what they point to, + /// bitcastable. So if C is pointer, it could be bitcasted to A and to B. + /// So any constant equal or bitcastable to A is equal or bitcastable to B. + /// QED. + /// + /// In another words, for pointers and vectors, we ignore top-level type and + /// look at their particular properties (bit-width for vectors, and + /// address space for pointers). + /// If these properties are equal - compare their contents. + int cmpConstants(const Constant *L, const Constant *R); /// Assign or look up previously assigned numbers for the two values, and /// return whether the numbers are equal. Numbers are assigned in the order /// visited. - bool enumerate(const Value *V1, const Value *V2); + /// Comparison order: + /// Stage 0: Value that is function itself is always greater then others. + /// If left and right values are references to their functions, then + /// they are equal. + /// Stage 1: Constants are greater than non-constants. + /// If both left and right are constants, then the result of + /// cmpConstants is used as cmpValues result. + /// Stage 2: InlineAsm instances are greater than others. If both left and + /// right are InlineAsm instances, InlineAsm* pointers casted to + /// integers and compared as numbers. + /// Stage 3: For all other cases we compare order we meet these values in + /// their functions. If right value was met first during scanning, + /// then left value is greater. + /// In another words, we compare serial numbers, for more details + /// see comments for sn_mapL and sn_mapR. + int cmpValues(const Value *L, const Value *R); /// Compare two Instructions for equivalence, similar to /// Instruction::isSameOperationAs but with modifications to the type /// comparison. - bool isEquivalentOperation(const Instruction *I1, - const Instruction *I2) const; + /// Stages are listed in "most significant stage first" order: + /// On each stage below, we do comparison between some left and right + /// operation parts. If parts are non-equal, we assign parts comparison + /// result to the operation comparison result and exit from method. + /// Otherwise we proceed to the next stage. + /// Stages: + /// 1. Operations opcodes. Compared as numbers. + /// 2. Number of operands. + /// 3. Operation types. Compared with cmpType method. + /// 4. Compare operation subclass optional data as stream of bytes: + /// just convert it to integers and call cmpNumbers. + /// 5. Compare in operation operand types with cmpType in + /// most significant operand first order. + /// 6. Last stage. Check operations for some specific attributes. + /// For example, for Load it would be: + /// 6.1.Load: volatile (as boolean flag) + /// 6.2.Load: alignment (as integer numbers) + /// 6.3.Load: synch-scope (as integer numbers) + /// 6.4.Load: range metadata (as integer numbers) + /// On this stage its better to see the code, since its not more than 10-15 + /// strings for particular instruction, and could change sometimes. + int cmpOperation(const Instruction *L, const Instruction *R) const; /// Compare two GEPs for equivalent pointer arithmetic. - bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); - bool isEquivalentGEP(const GetElementPtrInst *GEP1, - const GetElementPtrInst *GEP2) { - return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2)); + /// Parts to be compared for each comparison stage, + /// most significant stage first: + /// 1. Address space. As numbers. + /// 2. Constant offset, (if "DataLayout *DL" field is not NULL, + /// using GEPOperator::accumulateConstantOffset method). + /// 3. Pointer operand type (using cmpType method). + /// 4. Number of operands. + /// 5. Compare operands, using cmpValues method. + int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR); + int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) { + return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR)); } - /// Compare two Types, treating all pointer types as equal. - bool isEquivalentType(Type *Ty1, Type *Ty2) const; + /// cmpType - compares two types, + /// defines total ordering among the types set. + /// + /// Return values: + /// 0 if types are equal, + /// -1 if Left is less than Right, + /// +1 if Left is greater than Right. + /// + /// Description: + /// Comparison is broken onto stages. Like in lexicographical comparison + /// stage coming first has higher priority. + /// On each explanation stage keep in mind total ordering properties. + /// + /// 0. Before comparison we coerce pointer types of 0 address space to + /// integer. + /// We also don't bother with same type at left and right, so + /// just return 0 in this case. + /// + /// 1. If types are of different kind (different type IDs). + /// Return result of type IDs comparison, treating them as numbers. + /// 2. If types are vectors or integers, compare Type* values as numbers. + /// 3. Types has same ID, so check whether they belongs to the next group: + /// * Void + /// * Float + /// * Double + /// * X86_FP80 + /// * FP128 + /// * PPC_FP128 + /// * Label + /// * Metadata + /// If so - return 0, yes - we can treat these types as equal only because + /// their IDs are same. + /// 4. If Left and Right are pointers, return result of address space + /// comparison (numbers comparison). We can treat pointer types of same + /// address space as equal. + /// 5. If types are complex. + /// Then both Left and Right are to be expanded and their element types will + /// be checked with the same way. If we get Res != 0 on some stage, return it. + /// Otherwise return 0. + /// 6. For all other cases put llvm_unreachable. + int cmpType(Type *TyL, Type *TyR) const; + + int cmpNumbers(uint64_t L, uint64_t R) const; + + int cmpAPInt(const APInt &L, const APInt &R) const; + int cmpAPFloat(const APFloat &L, const APFloat &R) const; + int cmpStrings(StringRef L, StringRef R) const; + int cmpAttrs(const AttributeSet L, const AttributeSet R) const; // The two functions undergoing comparison. - const Function *F1, *F2; + const Function *FnL, *FnR; + + const DataLayout *DL; + + /// Assign serial numbers to values from left function, and values from + /// right function. + /// Explanation: + /// Being comparing functions we need to compare values we meet at left and + /// right sides. + /// Its easy to sort things out for external values. It just should be + /// the same value at left and right. + /// But for local values (those were introduced inside function body) + /// we have to ensure they were introduced at exactly the same place, + /// and plays the same role. + /// Let's assign serial number to each value when we meet it first time. + /// Values that were met at same place will be with same serial numbers. + /// In this case it would be good to explain few points about values assigned + /// to BBs and other ways of implementation (see below). + /// + /// 1. Safety of BB reordering. + /// It's safe to change the order of BasicBlocks in function. + /// Relationship with other functions and serial numbering will not be + /// changed in this case. + /// As follows from FunctionComparator::compare(), we do CFG walk: we start + /// from the entry, and then take each terminator. So it doesn't matter how in + /// fact BBs are ordered in function. And since cmpValues are called during + /// this walk, the numbering depends only on how BBs located inside the CFG. + /// So the answer is - yes. We will get the same numbering. + /// + /// 2. Impossibility to use dominance properties of values. + /// If we compare two instruction operands: first is usage of local + /// variable AL from function FL, and second is usage of local variable AR + /// from FR, we could compare their origins and check whether they are + /// defined at the same place. + /// But, we are still not able to compare operands of PHI nodes, since those + /// could be operands from further BBs we didn't scan yet. + /// So it's impossible to use dominance properties in general. + DenseMap<const Value*, int> sn_mapL, sn_mapR; +}; - const DataLayout *TD; +class FunctionPtr { + AssertingVH<Function> F; + const DataLayout *DL; - DenseMap<const Value *, const Value *> id_map; - DenseSet<const Value *> seen_values; +public: + FunctionPtr(Function *F, const DataLayout *DL) : F(F), DL(DL) {} + Function *getFunc() const { return F; } + void release() { F = 0; } + bool operator<(const FunctionPtr &RHS) const { + return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1; + } }; +} + +int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { + if (L < R) return -1; + if (L > R) return 1; + return 0; +} +int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const { + if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) + return Res; + if (L.ugt(R)) return 1; + if (R.ugt(L)) return -1; + return 0; } -// Any two pointers in the same address space are equivalent, intptr_t and -// pointers are equivalent. Otherwise, standard type equivalence rules apply. -bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const { +int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const { + if (int Res = cmpNumbers((uint64_t)&L.getSemantics(), + (uint64_t)&R.getSemantics())) + return Res; + return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt()); +} - PointerType *PTy1 = dyn_cast<PointerType>(Ty1); - PointerType *PTy2 = dyn_cast<PointerType>(Ty2); +int FunctionComparator::cmpStrings(StringRef L, StringRef R) const { + // Prevent heavy comparison, compare sizes first. + if (int Res = cmpNumbers(L.size(), R.size())) + return Res; - if (TD) { - if (PTy1 && PTy1->getAddressSpace() == 0) Ty1 = TD->getIntPtrType(Ty1); - if (PTy2 && PTy2->getAddressSpace() == 0) Ty2 = TD->getIntPtrType(Ty2); + // Compare strings lexicographically only when it is necessary: only when + // strings are equal in size. + return L.compare(R); +} + +int FunctionComparator::cmpAttrs(const AttributeSet L, + const AttributeSet R) const { + if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) + return Res; + + for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { + AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), + RE = R.end(i); + for (; LI != LE && RI != RE; ++LI, ++RI) { + Attribute LA = *LI; + Attribute RA = *RI; + if (LA < RA) + return -1; + if (RA < LA) + return 1; + } + if (LI != LE) + return 1; + if (RI != RE) + return -1; } + return 0; +} - if (Ty1 == Ty2) - return true; +/// Constants comparison: +/// 1. Check whether type of L constant could be losslessly bitcasted to R +/// type. +/// 2. Compare constant contents. +/// For more details see declaration comments. +int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { + + Type *TyL = L->getType(); + Type *TyR = R->getType(); + + // Check whether types are bitcastable. This part is just re-factored + // Type::canLosslesslyBitCastTo method, but instead of returning true/false, + // we also pack into result which type is "less" for us. + int TypesRes = cmpType(TyL, TyR); + if (TypesRes != 0) { + // Types are different, but check whether we can bitcast them. + if (!TyL->isFirstClassType()) { + if (TyR->isFirstClassType()) + return -1; + // Neither TyL nor TyR are values of first class type. Return the result + // of comparing the types + return TypesRes; + } + if (!TyR->isFirstClassType()) { + if (TyL->isFirstClassType()) + return 1; + return TypesRes; + } - if (Ty1->getTypeID() != Ty2->getTypeID()) - return false; + // Vector -> Vector conversions are always lossless if the two vector types + // have the same size, otherwise not. + unsigned TyLWidth = 0; + unsigned TyRWidth = 0; + + if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL)) + TyLWidth = VecTyL->getBitWidth(); + if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR)) + TyRWidth = VecTyR->getBitWidth(); + + if (TyLWidth != TyRWidth) + return cmpNumbers(TyLWidth, TyRWidth); + + // Zero bit-width means neither TyL nor TyR are vectors. + if (!TyLWidth) { + PointerType *PTyL = dyn_cast<PointerType>(TyL); + PointerType *PTyR = dyn_cast<PointerType>(TyR); + if (PTyL && PTyR) { + unsigned AddrSpaceL = PTyL->getAddressSpace(); + unsigned AddrSpaceR = PTyR->getAddressSpace(); + if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR)) + return Res; + } + if (PTyL) + return 1; + if (PTyR) + return -1; + + // TyL and TyR aren't vectors, nor pointers. We don't know how to + // bitcast them. + return TypesRes; + } + } + + // OK, types are bitcastable, now check constant contents. + + if (L->isNullValue() && R->isNullValue()) + return TypesRes; + if (L->isNullValue() && !R->isNullValue()) + return 1; + if (!L->isNullValue() && R->isNullValue()) + return -1; + + if (int Res = cmpNumbers(L->getValueID(), R->getValueID())) + return Res; + + switch (L->getValueID()) { + case Value::UndefValueVal: return TypesRes; + case Value::ConstantIntVal: { + const APInt &LInt = cast<ConstantInt>(L)->getValue(); + const APInt &RInt = cast<ConstantInt>(R)->getValue(); + return cmpAPInt(LInt, RInt); + } + case Value::ConstantFPVal: { + const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF(); + const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF(); + return cmpAPFloat(LAPF, RAPF); + } + case Value::ConstantArrayVal: { + const ConstantArray *LA = cast<ConstantArray>(L); + const ConstantArray *RA = cast<ConstantArray>(R); + uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements(); + uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)), + cast<Constant>(RA->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantStructVal: { + const ConstantStruct *LS = cast<ConstantStruct>(L); + const ConstantStruct *RS = cast<ConstantStruct>(R); + unsigned NumElementsL = cast<StructType>(TyL)->getNumElements(); + unsigned NumElementsR = cast<StructType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (unsigned i = 0; i != NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)), + cast<Constant>(RS->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantVectorVal: { + const ConstantVector *LV = cast<ConstantVector>(L); + const ConstantVector *RV = cast<ConstantVector>(R); + unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements(); + unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)), + cast<Constant>(RV->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantExprVal: { + const ConstantExpr *LE = cast<ConstantExpr>(L); + const ConstantExpr *RE = cast<ConstantExpr>(R); + unsigned NumOperandsL = LE->getNumOperands(); + unsigned NumOperandsR = RE->getNumOperands(); + if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) + return Res; + for (unsigned i = 0; i < NumOperandsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)), + cast<Constant>(RE->getOperand(i)))) + return Res; + } + return 0; + } + case Value::FunctionVal: + case Value::GlobalVariableVal: + case Value::GlobalAliasVal: + default: // Unknown constant, cast L and R pointers to numbers and compare. + return cmpNumbers((uint64_t)L, (uint64_t)R); + } +} + +/// cmpType - compares two types, +/// defines total ordering among the types set. +/// See method declaration comments for more details. +int FunctionComparator::cmpType(Type *TyL, Type *TyR) const { - switch (Ty1->getTypeID()) { + PointerType *PTyL = dyn_cast<PointerType>(TyL); + PointerType *PTyR = dyn_cast<PointerType>(TyR); + + if (DL) { + if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL); + if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR); + } + + if (TyL == TyR) + return 0; + + if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID())) + return Res; + + switch (TyL->getTypeID()) { default: llvm_unreachable("Unknown type!"); // Fall through in Release mode. case Type::IntegerTyID: case Type::VectorTyID: - // Ty1 == Ty2 would have returned true earlier. - return false; + // TyL == TyR would have returned true earlier. + return cmpNumbers((uint64_t)TyL, (uint64_t)TyR); case Type::VoidTyID: case Type::FloatTyID: @@ -242,51 +648,55 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const { case Type::PPC_FP128TyID: case Type::LabelTyID: case Type::MetadataTyID: - return true; + return 0; case Type::PointerTyID: { - assert(PTy1 && PTy2 && "Both types must be pointers here."); - return PTy1->getAddressSpace() == PTy2->getAddressSpace(); + assert(PTyL && PTyR && "Both types must be pointers here."); + return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace()); } case Type::StructTyID: { - StructType *STy1 = cast<StructType>(Ty1); - StructType *STy2 = cast<StructType>(Ty2); - if (STy1->getNumElements() != STy2->getNumElements()) - return false; - - if (STy1->isPacked() != STy2->isPacked()) - return false; - - for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) { - if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i))) - return false; + StructType *STyL = cast<StructType>(TyL); + StructType *STyR = cast<StructType>(TyR); + if (STyL->getNumElements() != STyR->getNumElements()) + return cmpNumbers(STyL->getNumElements(), STyR->getNumElements()); + + if (STyL->isPacked() != STyR->isPacked()) + return cmpNumbers(STyL->isPacked(), STyR->isPacked()); + + for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) { + if (int Res = cmpType(STyL->getElementType(i), + STyR->getElementType(i))) + return Res; } - return true; + return 0; } case Type::FunctionTyID: { - FunctionType *FTy1 = cast<FunctionType>(Ty1); - FunctionType *FTy2 = cast<FunctionType>(Ty2); - if (FTy1->getNumParams() != FTy2->getNumParams() || - FTy1->isVarArg() != FTy2->isVarArg()) - return false; + FunctionType *FTyL = cast<FunctionType>(TyL); + FunctionType *FTyR = cast<FunctionType>(TyR); + if (FTyL->getNumParams() != FTyR->getNumParams()) + return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams()); - if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType())) - return false; + if (FTyL->isVarArg() != FTyR->isVarArg()) + return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg()); + + if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType())) + return Res; - for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) { - if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i))) - return false; + for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) { + if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i))) + return Res; } - return true; + return 0; } case Type::ArrayTyID: { - ArrayType *ATy1 = cast<ArrayType>(Ty1); - ArrayType *ATy2 = cast<ArrayType>(Ty2); - return ATy1->getNumElements() == ATy2->getNumElements() && - isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); + ArrayType *ATyL = cast<ArrayType>(TyL); + ArrayType *ATyR = cast<ArrayType>(TyR); + if (ATyL->getNumElements() != ATyR->getNumElements()) + return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements()); + return cmpType(ATyL->getElementType(), ATyR->getElementType()); } } } @@ -294,222 +704,323 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const { // Determine whether the two operations are the same except that pointer-to-A // and pointer-to-B are equivalent. This should be kept in sync with // Instruction::isSameOperationAs. -bool FunctionComparator::isEquivalentOperation(const Instruction *I1, - const Instruction *I2) const { +// Read method declaration comments for more details. +int FunctionComparator::cmpOperation(const Instruction *L, + const Instruction *R) const { // Differences from Instruction::isSameOperationAs: // * replace type comparison with calls to isEquivalentType. // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top // * because of the above, we don't test for the tail bit on calls later on - if (I1->getOpcode() != I2->getOpcode() || - I1->getNumOperands() != I2->getNumOperands() || - !isEquivalentType(I1->getType(), I2->getType()) || - !I1->hasSameSubclassOptionalData(I2)) - return false; + if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode())) + return Res; + + if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) + return Res; + + if (int Res = cmpType(L->getType(), R->getType())) + return Res; + + if (int Res = cmpNumbers(L->getRawSubclassOptionalData(), + R->getRawSubclassOptionalData())) + return Res; // We have two instructions of identical opcode and #operands. Check to see // if all operands are the same type - for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i) - if (!isEquivalentType(I1->getOperand(i)->getType(), - I2->getOperand(i)->getType())) - return false; + for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { + if (int Res = + cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType())) + return Res; + } // Check special state that is a part of some instructions. - if (const LoadInst *LI = dyn_cast<LoadInst>(I1)) - return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() && - LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() && - LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() && - LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope(); - if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) - return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && - SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() && - SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() && - SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope(); - if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) - return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); - if (const CallInst *CI = dyn_cast<CallInst>(I1)) - return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && - CI->getAttributes() == cast<CallInst>(I2)->getAttributes(); - if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) - return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && - CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) - return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices(); - if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) - return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices(); - if (const FenceInst *FI = dyn_cast<FenceInst>(I1)) - return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() && - FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope(); - if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1)) - return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() && - CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() && - CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope(); - if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1)) - return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() && - RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() && - RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() && - RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope(); + if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { + if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) + return Res; + if (int Res = + cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) + return Res; + return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range), + (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { + if (int Res = + cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) + return Res; + return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); + } + if (const CmpInst *CI = dyn_cast<CmpInst>(L)) + return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); + if (const CallInst *CI = dyn_cast<CallInst>(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast<CallInst>(R)->getCallingConv())) + return Res; + if (int Res = + cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes())) + return Res; + return cmpNumbers( + (uint64_t)CI->getMetadata(LLVMContext::MD_range), + (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast<InvokeInst>(R)->getCallingConv())) + return Res; + if (int Res = + cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes())) + return Res; + return cmpNumbers( + (uint64_t)CI->getMetadata(LLVMContext::MD_range), + (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); + } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { + ArrayRef<unsigned> LIndices = IVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { + ArrayRef<unsigned> LIndices = EVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { + if (int Res = + cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) + return Res; + return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); + } - return true; + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { + if (int Res = cmpNumbers(CXI->isVolatile(), + cast<AtomicCmpXchgInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(CXI->isWeak(), + cast<AtomicCmpXchgInst>(R)->isWeak())) + return Res; + if (int Res = cmpNumbers(CXI->getSuccessOrdering(), + cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) + return Res; + if (int Res = cmpNumbers(CXI->getFailureOrdering(), + cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) + return Res; + return cmpNumbers(CXI->getSynchScope(), + cast<AtomicCmpXchgInst>(R)->getSynchScope()); + } + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { + if (int Res = cmpNumbers(RMWI->getOperation(), + cast<AtomicRMWInst>(R)->getOperation())) + return Res; + if (int Res = cmpNumbers(RMWI->isVolatile(), + cast<AtomicRMWInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(RMWI->getOrdering(), + cast<AtomicRMWInst>(R)->getOrdering())) + return Res; + return cmpNumbers(RMWI->getSynchScope(), + cast<AtomicRMWInst>(R)->getSynchScope()); + } + return 0; } // Determine whether two GEP operations perform the same underlying arithmetic. -bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, - const GEPOperator *GEP2) { - unsigned AS = GEP1->getPointerAddressSpace(); - if (AS != GEP2->getPointerAddressSpace()) - return false; - - if (TD) { - // When we have target data, we can reduce the GEP down to the value in bytes - // added to the address. - unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 1; - APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0); - if (GEP1->accumulateConstantOffset(*TD, Offset1) && - GEP2->accumulateConstantOffset(*TD, Offset2)) { - return Offset1 == Offset2; - } +// Read method declaration comments for more details. +int FunctionComparator::cmpGEP(const GEPOperator *GEPL, + const GEPOperator *GEPR) { + + unsigned int ASL = GEPL->getPointerAddressSpace(); + unsigned int ASR = GEPR->getPointerAddressSpace(); + + if (int Res = cmpNumbers(ASL, ASR)) + return Res; + + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. + if (DL) { + unsigned BitWidth = DL->getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(*DL, OffsetL) && + GEPR->accumulateConstantOffset(*DL, OffsetR)) + return cmpAPInt(OffsetL, OffsetR); } - if (GEP1->getPointerOperand()->getType() != - GEP2->getPointerOperand()->getType()) - return false; + if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), + (uint64_t)GEPR->getPointerOperand()->getType())) + return Res; - if (GEP1->getNumOperands() != GEP2->getNumOperands()) - return false; + if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) + return Res; - for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { - if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) - return false; + for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { + if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) + return Res; } - return true; + return 0; } -// Compare two values used by the two functions under pair-wise comparison. If -// this is the first time the values are seen, they're added to the mapping so -// that we will detect mismatches on next use. -bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { - // Check for function @f1 referring to itself and function @f2 referring to - // itself, or referring to each other, or both referring to either of them. - // They're all equivalent if the two functions are otherwise equivalent. - if (V1 == F1 && V2 == F2) - return true; - if (V1 == F2 && V2 == F1) - return true; - - if (const Constant *C1 = dyn_cast<Constant>(V1)) { - if (V1 == V2) return true; - const Constant *C2 = dyn_cast<Constant>(V2); - if (!C2) return false; - // TODO: constant expressions with GEP or references to F1 or F2. - if (C1->isNullValue() && C2->isNullValue() && - isEquivalentType(C1->getType(), C2->getType())) - return true; - // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 - // then they must have equal bit patterns. - return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && - C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType()); - } - - if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2)) - return V1 == V2; - - // Check that V1 maps to V2. If we find a value that V1 maps to then we simply - // check whether it's equal to V2. When there is no mapping then we need to - // ensure that V2 isn't already equivalent to something else. For this - // purpose, we track the V2 values in a set. - - const Value *&map_elem = id_map[V1]; - if (map_elem) - return map_elem == V2; - if (!seen_values.insert(V2).second) - return false; - map_elem = V2; - return true; -} - -// Test whether two basic blocks have equivalent behaviour. -bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) { - BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); - BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); +/// Compare two values used by the two functions under pair-wise comparison. If +/// this is the first time the values are seen, they're added to the mapping so +/// that we will detect mismatches on next use. +/// See comments in declaration for more details. +int FunctionComparator::cmpValues(const Value *L, const Value *R) { + // Catch self-reference case. + if (L == FnL) { + if (R == FnR) + return 0; + return -1; + } + if (R == FnR) { + if (L == FnL) + return 0; + return 1; + } - do { - if (!enumerate(F1I, F2I)) - return false; + const Constant *ConstL = dyn_cast<Constant>(L); + const Constant *ConstR = dyn_cast<Constant>(R); + if (ConstL && ConstR) { + if (L == R) + return 0; + return cmpConstants(ConstL, ConstR); + } - if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) { - const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I); - if (!GEP2) - return false; + if (ConstL) + return 1; + if (ConstR) + return -1; - if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) - return false; + const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L); + const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); - if (!isEquivalentGEP(GEP1, GEP2)) - return false; - } else { - if (!isEquivalentOperation(F1I, F2I)) - return false; + if (InlineAsmL && InlineAsmR) + return cmpNumbers((uint64_t)L, (uint64_t)R); + if (InlineAsmL) + return 1; + if (InlineAsmR) + return -1; - assert(F1I->getNumOperands() == F2I->getNumOperands()); - for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) { - Value *OpF1 = F1I->getOperand(i); - Value *OpF2 = F2I->getOperand(i); + auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())), + RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size())); - if (!enumerate(OpF1, OpF2)) - return false; + return cmpNumbers(LeftSN.first->second, RightSN.first->second); +} +// Test whether two basic blocks have equivalent behaviour. +int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { + BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); + BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); - if (OpF1->getValueID() != OpF2->getValueID() || - !isEquivalentType(OpF1->getType(), OpF2->getType())) - return false; + do { + if (int Res = cmpValues(InstL, InstR)) + return Res; + + const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL); + const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR); + + if (GEPL && !GEPR) + return 1; + if (GEPR && !GEPL) + return -1; + + if (GEPL && GEPR) { + if (int Res = + cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) + return Res; + if (int Res = cmpGEP(GEPL, GEPR)) + return Res; + } else { + if (int Res = cmpOperation(InstL, InstR)) + return Res; + assert(InstL->getNumOperands() == InstR->getNumOperands()); + + for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { + Value *OpL = InstL->getOperand(i); + Value *OpR = InstR->getOperand(i); + if (int Res = cmpValues(OpL, OpR)) + return Res; + if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID())) + return Res; + // TODO: Already checked in cmpOperation + if (int Res = cmpType(OpL->getType(), OpR->getType())) + return Res; } } - ++F1I, ++F2I; - } while (F1I != F1E && F2I != F2E); + ++InstL, ++InstR; + } while (InstL != InstLE && InstR != InstRE); - return F1I == F1E && F2I == F2E; + if (InstL != InstLE && InstR == InstRE) + return 1; + if (InstL == InstLE && InstR != InstRE) + return -1; + return 0; } // Test whether the two functions have equivalent behaviour. -bool FunctionComparator::compare() { - // We need to recheck everything, but check the things that weren't included - // in the hash first. +int FunctionComparator::compare() { - if (F1->getAttributes() != F2->getAttributes()) - return false; + sn_mapL.clear(); + sn_mapR.clear(); - if (F1->hasGC() != F2->hasGC()) - return false; + if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) + return Res; - if (F1->hasGC() && F1->getGC() != F2->getGC()) - return false; + if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) + return Res; - if (F1->hasSection() != F2->hasSection()) - return false; + if (FnL->hasGC()) { + if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC())) + return Res; + } - if (F1->hasSection() && F1->getSection() != F2->getSection()) - return false; + if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) + return Res; - if (F1->isVarArg() != F2->isVarArg()) - return false; + if (FnL->hasSection()) { + if (int Res = cmpStrings(FnL->getSection(), FnR->getSection())) + return Res; + } + + if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) + return Res; // TODO: if it's internal and only used in direct calls, we could handle this // case too. - if (F1->getCallingConv() != F2->getCallingConv()) - return false; + if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) + return Res; - if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType())) - return false; + if (int Res = cmpType(FnL->getFunctionType(), FnR->getFunctionType())) + return Res; - assert(F1->arg_size() == F2->arg_size() && + assert(FnL->arg_size() == FnR->arg_size() && "Identically typed functions have different numbers of args!"); // Visit the arguments so that they get enumerated in the order they're // passed in. - for (Function::const_arg_iterator f1i = F1->arg_begin(), - f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) { - if (!enumerate(f1i, f2i)) + for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), + ArgRI = FnR->arg_begin(), + ArgLE = FnL->arg_end(); + ArgLI != ArgLE; ++ArgLI, ++ArgRI) { + if (cmpValues(ArgLI, ArgRI) != 0) llvm_unreachable("Arguments repeat!"); } @@ -517,33 +1028,36 @@ bool FunctionComparator::compare() { // linked list is immaterial. Our walk starts at the entry block for both // functions, then takes each block from each terminator in order. As an // artifact, this also means that unreachable blocks are ignored. - SmallVector<const BasicBlock *, 8> F1BBs, F2BBs; + SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs; SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1. - F1BBs.push_back(&F1->getEntryBlock()); - F2BBs.push_back(&F2->getEntryBlock()); + FnLBBs.push_back(&FnL->getEntryBlock()); + FnRBBs.push_back(&FnR->getEntryBlock()); - VisitedBBs.insert(F1BBs[0]); - while (!F1BBs.empty()) { - const BasicBlock *F1BB = F1BBs.pop_back_val(); - const BasicBlock *F2BB = F2BBs.pop_back_val(); + VisitedBBs.insert(FnLBBs[0]); + while (!FnLBBs.empty()) { + const BasicBlock *BBL = FnLBBs.pop_back_val(); + const BasicBlock *BBR = FnRBBs.pop_back_val(); - if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB)) - return false; + if (int Res = cmpValues(BBL, BBR)) + return Res; - const TerminatorInst *F1TI = F1BB->getTerminator(); - const TerminatorInst *F2TI = F2BB->getTerminator(); + if (int Res = compare(BBL, BBR)) + return Res; - assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors()); - for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(F1TI->getSuccessor(i))) + const TerminatorInst *TermL = BBL->getTerminator(); + const TerminatorInst *TermR = BBR->getTerminator(); + + assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); + for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(TermL->getSuccessor(i))) continue; - F1BBs.push_back(F1TI->getSuccessor(i)); - F2BBs.push_back(F2TI->getSuccessor(i)); + FnLBBs.push_back(TermL->getSuccessor(i)); + FnRBBs.push_back(TermR->getSuccessor(i)); } } - return true; + return 0; } namespace { @@ -561,24 +1075,28 @@ public: initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; private: - typedef DenseSet<ComparableFunction> FnSetType; + typedef std::set<FunctionPtr> FnTreeType; /// A work queue of functions that may have been modified and should be /// analyzed again. std::vector<WeakVH> Deferred; - /// Insert a ComparableFunction into the FnSet, or merge it away if it's + /// Checks the rules of order relation introduced among functions set. + /// Returns true, if sanity check has been passed, and false if failed. + bool doSanityCheck(std::vector<WeakVH> &Worklist); + + /// Insert a ComparableFunction into the FnTree, or merge it away if it's /// equal to one that's already present. - bool insert(ComparableFunction &NewF); + bool insert(Function *NewFunction); - /// Remove a Function from the FnSet and queue it up for a second sweep of + /// Remove a Function from the FnTree and queue it up for a second sweep of /// analysis. void remove(Function *F); - /// Find the functions that use this Value and remove them from FnSet and + /// Find the functions that use this Value and remove them from FnTree and /// queue the functions. void removeUsers(Value *V); @@ -603,10 +1121,10 @@ private: /// The set of all distinct functions. Use the insert() and remove() methods /// to modify it. - FnSetType FnSet; + FnTreeType FnTree; /// DataLayout for more accurate GEP comparisons. May be NULL. - DataLayout *TD; + const DataLayout *DL; /// Whether or not the target supports global aliases. bool HasGlobalAliases; @@ -621,20 +1139,94 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } +bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { + if (const unsigned Max = NumFunctionsForSanityCheck) { + unsigned TripleNumber = 0; + bool Valid = true; + + dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; + + unsigned i = 0; + for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); + I != E && i < Max; ++I, ++i) { + unsigned j = i; + for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { + Function *F1 = cast<Function>(*I); + Function *F2 = cast<Function>(*J); + int Res1 = FunctionComparator(DL, F1, F2).compare(); + int Res2 = FunctionComparator(DL, F2, F1).compare(); + + // If F1 <= F2, then F2 >= F1, otherwise report failure. + if (Res1 != -Res2) { + dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber + << "\n"; + F1->dump(); + F2->dump(); + Valid = false; + } + + if (Res1 == 0) + continue; + + unsigned k = j; + for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; + ++k, ++K, ++TripleNumber) { + if (K == J) + continue; + + Function *F3 = cast<Function>(*K); + int Res3 = FunctionComparator(DL, F1, F3).compare(); + int Res4 = FunctionComparator(DL, F2, F3).compare(); + + bool Transitive = true; + + if (Res1 != 0 && Res1 == Res4) { + // F1 > F2, F2 > F3 => F1 > F3 + Transitive = Res3 == Res1; + } else if (Res3 != 0 && Res3 == -Res4) { + // F1 > F3, F3 > F2 => F1 > F2 + Transitive = Res3 == Res1; + } else if (Res4 != 0 && -Res3 == Res4) { + // F2 > F3, F3 > F1 => F2 > F1 + Transitive = Res4 == -Res1; + } + + if (!Transitive) { + dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: " + << TripleNumber << "\n"; + dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " + << Res4 << "\n"; + F1->dump(); + F2->dump(); + F3->dump(); + Valid = false; + } + } + } + } + + dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n"; + return Valid; + } + return true; +} + bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; - TD = getAnalysisIfAvailable<DataLayout>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) Deferred.push_back(WeakVH(I)); } - FnSet.resize(Deferred.size()); do { std::vector<WeakVH> Worklist; Deferred.swap(Worklist); + DEBUG(doSanityCheck(Worklist)); + DEBUG(dbgs() << "size of module: " << M.size() << '\n'); DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); @@ -646,8 +1238,7 @@ bool MergeFunctions::runOnModule(Module &M) { Function *F = cast<Function>(*I); if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && !F->mayBeOverridden()) { - ComparableFunction CF = ComparableFunction(F, TD); - Changed |= insert(CF); + Changed |= insert(F); } } @@ -661,49 +1252,27 @@ bool MergeFunctions::runOnModule(Module &M) { Function *F = cast<Function>(*I); if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && F->mayBeOverridden()) { - ComparableFunction CF = ComparableFunction(F, TD); - Changed |= insert(CF); + Changed |= insert(F); } } - DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n'); + DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n'); } while (!Deferred.empty()); - FnSet.clear(); + FnTree.clear(); return Changed; } -bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS, - const ComparableFunction &RHS) { - if (LHS.getFunc() == RHS.getFunc() && - LHS.getHash() == RHS.getHash()) - return true; - if (!LHS.getFunc() || !RHS.getFunc()) - return false; - - // One of these is a special "underlying pointer comparison only" object. - if (LHS.getTD() == ComparableFunction::LookupOnly || - RHS.getTD() == ComparableFunction::LookupOnly) - return false; - - assert(LHS.getTD() == RHS.getTD() && - "Comparing functions for different targets"); - - return FunctionComparator(LHS.getTD(), LHS.getFunc(), - RHS.getFunc()).compare(); -} - // Replace direct callers of Old with New. void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); - for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); - UI != UE;) { - Value::use_iterator TheIter = UI; + for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) { + Use *U = &*UI; ++UI; - CallSite CS(*TheIter); - if (CS && CS.isCallee(TheIter)) { + CallSite CS(U->getUser()); + if (CS && CS.isCallee(U)) { remove(CS.getInstruction()->getParent()->getParent()); - TheIter.getUse().set(BitcastNew); + U->set(BitcastNew); } } } @@ -723,9 +1292,24 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { // Helper for writeThunk, // Selects proper bitcast operation, -// but a bit simplier then CastInst::getCastOpcode. -static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) { +// but a bit simpler then CastInst::getCastOpcode. +static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) { Type *SrcTy = V->getType(); + if (SrcTy->isStructTy()) { + assert(DestTy->isStructTy()); + assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements()); + Value *Result = UndefValue::get(DestTy); + for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) { + Value *Element = createCast( + Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)), + DestTy->getStructElementType(I)); + + Result = + Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I)); + } + return Result; + } + assert(!DestTy->isStructTy()); if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) return Builder.CreateIntToPtr(V, DestTy); else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) @@ -784,9 +1368,9 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { // Replace G with an alias to F and delete G. void MergeFunctions::writeAlias(Function *F, Function *G) { - Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); - GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", - BitcastF, G->getParent()); + PointerType *PTy = G->getType(); + auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + G->getLinkage(), "", F); F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); GA->takeName(G); GA->setVisibility(G->getVisibility()); @@ -833,54 +1417,57 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { ++NumFunctionsMerged; } -// Insert a ComparableFunction into the FnSet, or merge it away if equal to one +// Insert a ComparableFunction into the FnTree, or merge it away if equal to one // that was already inserted. -bool MergeFunctions::insert(ComparableFunction &NewF) { - std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); +bool MergeFunctions::insert(Function *NewFunction) { + std::pair<FnTreeType::iterator, bool> Result = + FnTree.insert(FunctionPtr(NewFunction, DL)); + if (Result.second) { - DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); + DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); return false; } - const ComparableFunction &OldF = *Result.first; + const FunctionPtr &OldF = *Result.first; // Don't merge tiny functions, since it can just end up making the function // larger. // FIXME: Should still merge them if they are unnamed_addr and produce an // alias. - if (NewF.getFunc()->size() == 1) { - if (NewF.getFunc()->front().size() <= 2) { - DEBUG(dbgs() << NewF.getFunc()->getName() - << " is to small to bother merging\n"); + if (NewFunction->size() == 1) { + if (NewFunction->front().size() <= 2) { + DEBUG(dbgs() << NewFunction->getName() + << " is to small to bother merging\n"); return false; } } // Never thunk a strong function to a weak function. - assert(!OldF.getFunc()->mayBeOverridden() || - NewF.getFunc()->mayBeOverridden()); + assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden()); - DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " - << NewF.getFunc()->getName() << '\n'); + DEBUG(dbgs() << " " << OldF.getFunc()->getName() + << " == " << NewFunction->getName() << '\n'); - Function *DeleteF = NewF.getFunc(); - NewF.release(); + Function *DeleteF = NewFunction; mergeTwoFunctions(OldF.getFunc(), DeleteF); return true; } -// Remove a function from FnSet. If it was already in FnSet, add it to Deferred -// so that we'll look at it in the next round. +// Remove a function from FnTree. If it was already in FnTree, add +// it to Deferred so that we'll look at it in the next round. void MergeFunctions::remove(Function *F) { // We need to make sure we remove F, not a function "equal" to F per the // function equality comparator. - // - // The special "lookup only" ComparableFunction bypasses the expensive - // function comparison in favour of a pointer comparison on the underlying - // Function*'s. - ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly); - if (FnSet.erase(CF)) { - DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); + FnTreeType::iterator found = FnTree.find(FunctionPtr(F, DL)); + size_t Erased = 0; + if (found != FnTree.end() && found->getFunc() == F) { + Erased = 1; + FnTree.erase(found); + } + + if (Erased) { + DEBUG(dbgs() << "Removed " << F->getName() + << " from set and deferred it.\n"); Deferred.push_back(F); } } @@ -894,17 +1481,14 @@ void MergeFunctions::removeUsers(Value *V) { Value *V = Worklist.back(); Worklist.pop_back(); - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - Use &U = UI.getUse(); - if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { + for (User *U : V->users()) { + if (Instruction *I = dyn_cast<Instruction>(U)) { remove(I->getParent()->getParent()); - } else if (isa<GlobalValue>(U.getUser())) { + } else if (isa<GlobalValue>(U)) { // do nothing - } else if (Constant *C = dyn_cast<Constant>(U.getUser())) { - for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end(); - CUI != CUE; ++CUI) - Worklist.push_back(*CUI); + } else if (Constant *C = dyn_cast<Constant>(U)) { + for (User *UU : C->users()) + Worklist.push_back(UU); } } } diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp index fa518cb..76d6dfa 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -12,30 +12,31 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "partialinlining" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" using namespace llvm; +#define DEBUG_TYPE "partialinlining" + STATISTIC(NumPartialInlined, "Number of functions partially inlined"); namespace { struct PartialInliner : public ModulePass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + void getAnalysisUsage(AnalysisUsage &AU) const override { } static char ID; // Pass identification, replacement for typeid PartialInliner() : ModulePass(ID) { initializePartialInlinerPass(*PassRegistry::getPassRegistry()); } - - bool runOnModule(Module& M); - + + bool runOnModule(Module& M) override; + private: Function* unswitchFunction(Function* F); }; @@ -52,10 +53,10 @@ Function* PartialInliner::unswitchFunction(Function* F) { BasicBlock* entryBlock = F->begin(); BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); if (!BR || BR->isUnconditional()) - return 0; + return nullptr; - BasicBlock* returnBlock = 0; - BasicBlock* nonReturnBlock = 0; + BasicBlock* returnBlock = nullptr; + BasicBlock* nonReturnBlock = nullptr; unsigned returnCount = 0; for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock); SI != SE; ++SI) @@ -66,7 +67,7 @@ Function* PartialInliner::unswitchFunction(Function* F) { nonReturnBlock = *SI; if (returnCount != 1) - return 0; + return nullptr; // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; @@ -119,8 +120,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { // The CodeExtractor needs a dominator tree. DominatorTree DT; - DT.runOnFunction(*duplicateFunction); - + DT.recalculate(*duplicateFunction); + // Extract the body of the if. Function* extractedFunction = CodeExtractor(toExtract, &DT).extractCodeRegion(); @@ -128,8 +129,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { InlineFunctionInfo IFI; // Inline the top-level if test into all callers. - std::vector<User*> Users(duplicateFunction->use_begin(), - duplicateFunction->use_end()); + std::vector<User *> Users(duplicateFunction->user_begin(), + duplicateFunction->user_end()); for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); UI != UE; ++UI) if (CallInst *CI = dyn_cast<CallInst>(*UI)) @@ -162,9 +163,8 @@ bool PartialInliner::runOnModule(Module& M) { if (currFunc->use_empty()) continue; bool recursive = false; - for (Function::use_iterator UI = currFunc->use_begin(), - UE = currFunc->use_end(); UI != UE; ++UI) - if (Instruction* I = dyn_cast<Instruction>(*UI)) + for (User *U : currFunc->users()) + if (Instruction* I = dyn_cast<Instruction>(U)) if (I->getParent()->getParent() == currFunc) { recursive = true; break; diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 24c5018..6d9d8be 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -17,7 +17,7 @@ #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" +#include "llvm/IR/Verifier.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" @@ -33,11 +33,6 @@ RunLoopVectorization("vectorize-loops", cl::Hidden, cl::desc("Run the Loop vectorization passes")); static cl::opt<bool> -LateVectorization("late-vectorize", cl::init(true), cl::Hidden, - cl::desc("Run the vectorization pasess late in the pass " - "pipeline (after the inliner)")); - -static cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::Hidden, cl::desc("Run the SLP vectorization passes")); @@ -58,18 +53,27 @@ static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), + cl::Hidden, + cl::desc("Run the load combining pass")); + +static cl::opt<bool> RunGVN("enable-gvn", cl::init(true), + cl::Hidden, + cl::desc("Run the global value numbering pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; - LibraryInfo = 0; - Inliner = 0; + LibraryInfo = nullptr; + Inliner = nullptr; + DisableTailCalls = false; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; - LateVectorize = LateVectorization; RerollLoops = RunLoopRerolling; + LoadCombine = RunLoadCombine; } PassManagerBuilder::~PassManagerBuilder() { @@ -134,7 +138,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (OptLevel == 0) { if (Inliner) { MPM.add(Inliner); - Inliner = 0; + Inliner = nullptr; } // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC @@ -156,12 +160,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (!DisableUnitAtATime) { addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createGlobalOptimizerPass()); // Optimize out global vars - MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createDeadArgEliminationPass()); // Dead argument elimination MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } @@ -170,7 +175,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createPruneEHPass()); // Remove dead EH info if (Inliner) { MPM.add(Inliner); - Inliner = 0; + Inliner = nullptr; } if (!DisableUnitAtATime) MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs @@ -188,11 +193,14 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + if (!DisableTailCalls) + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions - MPM.add(createLoopRotatePass()); // Rotate Loop + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createInstructionCombiningPass()); @@ -200,21 +208,22 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops - if (!LateVectorize && LoopVectorize) - MPM.add(createLoopVectorizePass(DisableUnrollLoops)); - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); // Unroll small loops + MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); - if (OptLevel > 1) - MPM.add(createGVNPass()); // Remove redundancies + if (OptLevel > 1) { + MPM.add(createMergedLoadStoreMotionPass()); // Merge load/stores in diamond + if (RunGVN) + MPM.add(createGVNPass()); // Remove redundancies + } MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset MPM.add(createSCCPPass()); // Constant prop with SCCP // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); MPM.add(createDeadStoreEliminationPass()); // Delete dead stores @@ -229,6 +238,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (BBVectorize) { MPM.add(createBBVectorizePass()); MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass()); // Remove redundancies else @@ -239,25 +249,30 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopUnrollPass()); } + if (LoadCombine) + MPM.add(createLoadCombinePass()); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Clean up after everything. + addExtensionsToPM(EP_Peephole, MPM); + + // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC + // pass manager that we are specifically trying to avoid. To prevent this + // we must insert a no-op module pass to reset the pass manager. + MPM.add(createBarrierNoopPass()); + MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); + // FIXME: Because of #pragma vectorize enable, the passes below are always + // inserted in the pipeline, even when the vectorizer doesn't run (ex. when + // on -O1 and no #pragma is found). Would be good to have these two passes + // as function calls, so that we can only pass them when the vectorizer + // changed the code. + MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); - // As an experimental mode, run any vectorization passes in a separate - // pipeline from the CGSCC pass manager that runs iteratively with the - // inliner. - if (LateVectorize && LoopVectorize) { - // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC - // pass manager that we are specifically trying to avoid. To prevent this - // we must insert a no-op module pass to reset the pass manager. - MPM.add(createBarrierNoopPass()); - - // Add the various vectorization passes and relevant cleanup passes for - // them since we are no longer in the middle of the main scalar pipeline. - MPM.add(createLoopVectorizePass(DisableUnrollLoops)); - MPM.add(createInstructionCombiningPass()); - MPM.add(createCFGSimplificationPass()); - } + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); // Unroll small loops if (!DisableUnitAtATime) { // FIXME: We shouldn't bother with this anymore. @@ -306,6 +321,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); // Inline small functions if (RunInliner) @@ -324,6 +340,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // The IPO passes may leave cruft around. Clean up after them. PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); // Break up allocas @@ -337,14 +354,27 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, PM.add(createGlobalsModRefPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createMergedLoadStoreMotionPass()); // Merge load/stores in diamonds PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); + // More loops are countable; try to optimize them. + PM.add(createIndVarSimplifyPass()); + PM.add(createLoopDeletionPass()); + PM.add(createLoopVectorizePass(true, true)); + + // More scalar chains could be vectorized due to more alias information + PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + if (LoadCombine) + PM.add(createLoadCombinePass()); + // Cleanup and simplify the code after the scalar optimizations. PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp index b160913..b2c4a09 100644 --- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp @@ -14,22 +14,23 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "prune-eh" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/Support/CFG.h" #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "prune-eh" + STATISTIC(NumRemoved, "Number of invokes removed"); STATISTIC(NumUnreach, "Number of noreturn calls optimized"); @@ -41,7 +42,7 @@ namespace { } // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(CallGraphSCC &SCC); + bool runOnSCC(CallGraphSCC &SCC) override; bool SimplifyFunction(Function *F); void DeleteBasicBlock(BasicBlock *BB); @@ -51,7 +52,7 @@ namespace { char PruneEH::ID = 0; INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", "Remove unused exception handling info", false, false) -INITIALIZE_PASS_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_END(PruneEH, "prune-eh", "Remove unused exception handling info", false, false) @@ -60,7 +61,7 @@ Pass *llvm::createPruneEHPass() { return new PruneEH(); } bool PruneEH::runOnSCC(CallGraphSCC &SCC) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; - CallGraph &CG = getAnalysis<CallGraph>(); + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. Used for quickly @@ -85,7 +86,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) { + if (!F) { SCCMightUnwind = true; SCCMightReturn = true; } else if (F->isDeclaration() || F->mayBeOverridden()) { @@ -234,7 +235,7 @@ bool PruneEH::SimplifyFunction(Function *F) { /// exist in the BB. void PruneEH::DeleteBasicBlock(BasicBlock *BB) { assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!"); - CallGraph &CG = getAnalysis<CallGraph>(); + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); CallGraphNode *CGN = CG[BB->getParent()]; for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) { diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp index f00830a..956991a 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -14,13 +14,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "strip-dead-prototypes" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "strip-dead-prototypes" + STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed"); namespace { @@ -32,7 +33,7 @@ public: StripDeadPrototypesPass() : ModulePass(ID) { initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; }; } // end anonymous namespace diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp index c4f5cfc..1abbccc 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -23,8 +23,8 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -44,9 +44,9 @@ namespace { initializeStripSymbolsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; @@ -59,9 +59,9 @@ namespace { initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; @@ -74,9 +74,9 @@ namespace { initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry()); } - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; @@ -89,9 +89,9 @@ namespace { initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; @@ -132,11 +132,10 @@ ModulePass *llvm::createStripDeadDebugInfoPass() { /// OnlyUsedBy - Return true if V is only used by Usr. static bool OnlyUsedBy(Value *V, Value *Usr) { - for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { - User *U = *I; + for (User *U : V->users()) if (U != Usr) return false; - } + return true; } @@ -147,7 +146,7 @@ static void RemoveDeadConstant(Constant *C) { if (OnlyUsedBy(C->getOperand(i), C)) Operands.insert(cast<Constant>(C->getOperand(i))); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { - if (!GV->hasLocalLinkage()) return; // Don't delete non static globals. + if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals. GV->eraseFromParent(); } else if (!isa<Function>(C)) @@ -193,7 +192,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) { /// Find values that are marked as llvm.used. static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { - if (LLVMUsed == 0) return; + if (!LLVMUsed) return; UsedValues.insert(LLVMUsed); ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); @@ -250,7 +249,7 @@ bool StripDebugDeclare::runOnModule(Module &M) { if (Declare) { while (!Declare->use_empty()) { - CallInst *CI = cast<CallInst>(Declare->use_back()); + CallInst *CI = cast<CallInst>(Declare->user_back()); Value *Arg1 = CI->getArgOperand(0); Value *Arg2 = CI->getArgOperand(1); assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); @@ -307,10 +306,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { SmallVector<Value *, 64> LiveSubprograms; DenseSet<const MDNode *> VisitedSet; - for (DebugInfoFinder::iterator CI = F.compile_unit_begin(), - CE = F.compile_unit_end(); CI != CE; ++CI) { - // Create our compile unit. - DICompileUnit DIC(*CI); + for (DICompileUnit DIC : F.compile_units()) { assert(DIC.Verify() && "DIC must verify as a DICompileUnit."); // Create our live subprogram list. |