Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
38 files changed, 3287 insertions, 2499 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp new file mode 100644 index 0000000..cce016a --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -0,0 +1,114 @@ +//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h). +// +//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +namespace llvm { + +// We sort the stack variables by alignment (largest first) to minimize +// unnecessary large gaps due to alignment. +// It is tempting to also sort variables by size so that larger variables +// have larger redzones at both ends. But reordering will make report analysis +// harder, especially when temporary unnamed variables are present. +// So, until we can provide more information (type, line number, etc) +// for the stack variables we avoid reordering them too much. +static inline bool CompareVars(const ASanStackVariableDescription &a, + const ASanStackVariableDescription &b) { + return a.Alignment > b.Alignment; +} + +// We also force minimal alignment for all vars to kMinAlignment so that vars +// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars. +static const size_t kMinAlignment = 16; + +static size_t RoundUpTo(size_t X, size_t RoundTo) { + assert((RoundTo & (RoundTo - 1)) == 0); + return (X + RoundTo - 1) & ~(RoundTo - 1); +} + +// The larger the variable Size, the larger the redzone. +// The resulting frame size is a multiple of Alignment.
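+// (Editorial worked example, not part of the original commit: for Size = 40
+// and Alignment = 32, the "Size <= 128" bucket below gives Res = 40 + 32 = 72,
+// and RoundUpTo(72, 32) yields 96.)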
+static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) { + size_t Res = 0; + if (Size <= 4) Res = 16; + else if (Size <= 16) Res = 32; + else if (Size <= 128) Res = Size + 32; + else if (Size <= 512) Res = Size + 64; + else if (Size <= 4096) Res = Size + 128; + else Res = Size + 256; + return RoundUpTo(Res, Alignment); +} + +void +ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars, + size_t Granularity, size_t MinHeaderSize, + ASanStackFrameLayout *Layout) { + assert(Granularity >= 8 && Granularity <= 64 && + (Granularity & (Granularity - 1)) == 0); + assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 && + MinHeaderSize >= Granularity); + size_t NumVars = Vars.size(); + assert(NumVars > 0); + for (size_t i = 0; i < NumVars; i++) + Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment); + + std::stable_sort(Vars.begin(), Vars.end(), CompareVars); + SmallString<2048> StackDescriptionStorage; + raw_svector_ostream StackDescription(StackDescriptionStorage); + StackDescription << NumVars; + Layout->FrameAlignment = std::max(Granularity, Vars[0].Alignment); + SmallVector<uint8_t, 64> &SB(Layout->ShadowBytes); + SB.clear(); + size_t Offset = std::max(std::max(MinHeaderSize, Granularity), + Vars[0].Alignment); + assert((Offset % Granularity) == 0); + SB.insert(SB.end(), Offset / Granularity, kAsanStackLeftRedzoneMagic); + for (size_t i = 0; i < NumVars; i++) { + bool IsLast = i == NumVars - 1; + size_t Alignment = std::max(Granularity, Vars[i].Alignment); + (void)Alignment; // Used only in asserts. + size_t Size = Vars[i].Size; + const char *Name = Vars[i].Name; + assert((Alignment & (Alignment - 1)) == 0); + assert(Layout->FrameAlignment >= Alignment); + assert((Offset % Alignment) == 0); + assert(Size > 0); + StackDescription << " " << Offset << " " << Size << " " << strlen(Name) + << " " << Name; + size_t NextAlignment = IsLast ? Granularity + : std::max(Granularity, Vars[i + 1].Alignment); + size_t SizeWithRedzone = VarAndRedzoneSize(Vars[i].Size, NextAlignment); + SB.insert(SB.end(), Size / Granularity, 0); + if (Size % Granularity) + SB.insert(SB.end(), Size % Granularity); + SB.insert(SB.end(), (SizeWithRedzone - Size) / Granularity, + IsLast ? kAsanStackRightRedzoneMagic + : kAsanStackMidRedzoneMagic); + Vars[i].Offset = Offset; + Offset += SizeWithRedzone; + } + if (Offset % MinHeaderSize) { + size_t ExtraRedzone = MinHeaderSize - (Offset % MinHeaderSize); + SB.insert(SB.end(), ExtraRedzone / Granularity, + kAsanStackRightRedzoneMagic); + Offset += ExtraRedzone; + } + Layout->DescriptionString = StackDescription.str(); + Layout->FrameSize = Offset; + assert((Layout->FrameSize % MinHeaderSize) == 0); + assert(Layout->FrameSize / Granularity == Layout->ShadowBytes.size()); +} + +} // llvm namespace diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp new file mode 100644 index 0000000..196ac79 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -0,0 +1,222 @@ +//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file adds DWARF discriminators to the IR. 
Path discriminators are +// used to decide what CFG path was taken inside sub-graphs whose instructions +// share the same line and column number information. +// +// The main user of this is the sample profiler. Instruction samples are +// mapped to line number information. Since a single line may be spread +// out over several basic blocks, discriminators add a more precise location +// for the samples. +// +// For example, +// +// 1 #define ASSERT(P) +// 2 if (!(P)) +// 3 abort() +// ... +// 100 while (true) { +// 101 ASSERT (sum < 0); +// 102 ... +// 130 } +// +// when converted to IR, this snippet looks something like: +// +// while.body: ; preds = %entry, %if.end +// %0 = load i32* %sum, align 4, !dbg !15 +// %cmp = icmp slt i32 %0, 0, !dbg !15 +// br i1 %cmp, label %if.end, label %if.then, !dbg !15 +// +// if.then: ; preds = %while.body +// call void @abort(), !dbg !15 +// br label %if.end, !dbg !15 +// +// Notice that all the instructions in blocks 'while.body' and 'if.then' +// have exactly the same debug information. When this program is sampled +// at runtime, the profiler will assume that all these instructions are +// equally frequent. This, in turn, will make it consider the edge +// while.body->if.then to be frequently taken (which is incorrect). +// +// By adding a discriminator value to the instructions in block 'if.then', +// we can distinguish instructions at line 101 with discriminator 0 from +// the instructions at line 101 with discriminator 1. +// +// For more details about DWARF discriminators, please visit +// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "add-discriminators" + +namespace { + struct AddDiscriminators : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + AddDiscriminators() : FunctionPass(ID) { + initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + }; +} + +char AddDiscriminators::ID = 0; +INITIALIZE_PASS_BEGIN(AddDiscriminators, "add-discriminators", + "Add DWARF path discriminators", false, false) +INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators", + "Add DWARF path discriminators", false, false) + +// Command line option to disable discriminator generation even in the +// presence of debug information. This is only needed when debugging +// debug info generation issues. +static cl::opt<bool> +NoDiscriminators("no-discriminators", cl::init(false), + cl::desc("Disable generation of discriminator information.")); + +FunctionPass *llvm::createAddDiscriminatorsPass() { + return new AddDiscriminators(); +} + +static bool hasDebugInfo(const Function &F) { + NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu"); + return CUNodes != nullptr; +} + +/// \brief Assign DWARF discriminators. +/// +/// To assign discriminators, we examine the boundaries of every +/// basic block and its successors. Suppose there is a basic block B1 +/// with successor B2.
The last instruction I1 in B1 and the first +/// instruction I2 in B2 are located at the same file and line number. +/// This situation is illustrated in the following code snippet: +/// +/// if (i < 10) x = i; +/// +/// entry: +/// br i1 %cmp, label %if.then, label %if.end, !dbg !10 +/// if.then: +/// %1 = load i32* %i.addr, align 4, !dbg !10 +/// store i32 %1, i32* %x, align 4, !dbg !10 +/// br label %if.end, !dbg !10 +/// if.end: +/// ret void, !dbg !12 +/// +/// Notice how the branch instruction in block 'entry' and all the +/// instructions in block 'if.then' have the exact same debug location +/// information (!dbg !10). +/// +/// To distinguish instructions in block 'entry' from instructions in +/// block 'if.then', we generate a new lexical block for all the +/// instructions in block 'if.then' that share the same file and line +/// location with the last instruction of block 'entry'. +/// +/// This new lexical block will have the same location information as +/// the previous one, but with a new DWARF discriminator value. +/// +/// One of the main uses of this discriminator value is in runtime +/// sample profilers. It allows the profiler to distinguish instructions +/// at location !dbg !10 that execute on different basic blocks. This is +/// important because while the predicate 'if (i < 10)' may have been +/// executed millions of times, the assignment 'x = i' may have only +/// executed a handful of times (meaning that the entry->if.then edge is +/// seldom taken). +/// +/// If we did not have discriminator information, the profiler would +/// assign the same weight to both blocks 'entry' and 'if.then', which +/// in turn will make it conclude that the entry->if.then edge is very +/// hot. +/// +/// To decide where to create new discriminator values, this function +/// traverses the CFG and examines instructions at basic block boundaries. +/// If the last instruction I1 of a block B1 is at the same file and line +/// location as instruction I2 of successor B2, then it creates a new +/// lexical block for I2 and all the instructions in B2 that share the same +/// file and line location as I2. This new lexical block will have a +/// different discriminator number than I1. +bool AddDiscriminators::runOnFunction(Function &F) { + // If the function has debug information, but the user has disabled + // discriminators, do nothing. + // Similarly, if the function has no debug info, do nothing. + // Finally, if this module is built with dwarf versions earlier than 4, + // do nothing (discriminator support is a DWARF 4 feature). + if (NoDiscriminators || + !hasDebugInfo(F) || + F.getParent()->getDwarfVersion() < 4) + return false; + + bool Changed = false; + Module *M = F.getParent(); + LLVMContext &Ctx = M->getContext(); + DIBuilder Builder(*M); + + // Traverse all the blocks looking for instructions in different + // blocks that are at the same file:line location.
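+ // (Editorial note, not in the original commit: the walk below looks only at
+ // each successor's first non-PHI/dbg/lifetime instruction; when it shares
+ // the terminator's location, it and every following instruction with that
+ // same location are retagged with the new discriminator.)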
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock *B = I; + TerminatorInst *Last = B->getTerminator(); + DebugLoc LastLoc = Last->getDebugLoc(); + if (LastLoc.isUnknown()) continue; + DILocation LastDIL(LastLoc.getAsMDNode(Ctx)); + + for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { + BasicBlock *Succ = Last->getSuccessor(I); + Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); + DebugLoc FirstLoc = First->getDebugLoc(); + if (FirstLoc.isUnknown()) continue; + DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx)); + + // If the first instruction (First) of Succ is at the same file + // location as B's last instruction (Last), add a new + // discriminator for First's location and all the instructions + // in Succ that share the same location with First. + if (FirstDIL.atSameLineAs(LastDIL)) { + // Create a new lexical scope and compute a new discriminator + // number for it. + StringRef Filename = FirstDIL.getFilename(); + unsigned LineNumber = FirstDIL.getLineNumber(); + unsigned ColumnNumber = FirstDIL.getColumnNumber(); + DIScope Scope = FirstDIL.getScope(); + DIFile File = Builder.createFile(Filename, Scope.getDirectory()); + unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx); + DILexicalBlock NewScope = Builder.createLexicalBlock( + Scope, File, LineNumber, ColumnNumber, Discriminator); + DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope); + DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL); + + // Attach this new debug location to First and every + // instruction following First that shares the same location. + for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; + ++I1) { + if (I1->getDebugLoc() != FirstLoc) break; + I1->setDebugLoc(newDebugLoc); + DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber() + << ":" << NewDIL.getColumnNumber() << ":" + << NewDIL.getDiscriminator() << *I1 << "\n"); + } + DEBUG(dbgs() << "\n"); + Changed = true; + } + } + } + return Changed; +} diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 12de9ee..602e8ba 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -15,17 +15,17 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Type.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -68,8 +68,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { if (!isa<PHINode>(BB->begin())) return; - AliasAnalysis *AA = 0; - MemoryDependenceAnalysis *MemDep = 0; + AliasAnalysis *AA = nullptr; + MemoryDependenceAnalysis *MemDep = nullptr; if (P) { AA = P->getAnalysisIfAvailable<AliasAnalysis>(); MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); @@ -130,7 +130,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { BasicBlock *OnlySucc = BB; for (; SI != SE; ++SI) if 
(*SI != OnlySucc) { - OnlySucc = 0; // There are multiple distinct successors! + OnlySucc = nullptr; // There are multiple distinct successors! break; } @@ -167,15 +167,17 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Finally, erase the old block and update dominator info. if (P) { - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { - if (DomTreeNode *DTN = DT->getNode(BB)) { - DomTreeNode *PredDTN = DT->getNode(PredBB); + if (DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DominatorTree &DT = DTWP->getDomTree(); + if (DomTreeNode *DTN = DT.getNode(BB)) { + DomTreeNode *PredDTN = DT.getNode(PredBB); SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), DE = Children.end(); DI != DE; ++DI) - DT->changeImmediateDominator(*DI, PredDTN); + DT.changeImmediateDominator(*DI, PredDTN); - DT->eraseNode(BB); + DT.eraseNode(BB); } if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) @@ -215,7 +217,7 @@ void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, /// void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I) { - assert(I->getParent() == 0 && + assert(I->getParent() == nullptr && "ReplaceInstWithInst: Instruction already inserted into basic block!"); // Insert the new instruction into the basic block... @@ -252,7 +254,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // If the successor only has a single pred, split the top of the successor // block. assert(SP == BB && "CFG broken"); - SP = NULL; + SP = nullptr; return SplitBlock(Succ, Succ->begin(), P); } @@ -280,18 +282,20 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { if (Loop *L = LI->getLoopFor(Old)) L->addBasicBlockToLoop(New, LI->getBase()); - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DominatorTree &DT = DTWP->getDomTree(); // Old dominates New. New node dominates all other nodes dominated by Old. - if (DomTreeNode *OldNode = DT->getNode(Old)) { + if (DomTreeNode *OldNode = DT.getNode(Old)) { std::vector<DomTreeNode *> Children; for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); I != E; ++I) Children.push_back(*I); - DomTreeNode *NewNode = DT->addNewBlock(New,Old); + DomTreeNode *NewNode = DT.addNewBlock(New, Old); for (std::vector<DomTreeNode *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) - DT->changeImmediateDominator(*I, NewNode); + DT.changeImmediateDominator(*I, NewNode); } } @@ -306,7 +310,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, if (!P) return; LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - Loop *L = LI ? LI->getLoopFor(OldBB) : 0; + Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr; // If we need to preserve loop analyses, collect some information about how // this split will affect loops. @@ -336,9 +340,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, } // Update dominator tree if available. 
- DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); - if (DT) - DT->splitBlock(NewBB); + if (DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DTWP->getDomTree().splitBlock(NewBB); if (!L) return; @@ -347,7 +351,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, // loop). To find this, examine each of the predecessors and determine which // loops enclose them, and select the most-nested loop which contains the // loop containing the block being split. - Loop *InnermostPredLoop = 0; + Loop *InnermostPredLoop = nullptr; for (ArrayRef<BasicBlock*>::iterator i = Preds.begin(), e = Preds.end(); i != e; ++i) { BasicBlock *Pred = *i; @@ -380,51 +384,68 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock*> Preds, BranchInst *BI, Pass *P, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr; + SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. - Value *InVal = 0; + Value *InVal = nullptr; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1, e = Preds.size(); i != e; ++i) - if (InVal != PN->getIncomingValueForBlock(Preds[i])) { - InVal = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (!PredSet.count(PN->getIncomingBlock(i))) + continue; + if (!InVal) + InVal = PN->getIncomingValue(i); + else if (InVal != PN->getIncomingValue(i)) { + InVal = nullptr; break; } + } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - // Explicitly check the BB index here to handle duplicates in Preds. - int Idx = PN->getBasicBlockIndex(Preds[i]); - if (Idx >= 0) - PN->removeIncomingValue(Idx, false); - } - } else { - // If the values coming into the block are not the same, we need a PHI. - // Create the new PHI node, insert it into NewBB at the end of the block - PHINode *NewPHI = - PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); - if (AA) AA->copyValue(PN, NewPHI); - // Move all of the PHI values for 'Preds' to the new PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - Value *V = PN->removeIncomingValue(Preds[i], false); - NewPHI->addIncoming(V, Preds[i]); - } + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values + // aren't invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) + if (PredSet.count(PN->getIncomingBlock(i))) + PN->removeIncomingValue(i, false); + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + continue; + } - InVal = NewPHI; + // If the values coming into the block are not the same, we need a new + // PHI. 
+ // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); + if (AA) + AA->copyValue(PN, NewPHI); + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values aren't + // invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + BasicBlock *IncomingBB = PN->getIncomingBlock(i); + if (PredSet.count(IncomingBB)) { + Value *V = PN->removeIncomingValue(i, false); + NewPHI->addIncoming(V, IncomingBB); + } } - // Add an incoming value to the PHI node in the loop for the preheader - // edge. - PN->addIncoming(InVal, NewBB); + PN->addIncoming(NewPHI, NewBB); } } @@ -538,7 +559,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, e = pred_end(OrigBB); } - BasicBlock *NewBB2 = 0; + BasicBlock *NewBB2 = nullptr; if (!NewBB2Preds.empty()) { // Create another basic block for the rest of OrigBB's predecessors. NewBB2 = BasicBlock::Create(OrigBB->getContext(), @@ -603,7 +624,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); i != e; ++i) { Value *V = *i; - Instruction *NewBC = 0; + Instruction *NewBC = nullptr; if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { // Return value might be bitcasted. Clone and insert it before the // return instruction. @@ -630,28 +651,30 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, } /// SplitBlockAndInsertIfThen - Split the containing block at the -/// specified instruction - everything before and including SplitBefore stays -/// in the old basic block, and everything after SplitBefore is moved to a +/// specified instruction - everything before and including SplitBefore stays +/// in the old basic block, and everything after SplitBefore is moved to a /// new block. The two blocks are connected by a conditional branch /// (with value of Cond being the condition). /// Before: /// Head -/// Cmp +/// SplitBefore /// Tail /// After: /// Head -/// Cmp -/// if (Cmp) +/// if (Cond) /// ThenBlock +/// SplitBefore /// Tail /// /// If Unreachable is true, then ThenBlock ends with /// UnreachableInst, otherwise it branches to Tail. /// Returns the NewBasicBlock's terminator.
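/// A hypothetical call site (editorial sketch, not from this commit; Cond,
/// InsertPt, and DT stand for caller-supplied values) against the post-patch
/// signature shown below:
///
///   TerminatorInst *Then = SplitBlockAndInsertIfThen(
///       Cond, InsertPt, /*Unreachable=*/false, /*BranchWeights=*/nullptr, DT);
///   IRBuilder<> Builder(Then); // emit the 'then' body ahead of this terminator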
-TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, - bool Unreachable, MDNode *BranchWeights) { - Instruction *SplitBefore = Cmp->getNextNode(); +TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond, + Instruction *SplitBefore, + bool Unreachable, + MDNode *BranchWeights, + DominatorTree *DT) { BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); @@ -662,13 +685,65 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, CheckTerm = new UnreachableInst(C, ThenBlock); else CheckTerm = BranchInst::Create(Tail, ThenBlock); + CheckTerm->setDebugLoc(SplitBefore->getDebugLoc()); BranchInst *HeadNewTerm = - BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond); + HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc()); HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + + if (DT) { + if (DomTreeNode *OldNode = DT->getNode(Head)) { + std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(Tail, Head); + for (auto Child : Children) + DT->changeImmediateDominator(Child, NewNode); + + // Head dominates ThenBlock. + DT->addNewBlock(ThenBlock, Head); + } + } + return CheckTerm; } +/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, +/// but also creates the ElseBlock. +/// Before: +/// Head +/// SplitBefore +/// Tail +/// After: +/// Head +/// if (Cond) +/// ThenBlock +/// else +/// ElseBlock +/// SplitBefore +/// Tail +void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, + TerminatorInst **ThenTerm, + TerminatorInst **ElseTerm, + MDNode *BranchWeights) { + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + *ThenTerm = BranchInst::Create(Tail, ThenBlock); + (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc()); + *ElseTerm = BranchInst::Create(Tail, ElseBlock); + (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc()); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond); + HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc()); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); +} + + /// GetIfCondition - Given a basic block (BB) with two predecessors, /// check to see if the merge at this block is due /// to an "if condition". 
If so, return the boolean condition that determines @@ -681,32 +756,32 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse) { PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); - BasicBlock *Pred1 = NULL; - BasicBlock *Pred2 = NULL; + BasicBlock *Pred1 = nullptr; + BasicBlock *Pred2 = nullptr; if (SomePHI) { if (SomePHI->getNumIncomingValues() != 2) - return NULL; + return nullptr; Pred1 = SomePHI->getIncomingBlock(0); Pred2 = SomePHI->getIncomingBlock(1); } else { pred_iterator PI = pred_begin(BB), PE = pred_end(BB); if (PI == PE) // No predecessor - return NULL; + return nullptr; Pred1 = *PI++; if (PI == PE) // Only one predecessor - return NULL; + return nullptr; Pred2 = *PI++; if (PI != PE) // More than two predecessors - return NULL; + return nullptr; } // We can only handle branches. Other control flow will be lowered to // branches if possible anyway. BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); - if (Pred1Br == 0 || Pred2Br == 0) - return 0; + if (!Pred1Br || !Pred2Br) + return nullptr; // Eliminate code duplication by ensuring that Pred1Br is conditional if // either are. @@ -716,7 +791,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // required anyway, we stand no chance of eliminating it, so the xform is // probably not profitable. if (Pred1Br->isConditional()) - return 0; + return nullptr; std::swap(Pred1, Pred2); std::swap(Pred1Br, Pred2Br); @@ -726,8 +801,8 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // The only thing we have to watch out for here is to make sure that Pred2 // doesn't have incoming edges from other blocks. If it does, the condition // doesn't dominate BB. - if (Pred2->getSinglePredecessor() == 0) - return 0; + if (!Pred2->getSinglePredecessor()) + return nullptr; // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". @@ -742,7 +817,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, } else { // We know that one arm of the conditional goes to BB, so the other must // go somewhere unrelated, and this must not be an "if statement". - return 0; + return nullptr; } return Pred1Br->getCondition(); @@ -752,12 +827,12 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // BB. Don't panic! If both blocks only have a single (identical) // predecessor, and THAT is a conditional branch, then we're all ok! BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) - return 0; + if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor()) + return nullptr; // Otherwise, if this is a conditional branch, then we can use it! 
BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); - if (BI == 0) return 0; + if (!BI) return nullptr; assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 0e7f7f7..80bd516 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -15,21 +15,22 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "break-crit-edges" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +#define DEBUG_TYPE "break-crit-edges" + STATISTIC(NumBroken, "Number of blocks inserted"); namespace { @@ -39,10 +40,10 @@ namespace { initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTree>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<LoopInfo>(); // No loop canonicalization guarantees are broken by this pass. @@ -141,7 +142,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges, bool DontDeleteUselessPhis, bool SplitLandingPads) { - if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); @@ -151,7 +152,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. - if (DestBB->isLandingPad()) return 0; + if (DestBB->isLandingPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), @@ -207,13 +208,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If we don't have a pass object, we can't update anything... - if (P == 0) return NewBB; + if (!P) return NewBB; - DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); // If we have nothing to update, just return. - if (DT == 0 && LI == 0) + if (!DT && !LI) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is @@ -249,7 +252,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); - DomTreeNode *DestBBNode = 0; + DomTreeNode *DestBBNode = nullptr; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. 
if (!OtherPreds.empty()) { @@ -297,9 +300,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, P->addBasicBlockToLoop(NewBB, LI->getBase()); } } - // If TIBB is in a loop and DestBB is outside of that loop, split the - // other exit blocks of the loop that also have predecessors outside - // the loop, to maintain a LoopSimplify guarantee. + // If TIBB is in a loop and DestBB is outside of that loop, we may need + // to update LoopSimplify form and LCSSA form. if (!TIL->contains(DestBB) && P->mustPreserveAnalysisID(LoopSimplifyID)) { assert(!TIL->contains(NewBB) && @@ -309,50 +311,35 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (P->mustPreserveAnalysisID(LCSSAID)) createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); - // For each unique exit block... - // FIXME: This code is functionally equivalent to the corresponding - // loop in LoopSimplify. - SmallVector<BasicBlock *, 4> ExitBlocks; - TIL->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - // Collect all the preds that are inside the loop, and note - // whether there are any preds outside the loop. - SmallVector<BasicBlock *, 4> Preds; - bool HasPredOutsideOfLoop = false; - BasicBlock *Exit = ExitBlocks[i]; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); - I != E; ++I) { - BasicBlock *P = *I; - if (TIL->contains(P)) { - if (isa<IndirectBrInst>(P->getTerminator())) { - Preds.clear(); - break; - } - Preds.push_back(P); - } else { - HasPredOutsideOfLoop = true; - } - } - // If there are any preds not in the loop, we'll need to split - // the edges. The Preds.empty() check is needed because a block - // may appear multiple times in the list. We can't use - // getUniqueExitBlocks above because that depends on LoopSimplify - // form, which we're in the process of restoring! - if (!Preds.empty() && HasPredOutsideOfLoop) { - if (!Exit->isLandingPad()) { - BasicBlock *NewExitBB = - SplitBlockPredecessors(Exit, Preds, "split", P); - if (P->mustPreserveAnalysisID(LCSSAID)) - createPHIsForSplitLoopExit(Preds, NewExitBB, Exit); - } else if (SplitLandingPads) { - SmallVector<BasicBlock*, 8> NewBBs; - SplitLandingPadPredecessors(Exit, Preds, - ".split1", ".split2", - P, NewBBs); - if (P->mustPreserveAnalysisID(LCSSAID)) - createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit); - } + // The only way that we can break LoopSimplify form by splitting a critical + // edge is if after the split there exists some edge from TIL to DestBB + // *and* the only edge into DestBB from outside of TIL is that of + // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB + // is the new exit block and it has no non-loop predecessors. If the + // second isn't true, then DestBB was not in LoopSimplify form prior to + // the split as it had a non-loop predecessor. In both of these cases, + // the predecessor must be directly in TIL, not in a subloop, or again + // LoopSimplify doesn't hold. + SmallVector<BasicBlock *, 4> LoopPreds; + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; + ++I) { + BasicBlock *P = *I; + if (P == NewBB) + continue; // The new block is known. + if (LI->getLoopFor(P) != TIL) { + // No need to re-simplify, it wasn't to start with.
+ LoopPreds.clear(); + break; } + LoopPreds.push_back(P); + } + if (!LoopPreds.empty()) { + assert(!DestBB->isLandingPad() && + "We don't split edges to landing pads!"); + BasicBlock *NewExitBB = + SplitBlockPredecessors(DestBB, LoopPreds, "split", P); + if (P->mustPreserveAnalysisID(LCSSAID)) + createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } } // LCSSA form was updated above for the case where LoopSimplify is diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 6d13217..be00b695 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -27,7 +27,8 @@ using namespace llvm; /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { - return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr"); + unsigned AS = V->getType()->getPointerAddressSpace(); + return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -35,7 +36,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -64,7 +65,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -94,7 +95,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; @@ -120,7 +121,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -153,7 +154,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -177,7 +178,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -204,7 +205,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS; @@ -232,7 +233,7 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) - 
return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS; @@ -260,7 +261,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -286,6 +287,21 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, return CI; } +/// Append a suffix to the function name according to the type of 'Op'. +static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBuffer) { + if (!Op->getType()->isDoubleTy()) { + NameBuffer += Name; + + if (Op->getType()->isFloatTy()) + NameBuffer += 'f'; + else + NameBuffer += 'l'; + + Name = NameBuffer; + } + return; +} + /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. /// 'floor'). This function is known to take a single argument of type matching /// 'Op' and return one value with the same type. If 'Op' is a long double, 'l' is @@ -293,15 +309,7 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, const AttributeSet &Attrs) { SmallString<20> NameBuffer; - if (!Op->getType()->isDoubleTy()) { - // If we need to add a suffix, copy into NameBuffer. - NameBuffer += Name; - if (Op->getType()->isFloatTy()) - NameBuffer += 'f'; // floorf - else - NameBuffer += 'l'; // floorl - Name = NameBuffer; - } + AppendTypeSuffix(Op, Name, NameBuffer); Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), @@ -314,12 +322,33 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, return CI; } +/// EmitBinaryFloatFnCall - Emit a call to the binary function named 'Name' +/// (e.g. 'fmin'). This function is known to take arguments of type matching +/// 'Op1' and 'Op2' and return one value with the same type. If 'Op1/Op2' are +/// long double, 'l' is added as the suffix of the name; if 'Op1/Op2' is a +/// float, we add an 'f' suffix. +Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, + IRBuilder<> &B, const AttributeSet &Attrs) { + SmallString<20> NameBuffer; + AppendTypeSuffix(Op1, Name, NameBuffer); + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), + Op1->getType(), Op2->getType(), NULL); + CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer.
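/// (Illustrative call, not from this commit: EmitPutChar(B.getInt32('\n'), B, TD, TLI)
/// lowers to putchar(10) when the target's C library provides putchar.)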
Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), @@ -341,7 +370,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -365,7 +394,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -398,7 +427,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -431,7 +460,7 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index 1f517d0..f2d5e07 100644 --- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "bypass-slow-division" #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Function.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "bypass-slow-division" + namespace { struct DivOpInfo { bool SignedOp; @@ -53,11 +54,11 @@ namespace llvm { } static DivOpInfo getEmptyKey() { - return DivOpInfo(false, 0, 0); + return DivOpInfo(false, nullptr, nullptr); } static DivOpInfo getTombstoneKey() { - return DivOpInfo(true, 0, 0); + return DivOpInfo(true, nullptr, nullptr); } static unsigned getHashValue(const DivOpInfo &Val) { diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index d105f5e..5c8f20d 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -17,8 +17,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -26,7 +27,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/Support/CFG.h" +#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" 
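(Editorial aside, not part of the commit: a minimal driver for the cloning entry points patched in the hunks below, with F standing for any function in the module:

  ValueToValueMapTy VMap;
  Function *NewF = CloneFunction(F, VMap, /*ModuleLevelChanges=*/true);

With ModuleLevelChanges set, the new CloneDebugInfoMetadata step added below also gives NewF its own DISubprogram entry in the compile unit's subprogram list.)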
@@ -88,26 +89,28 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(VMap.count(I) && "No mapping from source argument specified!"); #endif + // Copy all attributes other than those stored in the AttributeSet. We need + // to remap the parameter indices of the AttributeSet. + AttributeSet NewAttrs = NewFunc->getAttributes(); + NewFunc->copyAttributesFrom(OldFunc); + NewFunc->setAttributes(NewAttrs); + AttributeSet OldAttrs = OldFunc->getAttributes(); // Clone any argument attributes that are present in the VMap. - for (Function::const_arg_iterator I = OldFunc->arg_begin(), - E = OldFunc->arg_end(); - I != E; ++I) - if (Argument *Anew = dyn_cast<Argument>(VMap[I])) { + for (const Argument &OldArg : OldFunc->args()) + if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) { AttributeSet attrs = - OldAttrs.getParamAttributes(I->getArgNo() + 1); + OldAttrs.getParamAttributes(OldArg.getArgNo() + 1); if (attrs.getNumSlots() > 0) - Anew->addAttr(attrs); + NewArg->addAttr(attrs); } - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::ReturnIndex, - OldAttrs.getRetAttributes())); - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::FunctionIndex, - OldAttrs.getFnAttributes())); + NewFunc->setAttributes( + NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex, + OldAttrs.getRetAttributes()) + .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex, + OldAttrs.getFnAttributes())); // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of @@ -151,6 +154,54 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, TypeMapper, Materializer); } +// Find the MDNode which corresponds to the DISubprogram data that described F. +static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { + for (DISubprogram Subprogram : Finder.subprograms()) { + if (Subprogram.describes(F)) return Subprogram; + } + return nullptr; +} + +// Add an operand to an existing MDNode. The new operand will be added at the +// back of the operand list. +static void AddOperand(MDNode *Node, Value *Operand) { + SmallVector<Value*, 16> Operands; + for (unsigned i = 0; i < Node->getNumOperands(); i++) { + Operands.push_back(Node->getOperand(i)); + } + Operands.push_back(Operand); + MDNode *NewNode = MDNode::get(Node->getContext(), Operands); + Node->replaceAllUsesWith(NewNode); +} + +// Clone the module-level debug info associated with OldFunc. The cloned data +// will point to NewFunc instead. +static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap) { + DebugInfoFinder Finder; + Finder.processModule(*OldFunc->getParent()); + + const MDNode *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder); + if (!OldSubprogramMDNode) return; + + // Ensure that OldFunc appears in the map. + // (if it's already there it must point to NewFunc anyway) + VMap[OldFunc] = NewFunc; + DISubprogram NewSubprogram(MapValue(OldSubprogramMDNode, VMap)); + + for (DICompileUnit CU : Finder.compile_units()) { + DIArray Subprograms(CU.getSubprograms()); + + // If the compile unit's function list contains the old function, it should + // also contain the new one. 
+ for (unsigned i = 0; i < Subprograms.getNumElements(); i++) { + if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) { + AddOperand(Subprograms, NewSubprogram); + } + } + } +} + /// CloneFunction - Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified /// in the VMap are changed to refer to their mapped value instead of the @@ -188,6 +239,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, VMap[I] = DestI++; // Add mapping to VMap } + if (ModuleLevelChanges) + CloneDebugInfoMetadata(NewF, F, VMap); + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo); return NewF; @@ -205,17 +259,17 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - const DataLayout *TD; + const DataLayout *DL; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, const char *nameSuffix, ClonedCodeInfo *codeInfo, - const DataLayout *td) + const DataLayout *DL) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), - NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) { } /// CloneBlock - The specified block is found to be reachable, clone it and @@ -272,7 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into // the basic block. - if (Value *V = SimplifyInstruction(NewInst, TD)) { + if (Value *V = SimplifyInstruction(NewInst, DL)) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. if (Value *MappedV = VMap.lookup(V)) @@ -305,7 +359,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If the condition was a known constant in the callee... ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); // Or is a known constant in the caller... - if (Cond == 0) { + if (!Cond) { Value *V = VMap[BI->getCondition()]; Cond = dyn_cast_or_null<ConstantInt>(V); } @@ -321,7 +375,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { // If switching on a value known constant in the caller. ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); - if (Cond == 0) { // Or known constant after constant prop in the callee... + if (!Cond) { // Or known constant after constant prop in the callee... Value *V = VMap[SI->getCondition()]; Cond = dyn_cast_or_null<ConstantInt>(V); } @@ -368,7 +422,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const DataLayout *TD, + const DataLayout *DL, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -379,7 +433,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo, TD); + NameSuffix, CodeInfo, DL); // Clone the entry block, and anything recursively reachable from it. 
std::vector<const BasicBlock*> CloneWorklist; @@ -400,7 +454,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, BI != BE; ++BI) { Value *V = VMap[BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); - if (NewBB == 0) continue; // Dead block. + if (!NewBB) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); @@ -509,7 +563,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) - recursivelySimplifyInstruction(PN, TD); + recursivelySimplifyInstruction(PN, DL); // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index 64df089..3f75b3e 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -47,8 +47,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { GlobalVariable *GV = new GlobalVariable(*New, I->getType()->getElementType(), I->isConstant(), I->getLinkage(), - (Constant*) 0, I->getName(), - (GlobalVariable*) 0, + (Constant*) nullptr, I->getName(), + (GlobalVariable*) nullptr, I->getThreadLocalMode(), I->getType()->getAddressSpace()); GV->copyAttributesFrom(I); @@ -67,8 +67,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(), - I->getName(), NULL, New); + auto *PTy = cast<PointerType>(I->getType()); + auto *GA = + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + I->getLinkage(), I->getName(), New); GA->copyAttributesFrom(I); VMap[I] = GA; } @@ -106,7 +108,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { I != E; ++I) { GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); if (const Constant *C = I->getAliasee()) - GA->setAliasee(MapValue(C, VMap)); + GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap))); } // And named metadata.... diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp index 8fa412a..3b15a0a 100644 --- a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -84,7 +84,7 @@ Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, case 7: // True.
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); } - return NULL; + return nullptr; } /// PredicatesFoldable - Return true if both predicates match sign or if at diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 6f008644..e70a7d6 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -14,20 +14,20 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/Verifier.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -38,6 +38,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "code-extractor" + // Provide a command-line option to aggregate function arguments into a struct // for functions produced by the code extractor. This is useful when converting // extracted functions to pthread-based code, as only one argument (void*) can @@ -86,7 +88,7 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin, } #ifndef NDEBUG - for (SetVector<BasicBlock *>::iterator I = llvm::next(Result.begin()), + for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), E = Result.end(); I != E; ++I) for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); @@ -118,7 +120,7 @@ buildExtractionBlockSet(const RegionNode &RN) { } CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs) - : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt), + : DT(nullptr), AggregateArgs(AggregateArgs||AggregateArgsOpt), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, @@ -171,9 +173,8 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, if (definedInCaller(Blocks, *OI)) Inputs.insert(*OI); - for (Value::use_iterator UI = II->use_begin(), UE = II->use_end(); - UI != UE; ++UI) - if (!definedInRegion(Blocks, *UI)) { + for (User *U : II->users()) + if (!definedInRegion(Blocks, U)) { Outputs.insert(II); break; } @@ -369,7 +370,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } else RewriteVal = AI++; - std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end()); + std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end(); use != useE; ++use) if (Instruction* inst = dyn_cast<Instruction>(*use)) @@ -389,7 +390,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // Rewrite branches to basic blocks outside of the loop to new dummy blocks // within the new function. This must be done before we lose track of which // blocks were originally in the code region. 
- std::vector<User*> Users(header->use_begin(), header->use_end()); + std::vector<User*> Users(header->user_begin(), header->user_end()); for (unsigned i = 0, e = Users.size(); i != e; ++i) // The BasicBlock which contains the branch is not in the region // modify the branch target to a new block @@ -405,14 +406,13 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, /// that uses the value within the basic block, and return the predecessor /// block associated with that use, or return 0 if none is found. static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { - for (Value::use_iterator UI = Used->use_begin(), - UE = Used->use_end(); UI != UE; ++UI) { - PHINode *P = dyn_cast<PHINode>(*UI); + for (Use &U : Used->uses()) { + PHINode *P = dyn_cast<PHINode>(U.getUser()); if (P && P->getParent() == BB) - return P->getIncomingBlock(UI); + return P->getIncomingBlock(U); } - - return 0; + + return nullptr; } /// emitCallAndSwitchStatement - This method sets up the caller side by adding @@ -440,14 +440,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructValues.push_back(*i); } else { AllocaInst *alloca = - new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc", + new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc", codeReplacer->getParent()->begin()->begin()); ReloadOutputs.push_back(alloca); params.push_back(alloca); } } - AllocaInst *Struct = 0; + AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector<Type*> ArgTypes; for (ValueSet::iterator v = StructValues.begin(), @@ -457,7 +457,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Allocate a struct at the beginning of this function Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = - new AllocaInst(StructArgTy, 0, "structArg", + new AllocaInst(StructArgTy, nullptr, "structArg", codeReplacer->getParent()->begin()->begin()); params.push_back(Struct); @@ -486,7 +486,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Reload the outputs passed in by reference for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = 0; + Value *Output = nullptr; if (AggregateArgs) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); @@ -502,7 +502,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); Reloads.push_back(load); codeReplacer->getInstList().push_back(load); - std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end()); + std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction *inst = cast<Instruction>(Users[u]); if (!Blocks.count(inst->getParent())) @@ -539,7 +539,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, newFunction); unsigned SuccNum = switchVal++; - Value *brVal = 0; + Value *brVal = nullptr; switch (NumExitBlocks) { case 0: case 1: break; // No value needed. 
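// Illustrative aside (not part of the patch): what the generated caller
// side is morally equivalent to when AggregateArgs is enabled and the
// region has two exit blocks; names below are hypothetical:
struct ExtractedArgs { int In0; int Out0; };       // one struct, ins + outs
static int extractedRegion(ExtractedArgs *A) {     // body = the region
  A->Out0 = A->In0 + 1;
  return A->In0 > 0 ? 0 : 1;                       // index of the exit taken
}
static int callerSide(int In0) {
  ExtractedArgs A = { In0, 0 };
  switch (extractedRegion(&A)) {                   // "TheSwitch" built above
  case 0:  return A.Out0;                          // first exit block
  default: return -A.Out0;                         // second exit block
  }
}
// End aside.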
@@ -635,7 +635,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Check if the function should return a value if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, 0, TheSwitch); // Return void + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); @@ -687,7 +687,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) - return 0; + return nullptr; ValueSet inputs, outputs; diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp new file mode 100644 index 0000000..a359424 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -0,0 +1,183 @@ +//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that are used to process llvm.global_ctors. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ctor_utils" + +namespace llvm { + +namespace { +/// Given a specified llvm.global_ctors list, install the +/// specified array. +void installGlobalCtors(GlobalVariable *GCL, + const std::vector<Function *> &Ctors) { + // If we made a change, reassemble the initializer list. + Constant *CSVals[3]; + + StructType *StructTy = + cast<StructType>(GCL->getType()->getElementType()->getArrayElementType()); + + // Create the new init list. + std::vector<Constant *> CAList; + for (Function *F : Ctors) { + Type *Int32Ty = Type::getInt32Ty(GCL->getContext()); + if (F) { + CSVals[0] = ConstantInt::get(Int32Ty, 65535); + CSVals[1] = F; + } else { + CSVals[0] = ConstantInt::get(Int32Ty, 0x7fffffff); + CSVals[1] = Constant::getNullValue(StructTy->getElementType(1)); + } + // FIXME: Only allow the 3-field form in LLVM 4.0. + size_t NumElts = StructTy->getNumElements(); + if (NumElts > 2) + CSVals[2] = Constant::getNullValue(StructTy->getElementType(2)); + CAList.push_back( + ConstantStruct::get(StructTy, makeArrayRef(CSVals, NumElts))); + } + + // Create the array initializer. + Constant *CA = + ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); + + // If we didn't change the number of elements, don't create a new GV. + if (CA->getType() == GCL->getInitializer()->getType()) { + GCL->setInitializer(CA); + return; + } + + // Create the new global and insert it next to the existing list. + GlobalVariable *NGV = + new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), + CA, "", GCL->getThreadLocalMode()); + GCL->getParent()->getGlobalList().insert(GCL, NGV); + NGV->takeName(GCL); + + // Nuke the old list, replacing any uses with the new one. 
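// Illustrative aside (not part of the patch): the entry type rebuilt by
// installGlobalCtors above, as a hypothetical builder. The 3-field form
// appends an i8* of associated data, kept null here per the FIXME:
static StructType *makeCtorEntryTy(LLVMContext &C) {
  Type *Int32Ty = Type::getInt32Ty(C);
  Type *CtorPtrTy =
      FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false)->getPointerTo();
  Type *DataPtrTy = Type::getInt8PtrTy(C);
  // { i32 priority, void ()* ctor, i8* data }
  Type *Fields[] = { Int32Ty, CtorPtrTy, DataPtrTy };
  return StructType::get(C, Fields);
}
// End aside.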
+ if (!GCL->use_empty()) { + Constant *V = NGV; + if (V->getType() != GCL->getType()) + V = ConstantExpr::getBitCast(V, GCL->getType()); + GCL->replaceAllUsesWith(V); + } + GCL->eraseFromParent(); +} + +/// Given a llvm.global_ctors list that we can understand, +/// return a list of the functions and null terminator as a vector. +std::vector<Function*> parseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector<Function *>(); + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + std::vector<Function *> Result; + Result.reserve(CA->getNumOperands()); + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + ConstantStruct *CS = cast<ConstantStruct>(*i); + Result.push_back(dyn_cast<Function>(CS->getOperand(1))); + } + return Result; +} + +/// Find the llvm.global_ctors list, verifying that all initializers have an +/// init priority of 65535. +GlobalVariable *findGlobalCtors(Module &M) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return nullptr; + + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. + if (!GV->hasUniqueInitializer()) + return nullptr; + + if (isa<ConstantAggregateZero>(GV->getInitializer())) + return GV; + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + if (isa<ConstantAggregateZero>(*i)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(*i); + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return nullptr; + + // Init priority must be standard. + ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) + return nullptr; + } + + return GV; +} +} // namespace + +/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the +/// entries for which it returns true. Return true if anything changed. +bool optimizeGlobalCtorsList(Module &M, + function_ref<bool(Function *)> ShouldRemove) { + GlobalVariable *GlobalCtors = findGlobalCtors(M); + if (!GlobalCtors) + return false; + + std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors); + if (Ctors.empty()) + return false; + + bool MadeChange = false; + + // Loop over global ctors, optimizing them when we can. + for (unsigned i = 0; i != Ctors.size(); ++i) { + Function *F = Ctors[i]; + // Found a null terminator in the middle of the list, prune off the rest of + // the list. + if (!F) { + if (i != Ctors.size() - 1) { + Ctors.resize(i + 1); + MadeChange = true; + } + break; + } + DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); + + // We cannot simplify external ctor functions. + if (F->empty()) + continue; + + // If we can evaluate the ctor at compile time, do. 
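// Illustrative aside (not part of the patch): how a caller such as
// GlobalOpt can drive this entry point; the predicate is hypothetical:
static bool removeTrivialCtors(Module &M) {
  return optimizeGlobalCtorsList(M, [](Function *F) {
    // Drop ctors whose body is a lone 'ret void' -- running them is a no-op.
    return F->size() == 1 && F->getEntryBlock().size() == 1 &&
           isa<ReturnInst>(F->getEntryBlock().front());
  });
}
// End aside.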
+ if (ShouldRemove(F)) { + Ctors.erase(Ctors.begin() + i); + MadeChange = true; + --i; + continue; + } + } + + if (!MadeChange) + return false; + + installGlobalCtors(GlobalCtors, Ctors); + return true; +} + +} // End llvm namespace diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 0723b35..9972b22 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; /// DemoteRegToStack - This function takes a virtual register computed by an @@ -25,23 +25,23 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Instruction *AllocaPoint) { if (I.use_empty()) { I.eraseFromParent(); - return 0; + return nullptr; } // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(I.getType(), 0, + Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", + Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", F->getEntryBlock().begin()); } // Change all of the users of the instruction to read from the stack slot. while (!I.use_empty()) { - Instruction *U = cast<Instruction>(I.use_back()); + Instruction *U = cast<Instruction>(I.user_back()); if (PHINode *PN = dyn_cast<PHINode>(U)) { // If this is a PHI node, we can't insert a load of the value before the // use. Instead insert the load in the predecessor block corresponding @@ -56,7 +56,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == &I) { Value *&V = Loads[PN->getIncomingBlock(i)]; - if (V == 0) { + if (!V) { // Insert the load into the predecessor block V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); @@ -110,17 +110,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { if (P->use_empty()) { P->eraseFromParent(); - return 0; + return nullptr; } // Create a stack slot to hold the value. 
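// Illustrative aside (not part of the patch): a reg2mem-style driver over
// these two entry points; hypothetical helper. Values are collected first
// because demotion rewrites the use lists being walked, and PHIs are left
// to DemotePHIToStack:
static void demoteFunction(Function &F) {
  std::vector<Instruction *> Work;
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    for (BasicBlock::iterator It = BB->begin(), IE = BB->end(); It != IE;
         ++It) {
      Instruction *I = It;
      if (!I->use_empty() && !isa<AllocaInst>(I) && !isa<PHINode>(I))
        Work.push_back(I);
    }
  for (unsigned i = 0, e = Work.size(); i != e; ++i)
    DemoteRegToStack(*Work[i]);   // returns the new slot, or null if unused
}
// End aside.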
AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(P->getType(), 0, + Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", + Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", F->getEntryBlock().begin()); } diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp index 1da226b..51ead40 100644 --- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "flattencfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -22,16 +21,19 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +#define DEBUG_TYPE "flattencfg" + namespace { class FlattenCFGOpt { AliasAnalysis *AA; /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. - bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P = nullptr); /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. - bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr); /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -126,9 +128,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. - BasicBlock *LastCondBlock = NULL; - BasicBlock *FirstCondBlock = NULL; - BasicBlock *UnCondBlock = NULL; + BasicBlock *LastCondBlock = nullptr; + BasicBlock *FirstCondBlock = nullptr; + BasicBlock *UnCondBlock = nullptr; int Idx = -1; // Check predecessors of \param BB. 
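// Illustrative aside (not part of the patch): the shape FlattenParallelAndOr
// targets. Two single-use condition blocks collapse into one block whose
// branch tests the combined condition:
//   before:  bb1: br i1 %c1, label %bb2, label %else
//            bb2: br i1 %c2, label %then, label %else
//   after:   bb1: %c = and i1 %c1, %c2
//                 br i1 %c, label %then, label %else
// A hypothetical IRBuilder fragment producing the merged test:
static Value *mergeParallelAnd(IRBuilder<> &B, Value *C1, Value *C2) {
  return B.CreateAnd(C1, C2, "parallel.and");
}
// End aside.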
@@ -240,7 +242,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); CmpInst::Predicate Predicate = CI->getPredicate(); - // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq + // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); BI->swapSuccessors(); diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp index 5f0a563..12057e4 100644 --- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -9,9 +9,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CallSite.h" #include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; @@ -35,9 +35,8 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) { if (isa<GlobalValue>(C)) return false; - for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; - ++UI) - if (const Constant *CU = dyn_cast<Constant>(*UI)) { + for (const User *U : C->users()) + if (const Constant *CU = dyn_cast<Constant>(U)) { if (!isSafeToDestroyConstant(CU)) return false; } else @@ -47,10 +46,9 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) { static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, SmallPtrSet<const PHINode *, 16> &PhiUsers) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; - ++UI) { - const User *U = *UI; - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + for (const Use &U : V->uses()) { + const User *UR = U.getUser(); + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) { GS.HasNonInstructionUser = true; // If the result of the constantexpr isn't pointer type, then we won't @@ -60,10 +58,10 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (analyzeGlobalAux(CE, GS, PhiUsers)) return true; - } else if (const Instruction *I = dyn_cast<Instruction>(U)) { + } else if (const Instruction *I = dyn_cast<Instruction>(UR)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); - if (GS.AccessingFunction == 0) + if (!GS.AccessingFunction) GS.AccessingFunction = F; else if (GS.AccessingFunction != F) GS.HasMultipleAccessingFunctions = true; @@ -150,13 +148,13 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, return true; GS.StoredType = GlobalStatus::Stored; } else if (ImmutableCallSite C = I) { - if (!C.isCallee(UI)) + if (!C.isCallee(&U)) return true; GS.IsLoaded = true; } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast<Constant>(U)) { + } else if (const Constant *C = dyn_cast<Constant>(UR)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. 
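// Illustrative aside (not part of the patch): the iteration idiom this
// patch adopts throughout. Value::users() yields the consuming Users,
// while Value::uses() yields the Use edges -- which is what analyzeGlobalAux
// needs in order to ask C.isCallee(&U):
static void walkUses(Value *V) {
  for (User *U : V->users())
    (void)U;                        // just the consumer
  for (Use &U : V->uses()) {
    User *UR = U.getUser();         // consumer plus which operand slot
    (void)UR;
    (void)U.getOperandNo();
  }
}
// End aside.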
if (!isSafeToDestroyConstant(C)) @@ -178,6 +176,6 @@ bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { GlobalStatus::GlobalStatus() : IsCompared(false), IsLoaded(false), StoredType(NotStored), - StoredOnceValue(0), AccessingFunction(0), + StoredOnceValue(nullptr), AccessingFunction(nullptr), HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), Ordering(NotAtomic) {} diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index d021bce..f0a9f2b 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -17,17 +17,18 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -51,8 +52,8 @@ namespace { public: InvokeInliningInfo(InvokeInst *II) - : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), - CallerLPad(0), InnerEHValuesPHI(0) { + : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), + CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing // the edge from this block. @@ -144,7 +145,6 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { void InvokeInliningInfo::forwardResume(ResumeInst *RI, SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); - LandingPadInst *OuterLPad = getLandingPadInst(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -155,16 +155,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); - - // Append the clauses from the outer landing pad instruction into the inlined - // landing pad instructions. - for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), - E = InlinedLPads.end(); I != E; ++I) { - LandingPadInst *InlinedLPad = *I; - for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); - OuterIdx != OuterNum; ++OuterIdx) - InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); - } } /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into @@ -172,22 +162,11 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, /// invokes. This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. -/// -/// Returns true to indicate that the next block should be skipped. 
-static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, +static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInliningInfo &Invoke) { - LandingPadInst *LPI = Invoke.getLandingPadInst(); - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; - if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { - unsigned NumClauses = LPI->getNumClauses(); - L->reserveClauses(NumClauses); - for (unsigned i = 0; i != NumClauses; ++i) - L->addClause(LPI->getClause(i)); - } - // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast<CallInst>(I); @@ -210,6 +189,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, Invoke.getOuterResumeDest(), InvokeArgs, CI->getName(), BB); + II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -223,10 +203,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Update any PHI nodes in the exceptional block to indicate that there is // now a new entry in them. Invoke.addIncomingPHIValuesFor(BB); - return false; + return; } - - return false; } /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls @@ -252,13 +230,23 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) InlinedLPads.insert(II->getLandingPadInst()); + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + unsigned OuterNum = OuterLPad->getNumClauses(); + InlinedLPad->reserveClauses(OuterNum); + for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + if (OuterLPad->isCleanup()) + InlinedLPad->setCleanup(true); + } + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) - if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { - // Honor a request to skip the next block. - ++BB; - continue; - } + HandleCallsInBlockInlinedThroughInvoke(BB, Invoke); // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) @@ -303,13 +291,13 @@ static void UpdateCallGraphAfterInlining(CallSite CS, ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); // Only copy the edge if the call was inlined! - if (VMI == VMap.end() || VMI->second == 0) + if (VMI == VMap.end() || VMI->second == nullptr) continue; // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. Instruction *NewCall = dyn_cast<Instruction>(VMI->second); - if (NewCall == 0) continue; + if (!NewCall) continue; // Remember that this call site got inlined for the client of // InlineFunction. @@ -320,7 +308,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // happens, set the callee of the new call site to a more precise // destination. This can also happen if the call graph node of the caller // was just unnecessarily imprecise. 
- if (I->second->getFunction() == 0) + if (!I->second->getFunction()) if (Function *F = CallSite(NewCall).getCalledFunction()) { // Indirect call site resolved to direct call. CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); @@ -336,13 +324,44 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } +static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, + BasicBlock *InsertBlock, + InlineFunctionInfo &IFI) { + LLVMContext &Context = Src->getContext(); + Type *VoidPtrTy = Type::getInt8PtrTy(Context); + Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); + Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) }; + Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); + IRBuilder<> builder(InsertBlock->begin()); + Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp"); + Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp"); + + Value *Size; + if (IFI.DL == nullptr) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::getInt64Ty(Context), + IFI.DL->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Value *CallArgs[] = { + DstCast, SrcCast, Size, + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::getFalse(Context) // isVolatile + }; + builder.CreateCall(MemCpyFn, CallArgs); +} + /// HandleByValArgument - When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - Type *AggTy = cast<PointerType>(Arg->getType())->getElementType(); + PointerType *ArgTy = cast<PointerType>(Arg->getType()); + Type *AggTy = ArgTy->getElementType(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and @@ -357,21 +376,17 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. if (getOrEnforceKnownAlignment(Arg, ByValAlignment, - IFI.TD) >= ByValAlignment) + IFI.DL) >= ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. } - - LLVMContext &Context = Arg->getContext(); - Type *VoidPtrTy = Type::getInt8PtrTy(Context); - // Create the alloca. If we have DataLayout, use nice alignment. unsigned Align = 1; - if (IFI.TD) - Align = IFI.TD->getPrefTypeAlignment(AggTy); + if (IFI.DL) + Align = IFI.DL->getPrefTypeAlignment(AggTy); // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the @@ -380,32 +395,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), &*Caller->begin()->begin()); - // Emit a memcpy. 
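// Illustrative aside (not part of the patch): the initialization emitted
// by HandleByValArgumentInit above, shown as the IR it produces:
//   %dst = bitcast <ty>* %arg.alloca to i8*
//   %src = bitcast <ty>* %byval.actual to i8*
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src,
//                                        i64 <store size>, i32 1, i1 false)
// Alignment 1 is deliberate: the source pointer's alignment is unknown,
// and later passes can still infer a better one.
// End aside.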
- Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; - Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), - Intrinsic::memcpy, - Tys); - Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); - Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); - - Value *Size; - if (IFI.TD == 0) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = ConstantInt::get(Type::getInt64Ty(Context), - IFI.TD->getTypeStoreSize(AggTy)); - - // Always generate a memcpy of alignment 1 here because we don't know - // the alignment of the src pointer. Other optimizations can infer - // better alignment. - Value *CallArgs[] = { - DestCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1), - ConstantInt::getFalse(Context) // isVolatile - }; - IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs); + IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca)); // Uses of the argument in the function should use our new alloca // instead. @@ -415,9 +407,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // isUsedByLifetimeMarker - Check whether this Value is used by a lifetime // intrinsic. static bool isUsedByLifetimeMarker(Value *V) { - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; - ++UI) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) { + for (User *U : V->users()) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::lifetime_start: @@ -432,16 +423,17 @@ static bool isUsedByLifetimeMarker(Value *V) { // hasLifetimeMarkers - Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { - Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); - if (AI->getType() == Int8PtrTy) + Type *Ty = AI->getType(); + Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), + Ty->getPointerAddressSpace()); + if (Ty == Int8PtrTy) return isUsedByLifetimeMarker(AI); // Do a scan to find all the casts to i8*. - for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E; - ++I) { - if (I->getType() != Int8PtrTy) continue; - if (I->stripPointerCasts() != AI) continue; - if (isUsedByLifetimeMarker(*I)) + for (User *U : AI->users()) { + if (U->getType() != Int8PtrTy) continue; + if (U->stripPointerCasts() != AI) continue; + if (isUsedByLifetimeMarker(U)) return true; } return false; @@ -475,7 +467,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (!DL.isUnknown()) { + if (DL.isUnknown()) { + // If the inlined instruction has no line number, make it look as if it + // originates from the call location. This is important for + // ((__always_inline__, __nodebug__)) functions which must use caller + // location for all instructions in their function body. + BI->setDebugLoc(TheCallDL); + } else { BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { LLVMContext &Ctx = BI->getContext(); @@ -488,6 +486,33 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } +/// Returns a musttail call instruction if one immediately precedes the given +/// return instruction with an optional bitcast instruction between them. 
+static CallInst *getPrecedingMustTailCall(ReturnInst *RI) { + Instruction *Prev = RI->getPrevNode(); + if (!Prev) + return nullptr; + + if (Value *RV = RI->getReturnValue()) { + if (RV != Prev) + return nullptr; + + // Look through the optional bitcast. + if (auto *BI = dyn_cast<BitCastInst>(Prev)) { + RV = BI->getOperand(0); + Prev = BI->getPrevNode(); + if (!Prev || RV != Prev) + return nullptr; + } + } + + if (auto *CI = dyn_cast<CallInst>(Prev)) { + if (CI->isMustTailCall()) + return CI; + } + return nullptr; +} + /// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs @@ -507,15 +532,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); - if (CalledFunc == 0 || // Can't inline external function or indirect + if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; - // If the call to the callee is not a tail call, we must clear the 'tail' - // flags on any calls that we inline. - bool MustClearTailCallFlags = - !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); - // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); @@ -535,7 +555,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } // Get the personality function from the callee if it contains a landing pad. - Value *CalleePersonality = 0; + Value *CalleePersonality = nullptr; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { @@ -578,6 +598,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; + // Keep a list of pair (dst, src) to emit byval initializations. + SmallVector<std::pair<Value*, Value*>, 4> ByValInit; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -597,11 +619,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); - - // Calls that we inline may use the new alloca, so we need to clear - // their 'tail' flags if HandleByValArgument introduced a new alloca and - // the callee has calls. - MustClearTailCallFlags |= ActualArg != *AI; + if (ActualArg != *AI) + ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } VMap[I] = ActualArg; @@ -613,11 +632,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, IFI.TD, TheCall); + &InlinedFunctionInfo, IFI.DL, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; + // Inject byval arguments initialization. + for (std::pair<Value*, Value*> &Init : ByValInit) + HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), + FirstNewBlock, IFI); + // Update the callgraph if requested. 
if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); @@ -635,7 +659,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); - if (AI == 0) continue; + if (!AI) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. @@ -667,6 +691,45 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + bool InlinedMustTailCalls = false; + if (InlinedFunctionInfo.ContainsCalls) { + CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; + if (CallInst *CI = dyn_cast<CallInst>(TheCall)) + CallSiteTailKind = CI->getTailCallKind(); + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; + ++BB) { + for (Instruction &I : *BB) { + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI) + continue; + + // We need to reduce the strength of any inlined tail calls. For + // musttail, we have to avoid introducing potential unbounded stack + // growth. For example, if functions 'f' and 'g' are mutually recursive + // with musttail, we can inline 'g' into 'f' so long as we preserve + // musttail on the cloned call to 'f'. If either the inlined call site + // or the cloned call site is *not* musttail, the program already has + // one frame of stack growth, so it's safe to remove musttail. Here is + // a table of example transformations: + // + // f -> musttail g -> musttail f ==> f -> musttail f + // f -> musttail g -> tail f ==> f -> tail f + // f -> g -> musttail f ==> f -> f + // f -> g -> tail f ==> f -> f + CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); + ChildTCK = std::min(CallSiteTailKind, ChildTCK); + CI->setTailCallKind(ChildTCK); + InlinedMustTailCalls |= CI->isMustTailCall(); + + // Calls inlined through a 'nounwind' call site should be marked + // 'nounwind'. + if (MarkNoUnwind) + CI->setDoesNotThrow(); + } + } + } + // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { @@ -680,12 +743,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, continue; // Try to determine the size of the allocation. - ConstantInt *AllocaSize = 0; + ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { - if (IFI.TD) { + if (IFI.DL) { Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); + uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); // Check that array size doesn't saturate uint64_t and doesn't @@ -699,9 +762,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } builder.CreateLifetimeStart(AI, AllocaSize); - for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { - IRBuilder<> builder(Returns[ri]); - builder.CreateLifetimeEnd(AI, AllocaSize); + for (ReturnInst *RI : Returns) { + // Don't insert llvm.lifetime.end calls between a musttail call and a + // return. The return kills all local allocas. 
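// Illustrative aside (not part of the patch): the tail-kind strength
// reduction above leans on the enum ordering TCK_None < TCK_Tail <
// TCK_MustTail, so taking the minimum keeps the weaker guarantee:
static CallInst::TailCallKind weakerKind(CallInst::TailCallKind SiteKind,
                                         CallInst::TailCallKind ChildKind) {
  return std::min(SiteKind, ChildKind);   // e.g. (tail, musttail) -> tail
}
// End aside.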
+ if (InlinedMustTailCalls && getPrecedingMustTailCall(RI)) + continue; + IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); } } } @@ -720,33 +786,56 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); + for (ReturnInst *RI : Returns) { + // Don't insert llvm.stackrestore calls between a musttail call and a + // return. The return will restore the stack pointer. + if (InlinedMustTailCalls && getPrecedingMustTailCall(RI)) + continue; + IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); } } - // If we are inlining tail call instruction through a call site that isn't - // marked 'tail', we must remove the tail marker for any calls in the inlined - // code. Also, calls inlined through a 'nounwind' call site should be marked - // 'nounwind'. - if (InlinedFunctionInfo.ContainsCalls && - (MustClearTailCallFlags || MarkNoUnwind)) { - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); - BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (MustClearTailCallFlags) - CI->setTailCall(false); - if (MarkNoUnwind) - CI->setDoesNotThrow(); - } - } - // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + // Handle any inlined musttail call sites. In order for a new call site to be + // musttail, the source of the clone and the inlined call site must have been + // musttail. Therefore it's safe to return without merging control into the + // phi below. + if (InlinedMustTailCalls) { + // Check if we need to bitcast the result of any musttail calls. + Type *NewRetTy = Caller->getReturnType(); + bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; + + // Handle the returns preceded by musttail calls separately. + SmallVector<ReturnInst *, 8> NormalReturns; + for (ReturnInst *RI : Returns) { + CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI); + if (!ReturnedMustTail) { + NormalReturns.push_back(RI); + continue; + } + if (!NeedBitCast) + continue; + + // Delete the old return and any preceding bitcast. + BasicBlock *CurBB = RI->getParent(); + auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue()); + RI->eraseFromParent(); + if (OldCast) + OldCast->eraseFromParent(); + + // Insert a new bitcast and return with the right type. + IRBuilder<> Builder(CurBB); + Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); + } + + // Leave behind the normal returns so we can merge control flow. + std::swap(Returns, NormalReturns); + } + // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. @@ -790,7 +879,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. 
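// Illustrative aside (not part of the patch): the rewrite performed for
// musttail returns above. When the caller's return type differs from the
// call's type, the cloned 'ret' is rebuilt so the call stays in tail
// position:
//   %t = musttail call i8* @callee(...)
//   %c = bitcast i8* %t to i32*
//   ret i32* %c
// End aside.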
BasicBlock *AfterCallBB; - BranchInst *CreatedBranchToNormalDest = NULL; + BranchInst *CreatedBranchToNormalDest = nullptr; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... @@ -829,7 +918,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); - PHINode *PHI = 0; + PHINode *PHI = nullptr; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. @@ -902,6 +991,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); + // If we inlined any musttail calls and the original return is now + // unreachable, delete it. It can only contain a bitcast and ret. + if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) + AfterCallBB->eraseFromParent(); + // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); @@ -922,7 +1016,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { - if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { + if (Value *V = SimplifyInstruction(PHI, IFI.DL)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp index a020bc7..da890a2 100644 --- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp @@ -27,11 +27,11 @@ namespace { initializeInstNamerPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &Info) const { + void getAnalysisUsage(AnalysisUsage &Info) const override { Info.setPreservesAll(); } - bool runOnFunction(Function &F) { + bool runOnFunction(Function &F) override { for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) if (!AI->hasName() && !AI->getType()->isVoidTy()) diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 3cb8ded..9f91eeb 100644 --- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -7,22 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file contains an implementation of 32bit scalar integer division for -// targets that don't have native support. It's largely derived from -// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the -// amount of control flow +// This file contains an implementation of 32bit and 64bit scalar integer +// division for targets that don't have native support. It's largely derived +// from compiler-rt's implementations of __udivsi3 and __udivmoddi4, +// but hand-tuned for targets that prefer less control flow. 
// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "integer-division" #include "llvm/Transforms/Utils/IntegerDivision.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include <utility> using namespace llvm; +#define DEBUG_TYPE "integer-division" + /// Generate code to compute the remainder of two signed integers. Returns the /// remainder, which will have the sign of the dividend. Builder's insert point /// should be pointing where the caller wants code generated, e.g. at the srem @@ -31,7 +33,18 @@ using namespace llvm; /// be expanded if the user wishes static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { - ConstantInt *ThirtyOne = Builder.getInt32(31); + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift; + + if (BitWidth == 64) { + Shift = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Shift = Builder.getInt32(31); + } + + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). // ; %dividend_sgn = ashr i32 %dividend, 31 // ; %divisor_sgn = ashr i32 %divisor, 31 @@ -42,8 +55,8 @@ static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, // ; %urem = urem i32 %dividend, %divisor // ; %xored = xor i32 %urem, %dividend_sgn // ; %srem = sub i32 %xored, %dividend_sgn - Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); - Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DividendSign = Builder.CreateAShr(Dividend, Shift); + Value *DivisorSign = Builder.CreateAShr(Divisor, Shift); Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); @@ -68,6 +81,8 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // Remainder = Dividend - Quotient*Divisor + // Following instructions are generated for both i32 and i64 + // ; %quotient = udiv i32 %dividend, %divisor // ; %product = mul i32 %divisor, %quotient // ; %remainder = sub i32 %dividend, %product @@ -88,9 +103,20 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, /// present, i.e. not folded), ready to be expanded if the user wishes. static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { - // Implementation taken from compiler-rt's __divsi3 + // Implementation taken from compiler-rt's __divsi3 and __divdi3 + + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift; + + if (BitWidth == 64) { + Shift = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Shift = Builder.getInt32(31); + } - ConstantInt *ThirtyOne = Builder.getInt32(31); + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). 
// ; %tmp = ashr i32 %dividend, 31 // ; %tmp1 = ashr i32 %divisor, 31 @@ -102,8 +128,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr // ; %tmp4 = xor i32 %q_mag, %q_sgn // ; %q = sub i32 %tmp4, %q_sgn - Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne); - Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne); + Value *Tmp = Builder.CreateAShr(Dividend, Shift); + Value *Tmp1 = Builder.CreateAShr(Divisor, Shift); Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); @@ -119,9 +145,9 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, return Q; } -/// Generates code to divide two unsigned scalar 32-bit integers. Returns the -/// quotient, rounded towards 0. Builder's insert point should be pointing where -/// the caller wants code generated, e.g. at the udiv instruction. +/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers. +/// Returns the quotient, rounded towards 0. Builder's insert point should +/// point where the caller wants code generated, e.g. at the udiv instruction. static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // The basic algorithm can be found in the compiler-rt project's @@ -129,18 +155,33 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // that's been hand-tuned to lessen the amount of control flow involved. // Some helper values - IntegerType *I32Ty = Builder.getInt32Ty(); + IntegerType *DivTy = cast<IntegerType>(Dividend->getType()); + unsigned BitWidth = DivTy->getBitWidth(); + + ConstantInt *Zero; + ConstantInt *One; + ConstantInt *NegOne; + ConstantInt *MSB; + + if (BitWidth == 64) { + Zero = Builder.getInt64(0); + One = Builder.getInt64(1); + NegOne = ConstantInt::getSigned(DivTy, -1); + MSB = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Zero = Builder.getInt32(0); + One = Builder.getInt32(1); + NegOne = ConstantInt::getSigned(DivTy, -1); + MSB = Builder.getInt32(31); + } - ConstantInt *Zero = Builder.getInt32(0); - ConstantInt *One = Builder.getInt32(1); - ConstantInt *ThirtyOne = Builder.getInt32(31); - ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1); - ConstantInt *True = Builder.getTrue(); + ConstantInt *True = Builder.getTrue(); BasicBlock *IBB = Builder.GetInsertBlock(); Function *F = IBB->getParent(); - Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, - I32Ty); + Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + DivTy); // Our CFG is going to look like: // +---------------------+ @@ -190,6 +231,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // We'll be overwriting the terminator to insert our extra blocks SpecialCases->getTerminator()->eraseFromParent(); + // Same instructions are generated for both i32 (msb 31) and i64 (msb 63). + // First off, check for special cases: dividend or divisor is zero, divisor // is greater than dividend, and divisor is 1. 
// ; special-cases: @@ -209,12 +252,12 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); - Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True); - Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True); + Value *Tmp0 = Builder.CreateCall2(CTLZ, Divisor, True); + Value *Tmp1 = Builder.CreateCall2(CTLZ, Dividend, True); Value *SR = Builder.CreateSub(Tmp0, Tmp1); - Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne); + Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); - Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne); + Value *RetDividend = Builder.CreateICmpEQ(SR, MSB); Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); Builder.CreateCondBr(EarlyRet, End, BB1); @@ -227,7 +270,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; br i1 %skipLoop, label %loop-exit, label %preheader Builder.SetInsertPoint(BB1); Value *SR_1 = Builder.CreateAdd(SR, One); - Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR); + Value *Tmp2 = Builder.CreateSub(MSB, SR); Value *Q = Builder.CreateShl(Dividend, Tmp2); Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); @@ -260,17 +303,17 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %tmp12 = icmp eq i32 %sr_2, 0 // ; br i1 %tmp12, label %loop-exit, label %do-while Builder.SetInsertPoint(DoWhile); - PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2); - PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2); - PHINode *R_1 = Builder.CreatePHI(I32Ty, 2); - PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2); + PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2); + PHINode *SR_3 = Builder.CreatePHI(DivTy, 2); + PHINode *R_1 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_2 = Builder.CreatePHI(DivTy, 2); Value *Tmp5 = Builder.CreateShl(R_1, One); - Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne); + Value *Tmp6 = Builder.CreateLShr(Q_2, MSB); Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); Value *Tmp8 = Builder.CreateShl(Q_2, One); Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); - Value *Tmp10 = Builder.CreateAShr(Tmp9, 31); + Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB); Value *Carry = Builder.CreateAnd(Tmp10, One); Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); Value *R = Builder.CreateSub(Tmp7, Tmp11); @@ -285,8 +328,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_4 = or i32 %carry_2, %tmp13 // ; br label %end Builder.SetInsertPoint(LoopExit); - PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2); - PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2); + PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_3 = Builder.CreatePHI(DivTy, 2); Value *Tmp13 = Builder.CreateShl(Q_3, One); Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); Builder.CreateBr(End); @@ -295,7 +338,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] // ; ret i32 %q_5 Builder.SetInsertPoint(End, End->begin()); - PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_5 = Builder.CreatePHI(DivTy, 2); // Populate the Phis, since all values have now been created. 
Our Phis were: // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] @@ -326,9 +369,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, /// Generate code to calculate the remainder of two integers, replacing Rem with /// the generated code. This currently generates code using the udiv expansion, /// but future work includes generating more specialized code, e.g. when more -/// information about the operands are known. Currently only implements 32bit -/// scalar division (due to udiv's limitation), but future work is removing this -/// limitation. +/// information about the operands are known. Implements both 32bit and 64bit +/// scalar division. /// /// @brief Replace Rem with generated code. bool llvm::expandRemainder(BinaryOperator *Rem) { @@ -338,6 +380,15 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { IRBuilder<> Builder(Rem); + Type *RemTy = Rem->getType(); + if (RemTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth != 32 && RemTyBitWidth != 64) + llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + // First prepare the sign if it's a signed remainder if (Rem->getOpcode() == Instruction::SRem) { Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), @@ -376,9 +427,8 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code -/// when more information about the operands are known. Currently only -/// implements 32bit scalar division, but future work is removing this -/// limitation. +/// when more information about the operands are known. Implements both +/// 32bit and 64bit scalar division. /// /// @brief Replace Div with generated code. bool llvm::expandDivision(BinaryOperator *Div) { @@ -388,9 +438,15 @@ bool llvm::expandDivision(BinaryOperator *Div) { IRBuilder<> Builder(Div); - if (Div->getType()->isVectorTy()) + Type *DivTy = Div->getType(); + if (DivTy->isVectorTy()) llvm_unreachable("Div over vectors not supported"); + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth != 32 && DivTyBitWidth != 64) + llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + // First prepare the sign if it's a signed division if (Div->getOpcode() == Instruction::SDiv) { // Lower the code to unsigned division, and reset Div to point to the udiv. @@ -443,7 +499,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { if (RemTyBitWidth == 32) return expandRemainder(Rem); - // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // If bitwidth smaller than 32 extend inputs, extend output and proceed // with 32 bit division. IRBuilder<> Builder(Rem); @@ -471,6 +527,55 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { return expandRemainder(cast<BinaryOperator>(ExtRem)); } +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 64 bits. Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 64 bits. +/// +/// @brief Replace Rem with emulation code. 
+bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + if (RemTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth > 64) + llvm_unreachable("Div of bitwidth greater than 64 not supported"); + + if (RemTyBitWidth == 64) + return expandRemainder(Rem); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast<BinaryOperator>(ExtRem)); +} /// Generate code to divide two integers of bitwidth up to 32 bits. Uses the /// above routines and extends the inputs/truncates the outputs to operate @@ -495,7 +600,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { if (DivTyBitWidth == 32) return expandDivision(Div); - // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // If bitwidth smaller than 32 extend inputs, extend output and proceed // with 32 bit division. IRBuilder<> Builder(Div); @@ -522,3 +627,53 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { return expandDivision(cast<BinaryOperator>(ExtDiv)); } + +/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 64 bits. +/// +/// @brief Replace Div with emulation code. +bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + if (DivTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth > 64) + llvm_unreachable("Div of bitwidth greater than 64 not supported"); + + if (DivTyBitWidth == 64) + return expandDivision(Div); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. 
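  // (Editorial aside, not part of the patch.) The extension choice just below
  // is load-bearing: sdiv must widen its operands with CreateSExt so negative
  // values keep their sign in 64 bits, while udiv widens with CreateZExt;
  // mixing these up would silently change the result for negative inputs.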
+ IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast<BinaryOperator>(ExtDiv)); +} diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 97e7e5d..51a3d9c 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -27,222 +27,108 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lcssa" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PredIteratorCache.h" #include "llvm/Pass.h" -#include "llvm/Support/PredIteratorCache.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -STATISTIC(NumLCSSA, "Number of live out of a loop variables"); - -namespace { - struct LCSSA : public LoopPass { - static char ID; // Pass identification, replacement for typeid - LCSSA() : LoopPass(ID) { - initializeLCSSAPass(*PassRegistry::getPassRegistry()); - } - - // Cached analysis information for the current function. - DominatorTree *DT; - LoopInfo *LI; - ScalarEvolution *SE; - PredIteratorCache PredCache; - Loop *L; - - virtual bool runOnLoop(Loop *L, LPPassManager &LPM); - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG. It maintains both of these, - /// as well as the CFG. It also requires dominator information. - /// - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - - AU.addRequired<DominatorTree>(); - AU.addRequired<LoopInfo>(); - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); - } - private: - bool ProcessInstruction(Instruction *Inst, - const SmallVectorImpl<BasicBlock*> &ExitBlocks); - - /// verifyAnalysis() - Verify loop nest. - virtual void verifyAnalysis() const { - // Check the special guarantees that LCSSA makes. 
- assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!"); - } - }; -} - -char LCSSA::ID = 0; -INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) - -Pass *llvm::createLCSSAPass() { return new LCSSA(); } -char &llvm::LCSSAID = LCSSA::ID; +#define DEBUG_TYPE "lcssa" +STATISTIC(NumLCSSA, "Number of live out of a loop variables"); -/// BlockDominatesAnExit - Return true if the specified block dominates at least -/// one of the blocks in the specified list. -static bool BlockDominatesAnExit(BasicBlock *BB, - const SmallVectorImpl<BasicBlock*> &ExitBlocks, - DominatorTree *DT) { - DomTreeNode *DomNode = DT->getNode(BB); +/// Return true if the specified block is in the list. +static bool isExitBlock(BasicBlock *BB, + const SmallVectorImpl<BasicBlock *> &ExitBlocks) { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i]))) + if (ExitBlocks[i] == BB) return true; - return false; } +/// Given an instruction in the loop, check to see if it has any uses that are +/// outside the current loop. If so, insert LCSSA PHI nodes and rewrite the +/// uses. +static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, + const SmallVectorImpl<BasicBlock *> &ExitBlocks, + PredIteratorCache &PredCache) { + SmallVector<Use *, 16> UsesToRewrite; -/// runOnFunction - Process all loops in the function, inner-most out. -bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { - L = TheLoop; - - DT = &getAnalysis<DominatorTree>(); - LI = &getAnalysis<LoopInfo>(); - SE = getAnalysisIfAvailable<ScalarEvolution>(); - - // Get the set of exiting blocks. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - - if (ExitBlocks.empty()) - return false; - - // Look at all the instructions in the loop, checking to see if they have uses - // outside the loop. If so, rewrite those uses. - bool MadeChange = false; - - for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end(); - BBI != E; ++BBI) { - BasicBlock *BB = *BBI; - - // For large loops, avoid use-scanning by using dominance information: In - // particular, if a block does not dominate any of the loop exits, then none - // of the values defined in the block could be used outside the loop. - if (!BlockDominatesAnExit(BB, ExitBlocks, DT)) - continue; - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - // Reject two common cases fast: instructions with no uses (like stores) - // and instructions with one use that is in the same block as this. - if (I->use_empty() || - (I->hasOneUse() && I->use_back()->getParent() == BB && - !isa<PHINode>(I->use_back()))) - continue; - - MadeChange |= ProcessInstruction(I, ExitBlocks); - } - } - - // If we modified the code, remove any caches about the loop from SCEV to - // avoid dangling entries. - // FIXME: This is a big hammer, can we clear the cache more selectively? - if (SE && MadeChange) - SE->forgetLoop(L); - - assert(L->isLCSSAForm(*DT)); - PredCache.clear(); - - return MadeChange; -} + BasicBlock *InstBB = Inst.getParent(); -/// isExitBlock - Return true if the specified block is in the list. 
-static bool isExitBlock(BasicBlock *BB, - const SmallVectorImpl<BasicBlock*> &ExitBlocks) { - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (ExitBlocks[i] == BB) - return true; - return false; -} + for (Use &U : Inst.uses()) { + Instruction *User = cast<Instruction>(U.getUser()); + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) + UserBB = PN->getIncomingBlock(U); -/// ProcessInstruction - Given an instruction in the loop, check to see if it -/// has any uses that are outside the current loop. If so, insert LCSSA PHI -/// nodes and rewrite the uses. -bool LCSSA::ProcessInstruction(Instruction *Inst, - const SmallVectorImpl<BasicBlock*> &ExitBlocks) { - SmallVector<Use*, 16> UsesToRewrite; - - BasicBlock *InstBB = Inst->getParent(); - - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - User *U = *UI; - BasicBlock *UserBB = cast<Instruction>(U)->getParent(); - if (PHINode *PN = dyn_cast<PHINode>(U)) - UserBB = PN->getIncomingBlock(UI); - - if (InstBB != UserBB && !L->contains(UserBB)) - UsesToRewrite.push_back(&UI.getUse()); + if (InstBB != UserBB && !L.contains(UserBB)) + UsesToRewrite.push_back(&U); } // If there are no uses outside the loop, exit with no change. - if (UsesToRewrite.empty()) return false; - + if (UsesToRewrite.empty()) + return false; + ++NumLCSSA; // We are applying the transformation // Invoke instructions are special in that their result value is not available - // along their unwind edge. The code below tests to see whether DomBB dominates + // along their unwind edge. The code below tests to see whether DomBB + // dominates // the value, so adjust DomBB to the normal destination block, which is // effectively where the value is first usable. - BasicBlock *DomBB = Inst->getParent(); - if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst)) + BasicBlock *DomBB = Inst.getParent(); + if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst)) DomBB = Inv->getNormalDest(); - DomTreeNode *DomNode = DT->getNode(DomBB); + DomTreeNode *DomNode = DT.getNode(DomBB); - SmallVector<PHINode*, 16> AddedPHIs; + SmallVector<PHINode *, 16> AddedPHIs; SSAUpdater SSAUpdate; - SSAUpdate.Initialize(Inst->getType(), Inst->getName()); - + SSAUpdate.Initialize(Inst.getType(), Inst.getName()); + // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. - for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(), - BBE = ExitBlocks.end(); BBI != BBE; ++BBI) { + for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(), + BBE = ExitBlocks.end(); + BBI != BBE; ++BBI) { BasicBlock *ExitBB = *BBI; - if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue; - + if (!DT.dominates(DomNode, DT.getNode(ExitBB))) + continue; + // If we already inserted something for this BB, don't reprocess it. - if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - - PHINode *PN = PHINode::Create(Inst->getType(), - PredCache.GetNumPreds(ExitBB), - Inst->getName()+".lcssa", - ExitBB->begin()); + if (SSAUpdate.HasValueForBlock(ExitBB)) + continue; + + PHINode *PN = PHINode::Create(Inst.getType(), PredCache.GetNumPreds(ExitBB), + Inst.getName() + ".lcssa", ExitBB->begin()); // Add inputs from inside the loop for this PHI. 
for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { - PN->addIncoming(Inst, *PI); + PN->addIncoming(&Inst, *PI); // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. - if (!L->contains(*PI)) + if (!L.contains(*PI)) UsesToRewrite.push_back( - &PN->getOperandUse( - PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); + &PN->getOperandUse(PN->getOperandNumForIncomingValue( + PN->getNumIncomingValues() - 1))); } AddedPHIs.push_back(PN); - + // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); } @@ -259,15 +145,14 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (PHINode *PN = dyn_cast<PHINode>(User)) UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); - if (isa<PHINode>(UserBB->begin()) && - isExitBlock(UserBB, ExitBlocks)) { + if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { // Tell the VHs that the uses changed. This updates SCEV's caches. if (UsesToRewrite[i]->get()->hasValueHandle()) ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); UsesToRewrite[i]->set(UserBB->begin()); continue; } - + // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UsesToRewrite[i]); } @@ -277,7 +162,152 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (AddedPHIs[i]->use_empty()) AddedPHIs[i]->eraseFromParent(); } - + return true; } +/// Return true if the specified block dominates at least +/// one of the blocks in the specified list. +static bool +blockDominatesAnExit(BasicBlock *BB, + DominatorTree &DT, + const SmallVectorImpl<BasicBlock *> &ExitBlocks) { + DomTreeNode *DomNode = DT.getNode(BB); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i]))) + return true; + + return false; +} + +bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) { + bool Changed = false; + + // Get the set of exiting blocks. + SmallVector<BasicBlock *, 8> ExitBlocks; + L.getExitBlocks(ExitBlocks); + + if (ExitBlocks.empty()) + return false; + + PredIteratorCache PredCache; + + // Look at all the instructions in the loop, checking to see if they have uses + // outside the loop. If so, rewrite those uses. + for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end(); + BBI != BBE; ++BBI) { + BasicBlock *BB = *BBI; + + // For large loops, avoid use-scanning by using dominance information: In + // particular, if a block does not dominate any of the loop exits, then none + // of the values defined in the block could be used outside the loop. + if (!blockDominatesAnExit(BB, DT, ExitBlocks)) + continue; + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Reject two common cases fast: instructions with no uses (like stores) + // and instructions with one use that is in the same block as this. + if (I->use_empty() || + (I->hasOneUse() && I->user_back()->getParent() == BB && + !isa<PHINode>(I->user_back()))) + continue; + + Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache); + } + } + + // If we modified the code, remove any caches about the loop from SCEV to + // avoid dangling entries. + // FIXME: This is a big hammer, can we clear the cache more selectively? + if (SE && Changed) + SE->forgetLoop(&L); + + assert(L.isLCSSAForm(DT)); + + return Changed; +} + +/// Process a loop nest depth first. 
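Taken together, the helpers above implement a two-step recipe: plant one single-purpose PHI in each exit block dominated by the value, then let SSAUpdater route every outside use through whichever PHI reaches it. An editorial sketch of that pairing, not part of the patch (Inst, ExitBB and UsesToRewrite stand in for the real bookkeeping):

// Editorial sketch, not patch content: the PHI + SSAUpdater pairing behind
// formLCSSA. One PHI per dominated exit block; SSAUpdater rewires the uses.
SSAUpdater SSAUpdate;
SSAUpdate.Initialize(Inst.getType(), Inst.getName());

PHINode *ExitPhi = PHINode::Create(Inst.getType(), /*NumReservedValues=*/2,
                                   Inst.getName() + ".lcssa", ExitBB->begin());
for (pred_iterator PI = pred_begin(ExitBB), PE = pred_end(ExitBB); PI != PE;
     ++PI)
  ExitPhi->addIncoming(&Inst, *PI);  // the value arrives unchanged on each edge
SSAUpdate.AddAvailableValue(ExitBB, ExitPhi);

for (Use *U : UsesToRewrite)
  SSAUpdate.RewriteUse(*U);          // each use now reads a PHI, not Inst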
+bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, + ScalarEvolution *SE) { + bool Changed = false; + + // Recurse depth-first through inner loops. + for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + Changed |= formLCSSARecursively(**LI, DT, SE); + + Changed |= formLCSSA(L, DT, SE); + return Changed; +} + +namespace { +struct LCSSA : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LCSSA() : FunctionPass(ID) { + initializeLCSSAPass(*PassRegistry::getPassRegistry()); + } + + // Cached analysis information for the current function. + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + + bool runOnFunction(Function &F) override; + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. It maintains both of these, + /// as well as the CFG. It also requires dominator information. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfo>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<ScalarEvolution>(); + } + +private: + void verifyAnalysis() const override; +}; +} + +char LCSSA::ID = 0; +INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) + +Pass *llvm::createLCSSAPass() { return new LCSSA(); } +char &llvm::LCSSAID = LCSSA::ID; + + +/// Process all loops in the function, inner-most out. +bool LCSSA::runOnFunction(Function &F) { + bool Changed = false; + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); + + // Simplify each loop nest in the function. + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= formLCSSARecursively(**I, *DT, SE); + + return Changed; +} + +static void verifyLoop(Loop &L, DominatorTree &DT) { + // Recurse depth-first through inner loops. + for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + verifyLoop(**LI, DT); + + // Check the special guarantees that LCSSA makes. + //assert(L.isLCSSAForm(DT) && "LCSSA form not preserved!"); +} + +void LCSSA::verifyAnalysis() const { + // Verify each loop nest in the function, assuming LI still points at that + // function's loop info. 
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + verifyLoop(**I, *DT); +} diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 2768041..a5e443f 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -17,15 +17,17 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -35,14 +37,14 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/CFG.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "local" + STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); //===----------------------------------------------------------------------===// @@ -127,8 +129,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // dest. If so, eliminate it as an explicit compare. if (i.getCaseSuccessor() == DefaultDest) { MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); - // MD should have 2 + NumCases operands. - if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) { + unsigned NCases = SI->getNumCases(); + // Fold the case metadata into the default if there will be any branches + // left, unless the metadata doesn't match the switch. + if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) { // Collect branch weights into a vector. SmallVector<uint32_t, 8> Weights; for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; @@ -157,7 +161,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Otherwise, check to see if the switch only branches to one destination. // We do this by reseting "TheOnlyDest" to null when we find two non-equal // destinations. - if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0; + if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr; } if (CI && !TheOnlyDest) { @@ -178,7 +182,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Found case matching a constant operand? 
BasicBlock *Succ = SI->getSuccessor(i); if (Succ == TheOnlyDest) - TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest + TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest else Succ->removePredecessor(BB); } @@ -231,7 +235,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { if (IBI->getDestination(i) == TheOnlyDest) - TheOnlyDest = 0; + TheOnlyDest = nullptr; else IBI->getDestination(i)->removePredecessor(IBI->getParent()); } @@ -329,7 +333,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, // dead as we go. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Value *OpV = I->getOperand(i); - I->setOperand(i, 0); + I->setOperand(i, nullptr); if (!OpV->use_empty()) continue; @@ -352,8 +356,8 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, /// true when there are no uses or multiple uses that all refer to the same /// value. static bool areAllUsesEqual(Instruction *I) { - Value::use_iterator UI = I->use_begin(); - Value::use_iterator UE = I->use_end(); + Value::user_iterator UI = I->user_begin(); + Value::user_iterator UE = I->user_end(); if (UI == UE) return true; @@ -374,7 +378,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI) { SmallPtrSet<Instruction*, 4> Visited; for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); - I = cast<Instruction>(*I->use_begin())) { + I = cast<Instruction>(*I->user_begin())) { if (I->use_empty()) return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); @@ -505,12 +509,18 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { PredBB->getTerminator()->eraseFromParent(); DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + // If the PredBB is the entry block of the function, move DestBB up to + // become the entry block after we erase PredBB. + if (PredBB == &DestBB->getParent()->getEntryBlock()) + DestBB->moveAfter(PredBB); + if (P) { - DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); - if (DT) { - BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); - DT->changeImmediateDominator(DestBB, PredBBIDom); - DT->eraseNode(PredBB); + if (DominatorTreeWrapperPass *DTWP = + P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DominatorTree &DT = DTWP->getDomTree(); + BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock(); + DT.changeImmediateDominator(DestBB, PredBBIDom); + DT.eraseNode(PredBB); } } // Nuke BB. @@ -749,10 +759,9 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { if (!Succ->getSinglePredecessor()) { BasicBlock::iterator BBI = BB->begin(); while (isa<PHINode>(*BBI)) { - for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); - UI != E; ++UI) { - if (PHINode* PN = dyn_cast<PHINode>(*UI)) { - if (PN->getIncomingBlock(UI) != BB) + for (Use &U : BBI->uses()) { + if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) { + if (PN->getIncomingBlock(U) != BB) return false; } else { return false; @@ -892,24 +901,26 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, return PrefAlign; } - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (auto *GO = dyn_cast<GlobalObject>(V)) { // If there is a large requested alignment and we can, bump up the alignment // of the global. 
- if (GV->isDeclaration()) return Align; + if (GO->isDeclaration()) + return Align; // If the memory we set aside for the global may not be the memory used by // the final program then it is impossible for us to reliably enforce the // preferred alignment. - if (GV->isWeakForLinker()) return Align; + if (GO->isWeakForLinker()) + return Align; - if (GV->getAlignment() >= PrefAlign) - return GV->getAlignment(); + if (GO->getAlignment() >= PrefAlign) + return GO->getAlignment(); // We can only increase the alignment of the global if it has no alignment // specified or if it is not assigned a section. If it is assigned a // section, the global could be densely packed with other objects in the // section, increasing the alignment could cause padding issues. - if (!GV->hasSection() || GV->getAlignment() == 0) - GV->setAlignment(PrefAlign); - return GV->getAlignment(); + if (!GO->hasSection() || GO->getAlignment() == 0) + GO->setAlignment(PrefAlign); + return GO->getAlignment(); } return Align; @@ -926,7 +937,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, DL); + computeKnownBits(V, KnownZero, KnownOne, DL); unsigned TrailZ = KnownZero.countTrailingOnes(); // Avoid trouble with ridiculously large TrailZ values, such as @@ -979,10 +990,10 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (LdStHasDebugValue(DIVar, SI)) return true; - Instruction *DbgVal = NULL; + Instruction *DbgVal = nullptr; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. - Argument *ExtendedArg = NULL; + Argument *ExtendedArg = nullptr; if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) @@ -991,14 +1002,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI); else DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc SIDL = SI->getDebugLoc(); - if (!SIDL.isUnknown()) - DbgVal->setDebugLoc(SIDL); - // Otherwise propagate debug metadata from dbg.declare. - else - DbgVal->setDebugLoc(DDI->getDebugLoc()); + DbgVal->setDebugLoc(DDI->getDebugLoc()); return true; } @@ -1018,51 +1022,54 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, Instruction *DbgVal = Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc LIDL = LI->getDebugLoc(); - if (!LIDL.isUnknown()) - DbgVal->setDebugLoc(LIDL); - // Otherwise propagate debug metadata from dbg.declare. - else - DbgVal->setDebugLoc(DDI->getDebugLoc()); + DbgVal->setDebugLoc(DDI->getDebugLoc()); return true; } +/// Determine whether this alloca is either a VLA or an array. +static bool isArray(AllocaInst *AI) { + return AI->isArrayAllocation() || + AI->getType()->getElementType()->isArrayTy(); +} + /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set /// of llvm.dbg.value intrinsics. 
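The per-store conversion the loop below dispatches to reduces to a few lines. This editorial sketch is not patch content (emitDbgValueForStore is a hypothetical name); it uses DIBuilder the same way ConvertDebugDeclareToDebugValue does earlier in this file:

// Editorial sketch, not patch content: replace a dbg.declare's coverage of
// one store with a dbg.value describing the stored value at that point.
static void emitDbgValueForStore(DbgDeclareInst *DDI, StoreInst *SI,
                                 DIBuilder &DIB) {
  DIVariable DIVar(DDI->getVariable());
  Instruction *DbgVal =
      DIB.insertDbgValueIntrinsic(SI->getOperand(0), /*Offset=*/0, DIVar, SI);
  DbgVal->setDebugLoc(DDI->getDebugLoc()); // keep the declare's source location
}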
bool llvm::LowerDbgDeclare(Function &F) { DIBuilder DIB(*F.getParent()); SmallVector<DbgDeclareInst *, 4> Dbgs; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + for (auto &FI : F) + for (BasicBlock::iterator BI : FI) + if (auto DDI = dyn_cast<DbgDeclareInst>(BI)) Dbgs.push_back(DDI); - } + if (Dbgs.empty()) return false; - for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(), - E = Dbgs.end(); I != E; ++I) { - DbgDeclareInst *DDI = *I; + for (auto &I : Dbgs) { + DbgDeclareInst *DDI = I; AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); // If this is an alloca for a scalar variable, insert a dbg.value // at each load and store to the alloca and erase the dbg.declare. - if (AI && !AI->isArrayAllocation()) { - - // We only remove the dbg.declare intrinsic if all uses are - // converted to dbg.value intrinsics. - bool RemoveDDI = true; - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E; ++UI) - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + // The dbg.values allow tracking a variable even if it is not + // stored on the stack, while the dbg.declare can only describe + // the stack slot (and at a lexical-scope granularity). Later + // passes will attempt to elide the stack slot. + if (AI && !isArray(AI)) { + for (User *U : AI->users()) + if (StoreInst *SI = dyn_cast<StoreInst>(U)) ConvertDebugDeclareToDebugValue(DDI, SI, DIB); - else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + else if (LoadInst *LI = dyn_cast<LoadInst>(U)) ConvertDebugDeclareToDebugValue(DDI, LI, DIB); - else - RemoveDDI = false; - if (RemoveDDI) - DDI->eraseFromParent(); + else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // This is a call by-value or some other instruction that + // takes a pointer to the variable. Insert a *value* + // intrinsic that describes the alloca. + auto DbgVal = + DIB.insertDbgValueIntrinsic(AI, 0, + DIVariable(DDI->getVariable()), CI); + DbgVal->setDebugLoc(DDI->getDebugLoc()); + } + DDI->eraseFromParent(); } } return true; @@ -1072,12 +1079,11 @@ bool llvm::LowerDbgDeclare(Function &F) { /// alloca 'V', if any. 
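Typical callers pair this lookup with replaceDbgDeclareForAlloca (declared at the end of this hunk) when an alloca is rewritten. A hypothetical call site, assuming caller-side names OldAlloca, NewAddress and DIB, and assuming the trailing DIBuilder parameter from this revision's signature:

// Editorial sketch, not patch content: when a pass replaces an alloca, any
// dbg.declare describing it must be retargeted at the new address.
if (FindAllocaDbgDeclare(OldAlloca))
  replaceDbgDeclareForAlloca(OldAlloca, NewAddress, DIB);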
DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) - for (Value::use_iterator UI = DebugNode->use_begin(), - E = DebugNode->use_end(); UI != E; ++UI) - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + for (User *U : DebugNode->users()) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) return DDI; - return 0; + return nullptr; } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 6d5f16c..ef42291 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -37,331 +37,72 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-simplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; +#define DEBUG_TYPE "loop-simplify" + STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); STATISTIC(NumNested , "Number of nested loops split out"); -namespace { - struct LoopSimplify : public LoopPass { - static char ID; // Pass identification, replacement for typeid - LoopSimplify() : LoopPass(ID) { - initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); - } - - // AA - If we have an alias analysis object to update, this is it, otherwise - // this is null. - AliasAnalysis *AA; - LoopInfo *LI; - DominatorTree *DT; - ScalarEvolution *SE; - Loop *L; - virtual bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // We need loop information to identify the loops... - AU.addRequired<DominatorTree>(); - AU.addPreserved<DominatorTree>(); - - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - - AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); - AU.addPreserved<DependenceAnalysis>(); - AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. - } - - /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. 
- void verifyAnalysis() const; - - private: - bool ProcessLoop(Loop *L, LPPassManager &LPM); - BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); - Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, - BasicBlock *Preheader); - BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); - }; -} - -static void PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl<BasicBlock*> &SplitPreds, - Loop *L); - -char LoopSimplify::ID = 0; -INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", - "Canonicalize natural loops", true, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", - "Canonicalize natural loops", true, false) - -// Publicly exposed interface to pass... -char &llvm::LoopSimplifyID = LoopSimplify::ID; -Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } - -/// runOnLoop - Run down all loops in the CFG (recursively, but we could do -/// it in any convenient order) inserting preheaders... -/// -bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { - L = l; - bool Changed = false; - LI = &getAnalysis<LoopInfo>(); - AA = getAnalysisIfAvailable<AliasAnalysis>(); - DT = &getAnalysis<DominatorTree>(); - SE = getAnalysisIfAvailable<ScalarEvolution>(); - - Changed |= ProcessLoop(L, LPM); - - return Changed; -} - -/// ProcessLoop - Walk the loop structure in depth first order, ensuring that -/// all loops have preheaders. -/// -bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) { - bool Changed = false; -ReprocessLoop: - - // Check to see that no blocks (other than the header) in this loop have - // predecessors that are not in the loop. This is not valid for natural - // loops, but can occur if the blocks are unreachable. Since they are - // unreachable we can just shamelessly delete those CFG edges! - for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); - BB != E; ++BB) { - if (*BB == L->getHeader()) continue; - - SmallPtrSet<BasicBlock*, 4> BadPreds; - for (pred_iterator PI = pred_begin(*BB), - PE = pred_end(*BB); PI != PE; ++PI) { - BasicBlock *P = *PI; - if (!L->contains(P)) - BadPreds.insert(P); - } - - // Delete each unique out-of-loop (and thus dead) predecessor. - for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), - E = BadPreds.end(); I != E; ++I) { - - DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " - << (*I)->getName() << "\n"); - - // Inform each successor of each dead pred. - for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) - (*SI)->removePredecessor(*I); - // Zap the dead pred's terminator and replace it with unreachable. - TerminatorInst *TI = (*I)->getTerminator(); - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); - (*I)->getTerminator()->eraseFromParent(); - new UnreachableInst((*I)->getContext(), *I); - Changed = true; - } - } - - // If there are exiting blocks with branches on undef, resolve the undef in - // the direction which will exit the loop. This will help simplify loop - // trip count computations. 
- SmallVector<BasicBlock*, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), - E = ExitingBlocks.end(); I != E; ++I) - if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) - if (BI->isConditional()) { - if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { - - DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " - << (*I)->getName() << "\n"); - - BI->setCondition(ConstantInt::get(Cond->getType(), - !L->contains(BI->getSuccessor(0)))); - - // This may make the loop analyzable, force SCEV recomputation. - if (SE) - SE->forgetLoop(L); - - Changed = true; - } - } - - // Does the loop already have a preheader? If so, don't insert one. - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) { - Preheader = InsertPreheaderForLoop(L, this); - if (Preheader) { - ++NumInserted; - Changed = true; - } - } - - // Next, check to make sure that all exit nodes of the loop only have - // predecessors that are inside of the loop. This check guarantees that the - // loop preheader/header will dominate the exit blocks. If the exit block has - // predecessors from outside of the loop, split the edge now. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - - SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), - ExitBlocks.end()); - for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), - E = ExitBlockSet.end(); I != E; ++I) { - BasicBlock *ExitBlock = *I; - for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); - PI != PE; ++PI) - // Must be exactly this loop: no subloops, parent loops, or non-loop preds - // allowed. - if (!L->contains(*PI)) { - if (RewriteLoopExitBlock(L, ExitBlock)) { - ++NumInserted; - Changed = true; - } - break; - } - } - - // If the header has more than two predecessors at this point (from the - // preheader and from multiple backedges), we must adjust the loop. - BasicBlock *LoopLatch = L->getLoopLatch(); - if (!LoopLatch) { - // If this is really a nested loop, rip it out into a child loop. Don't do - // this for loops with a giant number of backedges, just factor them into a - // common backedge instead. - if (L->getNumBackEdges() < 8) { - if (SeparateNestedLoop(L, LPM, Preheader)) { - ++NumNested; - // This is a big restructuring change, reprocess the whole loop. - Changed = true; - // GCC doesn't tail recursion eliminate this. - goto ReprocessLoop; - } - } - - // If we either couldn't, or didn't want to, identify nesting of the loops, - // insert a new block that all backedges target, then make it jump to the - // loop header. - LoopLatch = InsertUniqueBackedgeBlock(L, Preheader); - if (LoopLatch) { - ++NumInserted; - Changed = true; - } +// If the block isn't already, move the new block to right after some 'outside +// block' block. This prevents the preheader from being placed inside the loop +// body, e.g. when the loop hasn't been rotated. +static void placeSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock *> &SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = NewBB; --BBI; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; } - // Scan over the PHI nodes in the loop header. Since they now have only two - // incoming values (the loop is canonicalized), we may have simplified the PHI - // down to 'X = phi [X, Y]', which should be replaced with 'Y'. 
- PHINode *PN; - for (BasicBlock::iterator I = L->getHeader()->begin(); - (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { - if (AA) AA->deleteValue(PN); - if (SE) SE->forgetValue(PN); - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - } - - // If this loop has multiple exits and the exits all go to the same - // block, attempt to merge the exits. This helps several passes, such - // as LoopRotation, which do not support loops with multiple exits. - // SimplifyCFG also does this (and this code uses the same utility - // function), however this code is loop-aware, where SimplifyCFG is - // not. That gives it the advantage of being able to hoist - // loop-invariant instructions out of the way to open up more - // opportunities, and the disadvantage of having the responsibility - // to preserve dominator information. - bool UniqueExit = true; - if (!ExitBlocks.empty()) - for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) - if (ExitBlocks[i] != ExitBlocks[0]) { - UniqueExit = false; - break; - } - if (UniqueExit) { - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - BasicBlock *ExitingBlock = ExitingBlocks[i]; - if (!ExitingBlock->getSinglePredecessor()) continue; - BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!BI || !BI->isConditional()) continue; - CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); - if (!CI || CI->getParent() != ExitingBlock) continue; - - // Attempt to hoist out all instructions except for the - // comparison and the branch. - bool AllInvariant = true; - for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { - Instruction *Inst = I++; - // Skip debug info intrinsics. - if (isa<DbgInfoIntrinsic>(Inst)) - continue; - if (Inst == CI) - continue; - if (!L->makeLoopInvariant(Inst, Changed, - Preheader ? Preheader->getTerminator() : 0)) { - AllInvariant = false; - break; - } - } - if (!AllInvariant) continue; - - // The block has now been cleared of all instructions except for - // a comparison and a conditional branch. SimplifyCFG may be able - // to fold it now. - if (!FoldBranchToCommonDest(BI)) continue; - - // Success. The block is now dead, so remove it from the loop, - // update the dominator tree and delete it. - DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " - << ExitingBlock->getName() << "\n"); - - // If any reachable control flow within this loop has changed, notify - // ScalarEvolution. Currently assume the parent loop doesn't change - // (spliting edges doesn't count). If blocks, CFG edges, or other values - // in the parent loop change, then we need call to forgetLoop() for the - // parent instead. - if (SE) - SE->forgetLoop(L); - - assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); - Changed = true; - LI->removeBlock(ExitingBlock); - - DomTreeNode *Node = DT->getNode(ExitingBlock); - const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = - Node->getChildren(); - while (!Children.empty()) { - DomTreeNode *Child = Children.front(); - DT->changeImmediateDominator(Child, Node->getIDom()); - } - DT->eraseNode(ExitingBlock); + // If it isn't already after an outside block, move it after one. This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. - BI->getSuccessor(0)->removePredecessor(ExitingBlock); - BI->getSuccessor(1)->removePredecessor(ExitingBlock); - ExitingBlock->eraseFromParent(); + // Figure out *which* outside block to put this after. 
Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = nullptr; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]; + if (++BBI != NewBB->getParent()->end() && + L->contains(BBI)) { + FoundBB = SplitPreds[i]; + break; } } - return Changed; + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. + if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); } /// InsertPreheaderForLoop - Once we discover that a loop doesn't have a @@ -380,7 +121,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // If the loop is branched to from an indirect branch, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (isa<IndirectBrInst>(P->getTerminator())) return 0; + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; // Keep track of it. OutsideBlocks.push_back(P); @@ -406,38 +147,39 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. - PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); + placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); return PreheaderBB; } -/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit -/// blocks. This method is used to split exit blocks that have predecessors -/// outside of the loop. -BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { +/// \brief Ensure that the loop preheader dominates all exit blocks. +/// +/// This method is used to split exit blocks that have predecessors outside of +/// the loop. +static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) { SmallVector<BasicBlock*, 8> LoopBlocks; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; if (L->contains(P)) { // Don't do this if the loop is exited via an indirect branch. - if (isa<IndirectBrInst>(P->getTerminator())) return 0; + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; LoopBlocks.push_back(P); } } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewExitBB = 0; + BasicBlock *NewExitBB = nullptr; if (Exit->isLandingPad()) { SmallVector<BasicBlock*, 2> NewBBs; SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0], LoopBlocks.size()), ".loopexit", ".nonloopexit", - this, NewBBs); + PP, NewBBs); NewExitBB = NewBBs[0]; } else { - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP); } DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " @@ -445,33 +187,33 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { return NewExitBB; } -/// AddBlockAndPredsToSet - Add the specified block, and all of its -/// predecessors, to the specified set, if it's not already in there. Stop -/// predecessor traversal when we reach StopBlock. -static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, +/// Add the specified block, and all of its predecessors, to the specified set, +/// if it's not already in there. Stop predecessor traversal when we reach +/// StopBlock. 
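The rewritten helper below also moves from a std::vector worklist to the SmallVector idiom, where pop_back_val combines back() and pop_back(). In general form (an editorial sketch, not patch content; Start and Visited are illustrative names):

// Editorial sketch, not patch content: the generic worklist walk shape.
std::set<BasicBlock *> Visited;
SmallVector<BasicBlock *, 8> Worklist;
Worklist.push_back(Start);
while (!Worklist.empty()) {
  BasicBlock *BB = Worklist.pop_back_val();  // back() + pop_back() in one call
  if (!Visited.insert(BB).second)
    continue;                                // already processed
  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI)
    Worklist.push_back(*PI);
}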
+static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, std::set<BasicBlock*> &Blocks) { - std::vector<BasicBlock *> WorkList; - WorkList.push_back(InputBB); + SmallVector<BasicBlock *, 8> Worklist; + Worklist.push_back(InputBB); do { - BasicBlock *BB = WorkList.back(); WorkList.pop_back(); + BasicBlock *BB = Worklist.pop_back_val(); if (Blocks.insert(BB).second && BB != StopBlock) // If BB is not already processed and it is not a stop block then // insert its predecessor in the work list for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { BasicBlock *WBB = *I; - WorkList.push_back(WBB); + Worklist.push_back(WBB); } - } while(!WorkList.empty()); + } while (!Worklist.empty()); } -/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a -/// PHI node that tells us how to partition the loops. -static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, - AliasAnalysis *AA, LoopInfo *LI) { +/// \brief The first part of loop-nestification is to find a PHI node that tells +/// us how to partition the loops. +static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, + DominatorTree *DT) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -486,49 +228,13 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, // We found something tasty to remove. return PN; } - return 0; + return nullptr; } -// PlaceSplitBlockCarefully - If the block isn't already, move the new block to -// right after some 'outside block' block. This prevents the preheader from -// being placed inside the loop body, e.g. when the loop hasn't been rotated. -void PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl<BasicBlock*> &SplitPreds, - Loop *L) { - // Check to see if NewBB is already well placed. - Function::iterator BBI = NewBB; --BBI; - for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - if (&*BBI == SplitPreds[i]) - return; - } - - // If it isn't already after an outside block, move it after one. This is - // always good as it makes the uncond branch from the outside block into a - // fall-through. - - // Figure out *which* outside block to put this after. Prefer an outside - // block that neighbors a BB actually in the loop. - BasicBlock *FoundBB = 0; - for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && - L->contains(BBI)) { - FoundBB = SplitPreds[i]; - break; - } - } - - // If our heuristic for a *good* bb to place this after doesn't find - // anything, just pick something. It's likely better than leaving it within - // the loop. - if (!FoundBB) - FoundBB = SplitPreds[0]; - NewBB->moveAfter(FoundBB); -} - - -/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of -/// them out into a nested loop. This is important for code that looks like +/// \brief If this loop has multiple backedges, try to pull one of them out into +/// a nested loop. +/// +/// This is important for code that looks like /// this: /// /// Loop: @@ -544,18 +250,19 @@ void PlaceSplitBlockCarefully(BasicBlock *NewBB, /// If we are able to separate out a loop, return the new outer loop that was /// created. 
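In source terms, the shape being split is one header with two back edges, where the PHI that findPHIToPartitionLoops selects only varies on the edge belonging to the outer loop. A hypothetical C++ rendering, not patch content (innerCond is illustrative):

// Editorial sketch, not patch content: 'Header' has two backedges. The PHI
// for 'i' receives itself on the first edge (i is unchanged there), so that
// edge stays with the inner loop; the edge where i varies is pulled out to
// form the new outer loop.
bool innerCond(int);
void nest(int n) {
  int i = 0;
Header:
  if (innerCond(i))
    goto Header;   // inner backedge: i unchanged, PHI input == the PHI itself
  ++i;
  if (i < n)
    goto Header;   // outer backedge: i varies, peeled into the new outer loop
}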
/// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, - BasicBlock *Preheader) { +static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, + AliasAnalysis *AA, DominatorTree *DT, + LoopInfo *LI, ScalarEvolution *SE, Pass *PP) { // Don't try to separate loops without a preheader. if (!Preheader) - return 0; + return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!L->getHeader()->isLandingPad() && "Can't insert backedge to landing pad"); - PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); - if (PN == 0) return 0; // No known way to partition. + PHINode *PN = findPHIToPartitionLoops(L, AA, DT); + if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This // handles the case when a PHI node has multiple instances of itself as @@ -566,7 +273,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, !L->contains(PN->getIncomingBlock(i))) { // We can't split indirectbr edges. if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) - return 0; + return nullptr; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } } @@ -580,11 +287,11 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = - SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this); + SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. - PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L); + placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); // Create the new outer loop. Loop *NewOuter = new Loop(); @@ -598,9 +305,6 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, // L is now a subloop of our outer loop. NewOuter->addChildLoop(L); - // Add the new loop to the pass manager queue. - LPM.insertLoopIntoQueue(NewOuter); - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) NewOuter->addBlockEntry(*I); @@ -615,7 +319,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { BasicBlock *P = *PI; if (DT->dominates(Header, P)) - AddBlockAndPredsToSet(P, Header, BlocksInL); + addBlockAndPredsToSet(P, Header, BlocksInL); } // Scan all of the loop children of L, moving them to OuterLoop if they are @@ -643,15 +347,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, return NewOuter; } - - -/// InsertUniqueBackedgeBlock - This method is called when the specified loop -/// has more than one backedge in it. If this occurs, revector all of these -/// backedges to target a new basic block and have that block branch to the loop -/// header. This ensures that loops have exactly one backedge. +/// \brief This method is called when the specified loop has more than one +/// backedge in it. /// -BasicBlock * -LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { +/// If this occurs, revector all of these backedges to target a new basic block +/// and have that block branch to the loop header. This ensures that loops +/// have exactly one backedge. 
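The mechanical core of the function below, with the PHI merging and analysis updates it also performs omitted (an editorial sketch, not patch content):

// Editorial sketch, not patch content: funnel every backedge through one new
// block so the loop ends up with a single latch branching to the header.
BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
                                         Header->getName() + ".backedge",
                                         Header->getParent());
BranchInst::Create(Header, BEBlock);       // BEBlock's only job: jump to Header
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
  TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
  for (unsigned Op = 0, NumOps = TI->getNumSuccessors(); Op != NumOps; ++Op)
    if (TI->getSuccessor(Op) == Header)
      TI->setSuccessor(Op, BEBlock);       // retarget the backedge
}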
+static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, + AliasAnalysis *AA, + DominatorTree *DT, LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop @@ -660,7 +364,7 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // Unique backedge insertion currently depends on having a preheader. if (!Preheader) - return 0; + return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); @@ -672,7 +376,7 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) - return 0; + return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); } @@ -701,7 +405,7 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; - Value *UniqueValue = 0; + Value *UniqueValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); @@ -710,7 +414,7 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { - if (UniqueValue == 0) + if (!UniqueValue) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; @@ -762,7 +466,350 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { return BEBlock; } -void LoopSimplify::verifyAnalysis() const { +/// \brief Simplify one loop and queue further loops for simplification. +/// +/// FIXME: Currently this accepts both lots of analyses that it uses and a raw +/// Pass pointer. The Pass pointer is used by numerous utilities to update +/// specific analyses. Rather than a pass it would be much cleaner and more +/// explicit if they accepted the analysis directly and then updated it. +static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, + AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, Pass *PP, + const DataLayout *DL) { + bool Changed = false; +ReprocessLoop: + + // Check to see that no blocks (other than the header) in this loop have + // predecessors that are not in the loop. This is not valid for natural + // loops, but can occur if the blocks are unreachable. Since they are + // unreachable we can just shamelessly delete those CFG edges! + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) { + if (*BB == L->getHeader()) continue; + + SmallPtrSet<BasicBlock*, 4> BadPreds; + for (pred_iterator PI = pred_begin(*BB), + PE = pred_end(*BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) + BadPreds.insert(P); + } + + // Delete each unique out-of-loop (and thus dead) predecessor. + for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), + E = BadPreds.end(); I != E; ++I) { + + DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " + << (*I)->getName() << "\n"); + + // Inform each successor of each dead pred. + for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) + (*SI)->removePredecessor(*I); + // Zap the dead pred's terminator and replace it with unreachable. 
+ TerminatorInst *TI = (*I)->getTerminator(); + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + (*I)->getTerminator()->eraseFromParent(); + new UnreachableInst((*I)->getContext(), *I); + Changed = true; + } + } + + // If there are exiting blocks with branches on undef, resolve the undef in + // the direction which will exit the loop. This will help simplify loop + // trip count computations. + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), + E = ExitingBlocks.end(); I != E; ++I) + if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) + if (BI->isConditional()) { + if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { + + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " + << (*I)->getName() << "\n"); + + BI->setCondition(ConstantInt::get(Cond->getType(), + !L->contains(BI->getSuccessor(0)))); + + // This may make the loop analyzable, force SCEV recomputation. + if (SE) + SE->forgetLoop(L); + + Changed = true; + } + } + + // Does the loop already have a preheader? If so, don't insert one. + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + Preheader = InsertPreheaderForLoop(L, PP); + if (Preheader) { + ++NumInserted; + Changed = true; + } + } + + // Next, check to make sure that all exit nodes of the loop only have + // predecessors that are inside of the loop. This check guarantees that the + // loop preheader/header will dominate the exit blocks. If the exit block has + // predecessors from outside of the loop, split the edge now. + SmallVector<BasicBlock*, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), + E = ExitBlockSet.end(); I != E; ++I) { + BasicBlock *ExitBlock = *I; + for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); + PI != PE; ++PI) + // Must be exactly this loop: no subloops, parent loops, or non-loop preds + // allowed. + if (!L->contains(*PI)) { + if (rewriteLoopExitBlock(L, ExitBlock, PP)) { + ++NumInserted; + Changed = true; + } + break; + } + } + + // If the header has more than two predecessors at this point (from the + // preheader and from multiple backedges), we must adjust the loop. + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { + // If this is really a nested loop, rip it out into a child loop. Don't do + // this for loops with a giant number of backedges, just factor them into a + // common backedge instead. + if (L->getNumBackEdges() < 8) { + if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP)) { + ++NumNested; + // Enqueue the outer loop as it should be processed next in our + // depth-first nest walk. + Worklist.push_back(OuterL); + + // This is a big restructuring change, reprocess the whole loop. + Changed = true; + // GCC doesn't tail recursion eliminate this. + // FIXME: It isn't clear we can't rely on LLVM to TRE this. + goto ReprocessLoop; + } + } + + // If we either couldn't, or didn't want to, identify nesting of the loops, + // insert a new block that all backedges target, then make it jump to the + // loop header. + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI); + if (LoopLatch) { + ++NumInserted; + Changed = true; + } + } + + // Scan over the PHI nodes in the loop header. 
Since they now have only two + // incoming values (the loop is canonicalized), we may have simplified the PHI + // down to 'X = phi [X, Y]', which should be replaced with 'Y'. + PHINode *PN; + for (BasicBlock::iterator I = L->getHeader()->begin(); + (PN = dyn_cast<PHINode>(I++)); ) + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) { + if (AA) AA->deleteValue(PN); + if (SE) SE->forgetValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + } + + // If this loop has multiple exits and the exits all go to the same + // block, attempt to merge the exits. This helps several passes, such + // as LoopRotation, which do not support loops with multiple exits. + // SimplifyCFG also does this (and this code uses the same utility + // function), however this code is loop-aware, where SimplifyCFG is + // not. That gives it the advantage of being able to hoist + // loop-invariant instructions out of the way to open up more + // opportunities, and the disadvantage of having the responsibility + // to preserve dominator information. + bool UniqueExit = true; + if (!ExitBlocks.empty()) + for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] != ExitBlocks[0]) { + UniqueExit = false; + break; + } + if (UniqueExit) { + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + if (!ExitingBlock->getSinglePredecessor()) continue; + BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!BI || !BI->isConditional()) continue; + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + if (!CI || CI->getParent() != ExitingBlock) continue; + + // Attempt to hoist out all instructions except for the + // comparison and the branch. + bool AllInvariant = true; + bool AnyInvariant = false; + for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { + Instruction *Inst = I++; + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + if (Inst == CI) + continue; + if (!L->makeLoopInvariant(Inst, AnyInvariant, + Preheader ? Preheader->getTerminator() + : nullptr)) { + AllInvariant = false; + break; + } + } + if (AnyInvariant) { + Changed = true; + // The loop disposition of all SCEV expressions that depend on any + // hoisted values has also changed. + if (SE) + SE->forgetLoopDispositions(L); + } + if (!AllInvariant) continue; + + // The block has now been cleared of all instructions except for + // a comparison and a conditional branch. SimplifyCFG may be able + // to fold it now. + if (!FoldBranchToCommonDest(BI, DL)) continue; + + // Success. The block is now dead, so remove it from the loop, + // update the dominator tree and delete it. + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " + << ExitingBlock->getName() << "\n"); + + // Notify ScalarEvolution before deleting this block. Currently we assume the + // parent loop doesn't change (splitting edges doesn't count). If blocks, + // CFG edges, or other values in the parent loop change, then we need to call + // forgetLoop() for the parent instead. 
+ if (SE) + SE->forgetLoop(L); + + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); + Changed = true; + LI->removeBlock(ExitingBlock); + + DomTreeNode *Node = DT->getNode(ExitingBlock); + const std::vector<DomTreeNodeBase<BasicBlock> *> &Children = + Node->getChildren(); + while (!Children.empty()) { + DomTreeNode *Child = Children.front(); + DT->changeImmediateDominator(Child, Node->getIDom()); + } + DT->eraseNode(ExitingBlock); + + BI->getSuccessor(0)->removePredecessor(ExitingBlock); + BI->getSuccessor(1)->removePredecessor(ExitingBlock); + ExitingBlock->eraseFromParent(); + } + } + + return Changed; +} + +bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, + AliasAnalysis *AA, ScalarEvolution *SE, + const DataLayout *DL) { + bool Changed = false; + + // Worklist maintains our depth-first queue of loops in this nest to process. + SmallVector<Loop *, 4> Worklist; + Worklist.push_back(L); + + // Walk the worklist from front to back, pushing newly found sub loops onto + // the back. This will let us process loops from back to front in depth-first + // order. We can use this simple process because loops form a tree. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + Loop *L2 = Worklist[Idx]; + for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I) + Worklist.push_back(*I); + } + + while (!Worklist.empty()) + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, + SE, PP, DL); + + return Changed; +} + +namespace { + struct LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } + + // AA - If we have an alias analysis object to update, this is it, otherwise + // this is null. + AliasAnalysis *AA; + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + const DataLayout *DL; + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // We need loop information to identify the loops... + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + + AU.addPreserved<AliasAnalysis>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DependenceAnalysis>(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + } + + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const override; + }; +} + +char LoopSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", + "Canonicalize natural loops", true, false) + +// Publicly exposed interface to pass... +char &llvm::LoopSimplifyID = LoopSimplify::ID; +Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } + +/// runOnFunction - Run down all loops in the CFG (recursively, but we could do +/// it in any convenient order) inserting preheaders... +/// +bool LoopSimplify::runOnFunction(Function &F) { + bool Changed = false; + AA = getAnalysisIfAvailable<AliasAnalysis>(); + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? 
&DLP->getDataLayout() : nullptr; + + // Simplify each loop nest in the function. + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL); + + return Changed; +} + +// FIXME: Restore this code when we re-enable verification in verifyAnalysis +// below. +#if 0 +static void verifyLoop(Loop *L) { + // Verify subloops. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + verifyLoop(*I); + // It used to be possible to just assert L->isLoopSimplifyForm(), however // with the introduction of indirectbr, there are now cases where it's // not possible to transform a loop as necessary. We can at least check @@ -799,3 +846,15 @@ void LoopSimplify::verifyAnalysis() const { (void)HasIndBrExiting; } } +#endif + +void LoopSimplify::verifyAnalysis() const { + // FIXME: This routine is being called mid-way through the loop pass manager + // as loop passes destroy this analysis. That's actually fine, but we have no + // way of expressing that here. Once all of the passes that destroy this are + // hoisted out of the loop pass manager we can add back verification here. +#if 0 + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + verifyLoop(*I); +#endif +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 162807d..ab1c25a 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -16,22 +16,29 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + // TODO: Should these be here or in LoopUnroll? STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); @@ -58,18 +65,23 @@ static inline void RemapInstruction(Instruction *I, /// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it /// only has one predecessor, and that predecessor only has one successor. -/// The LoopInfo Analysis that is passed will be kept consistent. -/// Returns the new combined block. -static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, - LPPassManager *LPM) { +/// The LoopInfo Analysis that is passed will be kept consistent. If folding is +/// successful references to the containing loop must be removed from +/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have +/// references to the eliminated BB. The argument ForgottenLoops contains a set +/// of loops that have already been forgotten to prevent redundant, expensive +/// calls to ScalarEvolution::forgetLoop. Returns the new combined block. 
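The ForgottenLoops set mentioned in the comment above is a simple memoization: forgetLoop walks and discards cached SCEV state, so it should run at most once per loop no matter how many of that loop's blocks get folded. In isolation the pattern looks like this (a sketch with a hypothetical helper forgetOnce; note that in this vintage of LLVM, SmallPtrSet::insert returns a bool that is true only on first insertion, which is exactly what the hunk below relies on):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/BasicBlock.h"
    using namespace llvm;

    static void forgetOnce(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,
                           SmallPtrSetImpl<Loop *> &ForgottenLoops) {
      if (Loop *L = LI->getLoopFor(BB))
        if (ForgottenLoops.insert(L)) // true only the first time we see L
          SE->forgetLoop(L);          // drop cached trip counts etc. for L
    }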
+static BasicBlock * +FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, + SmallPtrSetImpl<Loop *> &ForgottenLoops) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. BasicBlock *OnlyPred = BB->getSinglePredecessor(); - if (!OnlyPred) return 0; + if (!OnlyPred) return nullptr; if (OnlyPred->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); @@ -98,8 +110,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // ScalarEvolution holds references to loop exit blocks. if (LPM) { if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { - if (Loop *L = LI->getLoopFor(BB)) - SE->forgetLoop(L); + if (Loop *L = LI->getLoopFor(BB)) { + if (ForgottenLoops.insert(L)) + SE->forgetLoop(L); + } } } LI->removeBlock(BB); @@ -137,10 +151,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, /// removed from the LoopPassManager as well. LPM can also be NULL. /// /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are -/// available it must also preserve those analyses. +/// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, unsigned TripMultiple, - LoopInfo *LI, LPPassManager *LPM) { + LoopInfo *LI, Pass *PP, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -208,8 +222,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - if (LPM) { - ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (PP) { + ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); if (SE) SE->forgetLoop(L); } @@ -225,18 +239,35 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } + // Report the unrolling decision. + DebugLoc LoopLoc = L->getStartLoc(); + Function *F = Header->getParent(); + LLVMContext &Ctx = F->getContext(); + if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + Twine("completely unrolled loop with ") + + Twine(TripCount) + " iterations"); } else { + auto EmitDiag = [&](const Twine &T) { + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + "unrolled loop by a factor of " + Twine(Count) + + T); + }; + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); + EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + EmitDiag(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); + EmitDiag(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); } @@ -400,23 +431,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, } // Merge adjacent basic blocks, if possible. 
+ SmallPtrSet<Loop *, 4> ForgottenLoops; for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM, + ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } - if (LPM) { + DominatorTree *DT = nullptr; + if (PP) { // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. - if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) - DT->runOnFunction(*L->getHeader()->getParent()); + if (DominatorTreeWrapperPass *DTWP = + PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { + DT = &DTWP->getDomTree(); + DT->recalculate(*L->getHeader()->getParent()); + } // Simplify any new induction variables in the partially unrolled loop. - ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); if (SE && !CompletelyUnroll) { SmallVector<WeakVH, 16> DeadInsts; simplifyLoopIVs(L, SE, LPM, DeadInsts); @@ -449,9 +486,36 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; + + Loop *OuterL = L->getParentLoop(); // Remove the loop from the LoopPassManager if it's completely removed. - if (CompletelyUnroll && LPM != NULL) + if (CompletelyUnroll && LPM != nullptr) LPM->deleteLoopFromQueue(L); + // If we have a pass and a DominatorTree we should re-simplify impacted loops + // to ensure subsequent analyses can rely on this form. We want to simplify + // at least one layer outside of the loop that was unrolled so that any + // changes to the parent loop exposed by the unrolling are considered. + if (PP && DT) { + if (!OuterL && !CompletelyUnroll) + OuterL = L; + if (OuterL) { + DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>(); + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; + ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL); + + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop after + // deleteLoopFromQueue updates LoopInfo. 
+ Loop *LatchLoop = LI->getLoopFor(Latches.back()); + if (!OuterL->contains(LatchLoop)) + while (OuterL->getParentLoop() != LatchLoop) + OuterL = OuterL->getParentLoop(); + + formLCSSARecursively(*OuterL, *DT, SE); + } + } + return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index d801d5f..a96c46a 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopIterator.h" @@ -37,6 +36,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); @@ -58,7 +59,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, BasicBlock *OrigPH, BasicBlock *NewPH, ValueToValueMapTy &LVMap, Pass *P) { BasicBlock *Latch = L->getLoopLatch(); - assert(Latch != 0 && "Loop must have a latch"); + assert(Latch && "Loop must have a latch"); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). @@ -110,7 +111,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, ConstantInt::get(TripCount->getType(), Count)); BasicBlock *Exit = L->getUniqueExitBlock(); - assert(Exit != 0 && "Loop must have a single exit block only"); + assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); if (!Exit->isLandingPad()) { @@ -232,7 +233,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, // Make sure the loop is in canonical form, and there is a single // exit block only. - if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0) + if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock()) return false; // Use Scalar Evolution to compute the trip count. This allows more @@ -240,7 +241,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, if (!LPM) return false; ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); - if (SE == 0) + if (!SE) return false; // Only unroll loops with a computable trip count and the trip count needs @@ -279,17 +280,17 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, SCEVExpander Expander(*SE, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); - Type *CountTy = TripCount->getType(); - BinaryOperator *ModVal = - BinaryOperator::CreateURem(TripCount, - ConstantInt::get(CountTy, Count), - "xtraiter"); - ModVal->insertBefore(PreHeaderBR); - - // Check if for no extra iterations, then jump to unrolled loop - Value *BranchVal = new ICmpInst(PreHeaderBR, - ICmpInst::ICMP_NE, ModVal, - ConstantInt::get(CountTy, 0), "lcmp"); + + IRBuilder<> B(PreHeaderBR); + Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); + + // Check if for no extra iterations, then jump to unrolled loop. We have to + // check that the trip count computation didn't overflow when adding one to + // the backedge taken count. 
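In scalar terms the guard being built here is: with Count a power of two, TripCount & (Count - 1) equals TripCount % Count, and a TripCount of zero means the BECount + 1 computation wrapped around, so the prolog must run in that case as well. A plain C++ model of the condition (a sketch of the logic only; the hunk emits the same thing as IR through IRBuilder):

    #include <cstdint>

    // True when the prolog copy of the loop body must execute.
    bool needsProlog(uint64_t TripCount, uint64_t Count) {
      uint64_t Xtra = TripCount & (Count - 1); // TripCount % Count, Count = 2^k
      bool Overflow = (TripCount == 0);        // BECount + 1 wrapped to zero
      return Overflow || Xtra != 0;
    }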
+ Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod"); + Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow"); + Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or"); + // Branch to either the extra iterations or the unrolled loop // We will fix up the true branch label when adding loop body copies BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); @@ -301,7 +302,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, ValueToValueMapTy LVMap; Function *F = Header->getParent(); // These variables are used to update the CFG links in each iteration - BasicBlock *CompareBB = 0; + BasicBlock *CompareBB = nullptr; BasicBlock *LastLoopBB = PH; // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog code @@ -343,6 +344,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, } // The comparison w/ the extra iteration value and branch + Type *CountTy = TripCount->getType(); Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, ConstantInt::get(CountTy, leftOverIters), "un.tmp"); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index e017f50..ff89e74 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lower-expect-intrinsic" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" @@ -29,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "lower-expect-intrinsic" + STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); static cl::opt<uint32_t> @@ -52,7 +53,7 @@ namespace { initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; }; } @@ -94,15 +95,25 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { return false; // Handle non-optimized IR code like: - // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1) + // %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1) // %tobool = icmp ne i64 %expval, 0 // br i1 %tobool, label %if.then, label %if.end + // + // Or the following simpler case: + // %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1) + // br i1 %expval, label %if.then, label %if.end + + CallInst *CI; ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition()); - if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE) - return false; + if (!CmpI) { + CI = dyn_cast<CallInst>(BI->getCondition()); + } else { + if (CmpI->getPredicate() != CmpInst::ICMP_NE) + return false; + CI = dyn_cast<CallInst>(CmpI->getOperand(0)); + } - CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0)); if (!CI) return false; @@ -127,7 +138,10 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { BI->setMetadata(LLVMContext::MD_prof, Node); - CmpI->setOperand(0, ArgValue); + if (CmpI) + CmpI->setOperand(0, ArgValue); + else + BI->setCondition(ArgValue); return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index 9799a30..66d57b0 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -1,4 +1,4 @@ -//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===// 
+//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===// // // The LLVM Compiler Infrastructure // @@ -8,94 +8,34 @@ //===----------------------------------------------------------------------===// // // This transformation is designed for use by code generators which do not yet -// support stack unwinding. This pass supports two models of exception handling -// lowering, the 'cheap' support and the 'expensive' support. -// -// 'Cheap' exception handling support gives the program the ability to execute -// any program which does not "throw an exception", by turning 'invoke' -// instructions into calls and by turning 'unwind' instructions into calls to -// abort(). If the program does dynamically use the unwind instruction, the -// program will print a message then abort. -// -// 'Expensive' exception handling support gives the full exception handling -// support to the program at the cost of making the 'invoke' instruction -// really expensive. It basically inserts setjmp/longjmp calls to emulate the -// exception handling as necessary. -// -// Because the 'expensive' support slows down programs a lot, and EH is only -// used for a subset of the programs, it must be specifically enabled by an -// option. -// -// Note that after this pass runs the CFG is not entirely accurate (exceptional -// control flow edges are not correct anymore) so only very simple things should -// be done after the lowerinvoke pass has run (like generation of native code). -// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't -// support the invoke instruction yet" lowering pass. +// support stack unwinding. This pass converts 'invoke' instructions to 'call' +// instructions, so that any exception-handling 'landingpad' blocks become dead +// code (which can be removed by running the '-simplifycfg' pass afterwards). // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lowerinvoke" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include <csetjmp> -#include <set> using namespace llvm; -STATISTIC(NumInvokes, "Number of invokes replaced"); -STATISTIC(NumSpilled, "Number of registers live across unwind edges"); +#define DEBUG_TYPE "lowerinvoke" -static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", - cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code")); +STATISTIC(NumInvokes, "Number of invokes replaced"); namespace { class LowerInvoke : public FunctionPass { - const TargetMachine *TM; - - // Used for both models. - Constant *AbortFn; - - // Used for expensive EH support. 
- StructType *JBLinkTy; - GlobalVariable *JBListHead; - Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; - bool useExpensiveEHSupport; - public: static char ID; // Pass identification, replacement for typeid - explicit LowerInvoke(const TargetMachine *TM = 0, - bool useExpensiveEHSupport = ExpensiveEHSupport) - : FunctionPass(ID), TM(TM), - useExpensiveEHSupport(useExpensiveEHSupport) { + explicit LowerInvoke() : FunctionPass(ID) { initializeLowerInvokePass(*PassRegistry::getPassRegistry()); } - bool doInitialization(Module &M); - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // This is a cluster of orthogonal Transforms - AU.addPreserved("mem2reg"); - AU.addPreservedID(LowerSwitchID); - } - - private: - bool insertCheapEHSupport(Function &F); - void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes); - void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, - AllocaInst *InvokeNum, AllocaInst *StackPtr, - SwitchInst *CatchSwitch); - bool insertExpensiveEHSupport(Function &F); + bool runOnFunction(Function &F) override; }; } @@ -107,65 +47,11 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke", char &llvm::LowerInvokePassID = LowerInvoke::ID; // Public Interface To the LowerInvoke pass. -FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM, - bool useExpensiveEHSupport) { - return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport); +FunctionPass *llvm::createLowerInvokePass() { + return new LowerInvoke(); } -// doInitialization - Make sure that there is a prototype for abort in the -// current module. -bool LowerInvoke::doInitialization(Module &M) { - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - if (useExpensiveEHSupport) { - // Insert a type for the linked list of jump buffers. - const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; - unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; - JBSize = JBSize ? JBSize : 200; - Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); - - JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty"); - Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) }; - JBLinkTy->setBody(Elts); - - Type *PtrJBList = PointerType::getUnqual(JBLinkTy); - - // Now that we've done that, insert the jmpbuf list head global, unless it - // already exists. - if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) { - JBListHead = new GlobalVariable(M, PtrJBList, false, - GlobalValue::LinkOnceAnyLinkage, - Constant::getNullValue(PtrJBList), - "llvm.sjljeh.jblist"); - } - -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - - SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp); - -#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) - // let's return it to _setjmp state -# pragma pop_macro("setjmp") -# undef setjmp_undefined_for_msvc -#endif - - LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp); - StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); - StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); - } - - // We need the 'write' and 'abort' functions for both models. 
- AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), - (Type *)0); - return true; -} - -bool LowerInvoke::insertCheapEHSupport(Function &F) { +bool LowerInvoke::runOnFunction(Function &F) { bool Changed = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { @@ -192,388 +78,3 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { } return Changed; } - -/// rewriteExpensiveInvoke - Insert code and hack the function to replace the -/// specified invoke instruction with a call. -void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, - AllocaInst *InvokeNum, - AllocaInst *StackPtr, - SwitchInst *CatchSwitch) { - ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); - - // If the unwind edge has phi nodes, split the edge. - if (isa<PHINode>(II->getUnwindDest()->begin())) { - SplitCriticalEdge(II, 1, this); - - // If there are any phi nodes left, they must have a single predecessor. - while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - PN->eraseFromParent(); - } - } - - // Insert a store of the invoke num before the invoke and store zero into the - // location afterward. - new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile - - // Insert a store of the stack ptr before the invoke, so we can restore it - // later in the exception case. - CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II); - new StoreInst(StackSaveRet, StackPtr, true, II); // volatile - - BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt(); - // nonvolatile. - new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), - InvokeNum, false, NI); - - Instruction* StackPtrLoad = - new LoadInst(StackPtr, "stackptr.restore", true, - II->getUnwindDest()->getFirstInsertionPt()); - CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad); - - // Add a switch case to our unwind block. - CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); - - // Insert a normal call instruction. - SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), - CallArgs, "", II); - NewCall->takeName(II); - NewCall->setCallingConv(II->getCallingConv()); - NewCall->setAttributes(II->getAttributes()); - NewCall->setDebugLoc(II->getDebugLoc()); - II->replaceAllUsesWith(NewCall); - - // Replace the invoke with an uncond branch. - BranchInst::Create(II->getNormalDest(), NewCall->getParent()); - II->eraseFromParent(); -} - -/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until -/// we reach blocks we've already seen. -static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { - if (!LiveBBs.insert(BB).second) return; // already been here. - - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - MarkBlocksLiveIn(*PI, LiveBBs); -} - -// First thing we need to do is scan the whole function for values that are -// live across unwind edges. Each value that is live across an unwind edge -// we spill into a stack location, guaranteeing that there is nothing live -// across the unwind edge. This process also splits all critical edges -// coming out of invoke's. -void LowerInvoke:: -splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { - // First step, split all critical edges from invoke instructions. 
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - InvokeInst *II = Invokes[i]; - SplitCriticalEdge(II, 0, this); - SplitCriticalEdge(II, 1, this); - assert(!isa<PHINode>(II->getNormalDest()) && - !isa<PHINode>(II->getUnwindDest()) && - "critical edge splitting left single entry phi nodes?"); - } - - Function *F = Invokes.back()->getParent()->getParent(); - - // To avoid having to handle incoming arguments specially, we lower each arg - // to a copy instruction in the entry block. This ensures that the argument - // value itself cannot be live across the entry block. - BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); - while (isa<AllocaInst>(AfterAllocaInsertPt) && - isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) - ++AfterAllocaInsertPt; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); - AI != E; ++AI) { - Type *Ty = AI->getType(); - // Aggregate types can't be cast, but are legal argument types, so we have - // to handle them differently. We use an extract/insert pair as a - // lightweight method to achieve the same goal. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { - Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); - Instruction *NI = InsertValueInst::Create(AI, EI, 0); - NI->insertAfter(EI); - AI->replaceAllUsesWith(NI); - // Set the operand of the instructions back to the AllocaInst. - EI->setOperand(0, AI); - NI->setOperand(0, AI); - } else { - // This is always a no-op cast because we're casting AI to AI->getType() - // so src and destination types are identical. BitCast is the only - // possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Set the operand of the cast instruction back to the AllocaInst. - // Normally it's forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, - // we're replacing it here with the same value it was constructed with. - // We do this because the above replaceAllUsesWith() clobbered the - // operand, but we want this one to remain. - NC->setOperand(0, AI); - } - } - - // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - // Ignore obvious cases we don't have to handle. In particular, most - // instructions either have no uses or only have a single use inside the - // current block. Ignore them quickly. - Instruction *Inst = II; - if (Inst->use_empty()) continue; - if (Inst->hasOneUse() && - cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) continue; - - // If this is an alloca in the entry block, it's not a real register - // value. - if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) - if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) - continue; - - // Avoid iterator invalidation by copying users to a temporary vector. - SmallVector<Instruction*,16> Users; - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - if (User->getParent() != BB || isa<PHINode>(User)) - Users.push_back(User); - } - - // Scan all of the uses and see if the live range is live across an unwind - // edge. If we find a use live across an invoke edge, create an alloca - // and spill the value. 
- - // Find all of the blocks that this value is live in. - std::set<BasicBlock*> LiveBBs; - LiveBBs.insert(Inst->getParent()); - while (!Users.empty()) { - Instruction *U = Users.back(); - Users.pop_back(); - - if (!isa<PHINode>(U)) { - MarkBlocksLiveIn(U->getParent(), LiveBBs); - } else { - // Uses for a PHI node occur in their predecessor block. - PHINode *PN = cast<PHINode>(U); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Inst) - MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); - } - } - - // Now that we know all of the blocks that this thing is live in, see if - // it includes any of the unwind locations. - bool NeedsSpill = false; - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); - if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { - NeedsSpill = true; - } - } - - // If we decided we need a spill, do it. - if (NeedsSpill) { - ++NumSpilled; - DemoteRegToStack(*Inst, true); - } - } -} - -bool LowerInvoke::insertExpensiveEHSupport(Function &F) { - SmallVector<ReturnInst*,16> Returns; - SmallVector<InvokeInst*,16> Invokes; - UnreachableInst* UnreachablePlaceholder = 0; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { - // Remember all return instructions in case we insert an invoke into this - // function. - Returns.push_back(RI); - } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { - Invokes.push_back(II); - } - - if (Invokes.empty()) return false; - - NumInvokes += Invokes.size(); - - // TODO: This is not an optimal way to do this. In particular, this always - // inserts setjmp calls into the entries of functions with invoke instructions - // even though there are possibly paths through the function that do not - // execute any invokes. In particular, for functions with early exits, e.g. - // the 'addMove' method in hexxagon, it would be nice to not have to do the - // setjmp stuff on the early exit path. This requires a bit of dataflow, but - // would not be too hard to do. - - // If we have an invoke instruction, insert a setjmp that dominates all - // invokes. After the setjmp, use a cond branch that goes to the original - // code path on zero, and to a designated 'catch' block of nonzero. - Value *OldJmpBufPtr = 0; - if (!Invokes.empty()) { - // First thing we need to do is scan the whole function for values that are - // live across unwind edges. Each value that is live across an unwind edge - // we spill into a stack location, guaranteeing that there is nothing live - // across the unwind edge. This process also splits all critical edges - // coming out of invoke's. - splitLiveRangesLiveAcrossInvokes(Invokes); - - BasicBlock *EntryBB = F.begin(); - - // Create an alloca for the incoming jump buffer ptr and the new jump buffer - // that needs to be restored on all exits from the function. This is an - // alloca because the value needs to be live across invokes. - const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; - unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; - AllocaInst *JmpBuf = - new AllocaInst(JBLinkTy, 0, Align, - "jblink", F.begin()->begin()); - - Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), - ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; - OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf", - EntryBB->getTerminator()); - - // Copy the JBListHead to the alloca. 
- Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, - EntryBB->getTerminator()); - new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator()); - - // Add the new jumpbuf to the list. - new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator()); - - // Create the catch block. The catch block is basically a big switch - // statement that goes to all of the invoke catch blocks. - BasicBlock *CatchBB = - BasicBlock::Create(F.getContext(), "setjmp.catch", &F); - - // Create an alloca which keeps track of the stack pointer before every - // invoke, this allows us to properly restore the stack pointer after - // long jumping. - AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0, - "stackptr", EntryBB->begin()); - - // Create an alloca which keeps track of which invoke is currently - // executing. For normal calls it contains zero. - AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, - "invokenum",EntryBB->begin()); - new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), - InvokeNum, true, EntryBB->getTerminator()); - - // Insert a load in the Catch block, and a switch on its value. By default, - // we go to a block that just does an unwind (which is the correct action - // for a standard call). We insert an unreachable instruction here and - // modify the block to jump to the correct unwinding pad later. - BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); - UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB); - - Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); - SwitchInst *CatchSwitch = - SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB); - - // Now that things are set up, insert the setjmp call itself. - - // Split the entry block to insert the conditional branch for the setjmp. - BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), - "setjmp.cont"); - - Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); - Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf", - EntryBB->getTerminator()); - JmpBufPtr = new BitCastInst(JmpBufPtr, - Type::getInt8PtrTy(F.getContext()), - "tmp", EntryBB->getTerminator()); - Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", - EntryBB->getTerminator()); - - // Compare the return value to zero. - Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), - ICmpInst::ICMP_EQ, SJRet, - Constant::getNullValue(SJRet->getType()), - "notunwind"); - // Nuke the uncond branch. - EntryBB->getTerminator()->eraseFromParent(); - - // Put in a new condbranch in its place. - BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB); - - // At this point, we are all set up, rewrite each invoke instruction. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch); - } - - // We know that there is at least one unwind. - - // Create three new blocks, the block to load the jmpbuf ptr and compare - // against null, the block to do the longjmp, and the error block for if it - // is null. Add them at the end of the function because they are not hot. - BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(), - "dounwind", &F); - BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F); - BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F); - - // If this function contains an invoke, restore the old jumpbuf ptr. 
- Value *BufPtr; - if (OldJmpBufPtr) { - // Before the return, insert a copy from the saved value to the new value. - BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler); - new StoreInst(BufPtr, JBListHead, UnwindHandler); - } else { - BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler); - } - - // Load the JBList, if it's null, then there was no catch! - Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr, - Constant::getNullValue(BufPtr->getType()), - "notnull"); - BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler); - - // Create the block to do the longjmp. - // Get a pointer to the jmpbuf and longjmp. - Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) }; - Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock); - Idx[0] = new BitCastInst(Idx[0], - Type::getInt8PtrTy(F.getContext()), - "tmp", UnwindBlock); - Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); - CallInst::Create(LongJmpFn, Idx, "", UnwindBlock); - new UnreachableInst(F.getContext(), UnwindBlock); - - // Set up the term block ("throw without a catch"). - new UnreachableInst(F.getContext(), TermBlock); - - // Insert a call to abort() - CallInst::Create(AbortFn, "", - TermBlock->getTerminator())->setTailCall(); - - // Replace the inserted unreachable with a branch to the unwind handler. - if (UnreachablePlaceholder) { - BranchInst::Create(UnwindHandler, UnreachablePlaceholder); - UnreachablePlaceholder->eraseFromParent(); - } - - // Finally, for any returns from this function, if this function contains an - // invoke, restore the old jmpbuf pointer to its input value. - if (OldJmpBufPtr) { - for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - ReturnInst *R = Returns[i]; - - // Before the return, insert a copy from the saved value to the new value. - Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R); - new StoreInst(OldBuf, JBListHead, true, R); - } - } - - return true; -} - -bool LowerInvoke::runOnFunction(Function &F) { - if (useExpensiveEHSupport) - return insertExpensiveEHSupport(F); - else - return insertCheapEHSupport(F); -} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 2d2a8a5..d6e5bb6 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -14,11 +14,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/CFG.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -27,6 +29,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "lower-switch" + namespace { /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch /// instructions. 
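This revision teaches switchConvert to carry the lower and upper bounds already established by the enclosing comparisons, so a leaf whose range is fully pinned down needs no compare at all, and an unreachable default lets the bounds start out tight. The emitted structure is a balanced binary search over the case ranges; in C++ terms, lowering switch (x) { case 1: ... case 2: ... case 4: ... case 5: ... } produces roughly this shape (an illustrative sketch, not literal output of the pass):

    int dispatch(int x) {
      if (x < 4) {               // NodeBlock: signed compare against the pivot
        if (x == 1) return 10;   // LeafBlock
        if (x == 2) return 20;   // LeafBlock
      } else {
        if (x == 4) return 40;
        if (x == 5) return 50;
      }
      return 0;                  // NewDefault
    }

With the new bounds tracking, a leaf whose value is already implied by the enclosing pivot comparisons branches straight to its case block instead of re-testing, as the hunks below implement.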
@@ -37,9 +41,9 @@ namespace { initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); AU.addPreserved("mem2reg"); @@ -51,20 +55,23 @@ namespace { Constant* High; BasicBlock* BB; - CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) : + CaseRange(Constant *low = nullptr, Constant *high = nullptr, + BasicBlock *bb = nullptr) : Low(low), High(high), BB(bb) { } }; - typedef std::vector<CaseRange> CaseVector; + typedef std::vector<CaseRange> CaseVector; typedef std::vector<CaseRange>::iterator CaseItr; private: void processSwitchInst(SwitchInst *SI); - BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val, - BasicBlock* OrigBlock, BasicBlock* Default); - BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val, - BasicBlock* OrigBlock, BasicBlock* Default); - unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); + BasicBlock *switchConvert(CaseItr Begin, CaseItr End, + ConstantInt *LowerBound, ConstantInt *UpperBound, + Value *Val, BasicBlock *Predecessor, + BasicBlock *OrigBlock, BasicBlock *Default); + BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock, + BasicBlock *Default); + unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); }; /// The comparison function for sorting the switch case values in the vector. @@ -124,17 +131,45 @@ static raw_ostream& operator<<(raw_ostream &O, return O << "]"; } +static void fixPhis(BasicBlock *Succ, + BasicBlock *OrigBlock, + BasicBlock *NewNode) { + for (BasicBlock::iterator I = Succ->begin(), + E = Succ->getFirstNonPHI(); + I != E; ++I) { + PHINode *PN = cast<PHINode>(I); + + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + if (PN->getIncomingBlock(I) == OrigBlock) + PN->setIncomingBlock(I, NewNode); + } + } +} + // switchConvert - Convert the switch statement into a binary lookup of // the case values. The function recursively builds this tree. -// -BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, - Value* Val, BasicBlock* OrigBlock, - BasicBlock* Default) -{ +// LowerBound and UpperBound are used to keep track of the bounds for Val +// that have already been checked by a block emitted by one of the previous +// calls to switchConvert in the call stack. +BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, + ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, + BasicBlock *OrigBlock, + BasicBlock *Default) { unsigned Size = End - Begin; - if (Size == 1) + if (Size == 1) { + // Check if the Case Range is perfectly squeezed in between + // already checked Upper and Lower bounds. If it is then we can avoid + // emitting the code that checks if the value actually falls in the range + // because the bounds already tell us so. 
+ if (Begin->Low == LowerBound && Begin->High == UpperBound) { + fixPhis(Begin->BB, OrigBlock, Predecessor); + return Begin->BB; + } return newLeafBlock(*Begin, Val, OrigBlock, Default); + } unsigned Mid = Size / 2; std::vector<CaseRange> LHS(Begin, Begin + Mid); @@ -142,26 +177,65 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, std::vector<CaseRange> RHS(Begin + Mid, End); DEBUG(dbgs() << "RHS: " << RHS << "\n"); - CaseRange& Pivot = *(Begin + Mid); - DEBUG(dbgs() << "Pivot ==> " - << cast<ConstantInt>(Pivot.Low)->getValue() << " -" - << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + CaseRange &Pivot = *(Begin + Mid); + DEBUG(dbgs() << "Pivot ==> " + << cast<ConstantInt>(Pivot.Low)->getValue() + << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + + // NewLowerBound here should never be the integer minimal value. + // This is because it is computed from a case range that is never + // the smallest, so there is always a case range that has at least + // a smaller value. + ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low); + ConstantInt *NewUpperBound; + + // If we don't have a Default block then it means that we can never + // have a value outside of a case range, so set the UpperBound to the highest + // value in the LHS part of the case ranges. + if (Default != nullptr) { + // Because NewLowerBound is never the smallest representable integer + // it is safe here to subtract one. + NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), + NewLowerBound->getValue() - 1); + } else { + CaseItr LastLHS = LHS.begin() + LHS.size() - 1; + NewUpperBound = cast<ConstantInt>(LastLHS->High); + } - BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val, - OrigBlock, Default); - BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val, - OrigBlock, Default); + DEBUG(dbgs() << "LHS Bounds ==> "; + if (LowerBound) { + dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue(); + } else { + dbgs() << "NONE"; + } + dbgs() << " - " << NewUpperBound->getSExtValue() << "\n"; + dbgs() << "RHS Bounds ==> "; + dbgs() << NewLowerBound->getSExtValue() << " - "; + if (UpperBound) { + dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n"; + } else { + dbgs() << "NONE\n"; + }); // Create a new node that checks if the value is < pivot. Go to the // left branch if it is and right branch if not. Function* F = OrigBlock->getParent(); BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); - Function::iterator FI = OrigBlock; - F->getBasicBlockList().insert(++FI, NewNode); ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); + + BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, + NewUpperBound, Val, NewNode, OrigBlock, + Default); + BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, + UpperBound, Val, NewNode, OrigBlock, + Default); + + Function::iterator FI = OrigBlock; + F->getBasicBlockList().insert(++FI, NewNode); NewNode->getInstList().push_back(Comp); + BranchInst::Create(LBranch, RBranch, Comp, NewNode); return NewNode; } @@ -182,7 +256,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, F->getBasicBlockList().insert(++FI, NewLeaf); // Emit comparison - ICmpInst* Comp = NULL; + ICmpInst* Comp = nullptr; if (Leaf.Low == Leaf.High) { // Make the seteq instruction... 
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, @@ -245,7 +319,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { // Merge case into clusters if (Cases.size()>=2) - for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) { + for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); + J != Cases.end();) { int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); BasicBlock* nextBB = J->BB; @@ -287,13 +362,19 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { return; } + const bool DefaultIsUnreachable = + Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator()); // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. - BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - F->getBasicBlockList().insert(Default, NewDefault); - - BranchInst::Create(Default, NewDefault); - + // If the default block is set as unreachable we avoid creating one + // because it will never be a valid target. + BasicBlock *NewDefault = nullptr; + if (!DefaultIsUnreachable) { + NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); + F->getBasicBlockList().insert(Default, NewDefault); + + BranchInst::Create(Default, NewDefault); + } // If there is an entry in any PHI nodes for the default edge, make sure // to update them as well. for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { @@ -312,12 +393,31 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { DEBUG(dbgs() << "Cases: " << Cases << "\n"); (void)numCmps; - BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, - OrigBlock, NewDefault); + ConstantInt *UpperBound = nullptr; + ConstantInt *LowerBound = nullptr; + + // Optimize the condition where Default is an unreachable block. In this case + // we can make the bounds tightly fitted around the case value ranges, + // because we know that the value passed to the switch should always be + // exactly one of the case values. + if (DefaultIsUnreachable) { + CaseItr LastCase = Cases.begin() + Cases.size() - 1; + UpperBound = cast<ConstantInt>(LastCase->High); + LowerBound = cast<ConstantInt>(Cases.begin()->Low); + } + BasicBlock *SwitchBlock = + switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, + OrigBlock, OrigBlock, NewDefault); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); // We are now done with the switch instruction, delete it.
CurBlock->getInstList().erase(SI); + + pred_iterator PI = pred_begin(Default), E = pred_end(Default); + // If the Default block has no more predecessors just remove it + if (PI == E) { + DeleteDeadBlock(Default); + } } diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp index 61b3965..189caa7 100644 --- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -12,16 +12,17 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" using namespace llvm; +#define DEBUG_TYPE "mem2reg" + STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { @@ -34,10 +35,10 @@ namespace { // runOnFunction - To run this pass, first we calculate the alloca // instructions that are safe for promotion, then we promote each one. // - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); AU.setPreservesCFG(); // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); @@ -50,7 +51,7 @@ namespace { char PromotePass::ID = 0; INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register", false, false) @@ -61,7 +62,7 @@ bool PromotePass::runOnFunction(Function &F) { bool Changed = false; - DominatorTree &DT = getAnalysis<DominatorTree>(); + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); while (1) { Allocas.clear(); diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp index c3704531..395a46b 100644 --- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -48,11 +48,11 @@ namespace { initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } - bool runOnModule(Module &M) { + bool runOnModule(Module &M) override { static const char *const metaNames[] = { // See http://en.wikipedia.org/wiki/Metasyntactic_variable "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp index ff6e6f9..d9dbbca 100644 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -24,16 +24,16 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, int Priority) { IRBuilder<> IRB(M.getContext()); FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); - StructType *Ty = StructType::get( - IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL); - - Constant *RuntimeCtorInit = ConstantStruct::get( - Ty, IRB.getInt32(Priority), F, 
NULL); // Get the current set of static global constructors and add the new ctor // to the list. SmallVector<Constant *, 16> CurrentCtors; - if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) { + StructType *EltTy; + if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) { + // If there is a global_ctors array, use the existing struct type, which can + // have 2 or 3 fields. + ArrayType *ATy = cast<ArrayType>(GVCtor->getType()->getElementType()); + EltTy = cast<StructType>(ATy->getElementType()); if (Constant *Init = GVCtor->getInitializer()) { unsigned n = Init->getNumOperands(); CurrentCtors.reserve(n + 1); @@ -41,13 +41,26 @@ static void appendToGlobalArray(const char *Array, CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); } GVCtor->eraseFromParent(); + } else { + // Use a simple two-field struct if there isn't one already. + EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), + nullptr); } + // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. + Constant *CSVals[3]; + CSVals[0] = IRB.getInt32(Priority); + CSVals[1] = F; + // FIXME: Drop support for the two element form in LLVM 4.0. + if (EltTy->getNumElements() >= 3) + CSVals[2] = llvm::Constant::getNullValue(IRB.getInt8PtrTy()); + Constant *RuntimeCtorInit = + ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements())); + CurrentCtors.push_back(RuntimeCtorInit); // Create a new initializer. - ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(), - CurrentCtors.size()); + ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); Constant *NewInit = ConstantArray::get(AT, CurrentCtors); // Create the new global variable and replace all uses of diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 8f6eee3..06d73fe 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -34,23 +33,25 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" -#include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <queue> using namespace llvm; +#define DEBUG_TYPE "mem2reg" + STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); @@ -59,11 +60,10 @@ STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); bool llvm::isAllocaPromotable(const AllocaInst *AI) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. 
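As context for the checks that follow, a hypothetical source-level view of what passing them buys: once an alloca is promotable, mem2reg deletes the stack slot and turns its loads into SSA values (here a phi).

int promoted(bool c) {
  int x;        // the alloca
  if (c) x = 1; // store
  else   x = 2; // store
  return x;     // load; after promotion this is phi [1 from then, 2 from else]
}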
+ unsigned AS = AI->getType()->getAddressSpace(); // Only allow direct and non-volatile loads and stores... - for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { // Loop over all of the uses of the alloca - const User *U = *UI; + for (const User *U : AI->users()) { if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { // Note that atomic loads can be transformed; atomic semantics do // not have any meaning for a local alloca. @@ -81,12 +81,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { II->getIntrinsicID() != Intrinsic::lifetime_end) return false; } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) return false; if (!onlyUsedByLifetimeMarkers(BCI)) return false; } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) return false; if (!GEPI->hasAllZeroIndices()) return false; @@ -116,11 +116,11 @@ struct AllocaInfo { void clear() { DefiningBlocks.clear(); UsingBlocks.clear(); - OnlyStore = 0; - OnlyBlock = 0; + OnlyStore = nullptr; + OnlyBlock = nullptr; OnlyUsedInOneBlock = true; - AllocaPointerVal = 0; - DbgDeclare = 0; + AllocaPointerVal = nullptr; + DbgDeclare = nullptr; } /// Scan the uses of the specified alloca, filling in the AllocaInfo used @@ -131,8 +131,7 @@ struct AllocaInfo { // As we scan the uses of the alloca instruction, keep track of stores, // and decide whether all of the loads and stores to the alloca are within // the same basic block. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E;) { + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *User = cast<Instruction>(*UI++); if (StoreInst *SI = dyn_cast<StoreInst>(User)) { @@ -149,7 +148,7 @@ struct AllocaInfo { } if (OnlyUsedInOneBlock) { - if (OnlyBlock == 0) + if (!OnlyBlock) OnlyBlock = User->getParent(); else if (OnlyBlock != User->getParent()) OnlyUsedInOneBlock = false; @@ -165,7 +164,7 @@ class RenamePassData { public: typedef std::vector<Value *> ValVector; - RenamePassData() : BB(NULL), Pred(NULL), Values() {} + RenamePassData() : BB(nullptr), Pred(nullptr), Values() {} RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) : BB(B), Pred(P), Values(V) {} BasicBlock *BB; @@ -317,8 +316,7 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. - for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE;) { + for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { Instruction *I = cast<Instruction>(*UI); ++UI; if (isa<LoadInst>(I) || isa<StoreInst>(I)) @@ -328,10 +326,9 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // The only users of this bitcast/GEP instruction are lifetime intrinsics. // Follow the use/def chain to erase them now instead of leaving it for // dead code elimination later. 
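The AllocaInfo scan above drives two fast paths that appear next; a hedged source-level illustration (names hypothetical): an alloca written exactly once lets every load dominated by that store take the stored value directly, with no phi placement at all.

int single_store(int v) {
  int x;
  x = v;        // the only store (AllocaInfo::OnlyStore)
  return x + 1; // the load simply folds to v + 1
}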
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE;) { - Instruction *Inst = cast<Instruction>(*UI); - ++UI; + for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { + Instruction *Inst = cast<Instruction>(*UUI); + ++UUI; Inst->eraseFromParent(); } } @@ -359,7 +356,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Clear out UsingBlocks. We will reconstruct it here if needed. Info.UsingBlocks.clear(); - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast<Instruction>(*UI++); if (!isa<LoadInst>(UserInst)) { assert(UserInst == OnlyStore && "Should only have load/stores"); @@ -456,9 +453,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; StoresByIndexTy StoresByIndex; - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; - ++UI) - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + for (User *U : AI->users()) + if (StoreInst *SI = dyn_cast<StoreInst>(U)) StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); // Sort the stores by their index, making it efficient to do a lookup with a @@ -467,7 +463,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast<LoadInst>(*UI++); if (!LI) continue; @@ -477,7 +473,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Find the nearest store that has a lower index than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::make_pair(LoadIdx, static_cast<StoreInst *>(0)), + std::make_pair(LoadIdx, + static_cast<StoreInst *>(nullptr)), less_first()); if (I == StoresByIndex.begin()) @@ -485,7 +482,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LI->replaceAllUsesWith(UndefValue::get(LI->getType())); else // Otherwise, there was a store before this load, the load takes its value. - LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0)); + LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0)); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); @@ -495,7 +492,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Remove the (now dead) stores and alloca. while (!AI->use_empty()) { - StoreInst *SI = cast<StoreInst>(AI->use_back()); + StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { DIBuilder DIB(*AI->getParent()->getParent()->getParent()); @@ -638,7 +635,7 @@ void PromoteMem2Reg::run() { // and inserting the phi nodes we marked as necessary // std::vector<RenamePassData> RenamePassWorkList; - RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); + RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); @@ -679,8 +676,8 @@ void PromoteMem2Reg::run() { // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. 
If it was not, we could add uses to - // the values we replace with in a non deterministic order, thus creating - // non deterministic def->use chains. + // the values we replace with in a non-deterministic order, thus creating + // non-deterministic def->use chains. for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); @@ -688,7 +685,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -996,7 +993,7 @@ NextIteration: // Get the next phi node. ++PNI; APN = dyn_cast<PHINode>(PNI); - if (APN == 0) + if (!APN) break; // Verify that it is missing entries. If not, it is not being inserted diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 30adbfa..3fcb789 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -11,17 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ssaupdater" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/AlignOf.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -30,20 +27,22 @@ using namespace llvm; +#define DEBUG_TYPE "ssaupdater" + typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} + : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete static_cast<AvailableValsTy*>(AV); } void SSAUpdater::Initialize(Type *Ty, StringRef Name) { - if (AV == 0) + if (!AV) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); @@ -56,7 +55,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { } void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { - assert(ProtoType != 0 && "Need to initialize SSAUpdater"); + assert(ProtoType && "Need to initialize SSAUpdater"); assert(ProtoType == V->getType() && "All rewritten values must have the same type"); getAvailableVals(AV)[BB] = V; @@ -92,7 +91,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we have the hard case. Get the live-in values for each // predecessor. SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; - Value *SingularValue = 0; + Value *SingularValue = nullptr; // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. 
If we already have PHI nodes in this block, walk one @@ -107,7 +106,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (i == 0) SingularValue = PredVal; else if (PredVal != SingularValue) - SingularValue = 0; + SingularValue = nullptr; } } else { bool isFirstPred = true; @@ -121,7 +120,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { SingularValue = PredVal; isFirstPred = false; } else if (PredVal != SingularValue) - SingularValue = 0; + SingularValue = nullptr; } } @@ -130,7 +129,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return UndefValue::get(ProtoType); // Otherwise, if all the merged values are the same, just use it. - if (SingularValue != 0) + if (SingularValue) return SingularValue; // Otherwise, we do need a PHI: check to see if we already have one available @@ -293,7 +292,7 @@ public: PHINode *PHI = ValueIsPHI(Val, Updater); if (PHI && PHI->getNumIncomingValues() == 0) return PHI; - return 0; + return nullptr; } /// GetPHIValue - For the specified PHI instruction, return the value @@ -403,7 +402,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // the order of these instructions in the block. If the first use in the // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. - Value *StoredValue = 0; + Value *StoredValue = nullptr; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (LoadInst *L = dyn_cast<LoadInst>(II)) { // If this is a load from an unrelated pointer, ignore it. diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index ff50b12..24bb63b 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -23,6 +22,8 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -34,14 +35,12 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/NoFolder.h" -#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> @@ -50,6 +49,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "simplifycfg" + static cl::opt<unsigned> PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), cl::desc("Control the amount of phi node folding to perform (default = 1)")); @@ -62,12 +63,13 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); -static cl::opt<bool> -HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), - cl::desc("Hoist conditional 
stores if an unconditional store preceeds")); +static cl::opt<bool> HoistCondStores( + "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store precedes")); STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); +STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); @@ -90,7 +92,7 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; - const DataLayout *const TD; + const DataLayout *const DL; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -109,8 +111,8 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD) - : TTI(TTI), TD(TD) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *DL) + : TTI(TTI), DL(DL) {} bool run(BasicBlock *BB); }; } @@ -199,8 +201,8 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// ComputeSpeculationCost - Compute an abstract "cost" of speculating the /// given instruction, which is assumed to be safe to speculate. 1 means /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. -static unsigned ComputeSpeculationCost(const User *I) { - assert(isSafeToSpeculativelyExecute(I) && +static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { + assert(isSafeToSpeculativelyExecute(I, DL) && "Instruction is not safe to speculatively execute!"); switch (Operator::getOpcode(I)) { default: @@ -211,6 +213,7 @@ static unsigned ComputeSpeculationCost(const User *I) { if (!cast<GEPOperator>(I)->hasAllConstantIndices()) return UINT_MAX; return 1; + case Instruction::ExtractValue: case Instruction::Load: case Instruction::Add: case Instruction::Sub: @@ -224,6 +227,9 @@ static unsigned ComputeSpeculationCost(const User *I) { case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: + case Instruction::BitCast: + case Instruction::ExtractElement: + case Instruction::InsertElement: return 1; // These are all cheap. case Instruction::Call: @@ -251,7 +257,8 @@ static unsigned ComputeSpeculationCost(const User *I) { /// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSet<Instruction*, 4> *AggressiveInsts, - unsigned &CostRemaining) { + unsigned &CostRemaining, + const DataLayout *DL) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -271,12 +278,12 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // branch to BB, then it must be in the 'conditional' part of the "if // statement". If not, it definitely dominates the region. BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()); - if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB) + if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB) return true; // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. 
- if (AggressiveInsts == 0) return false; + if (!AggressiveInsts) return false; // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -284,10 +291,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I, DL)) return false; - unsigned Cost = ComputeSpeculationCost(I); + unsigned Cost = ComputeSpeculationCost(I, DL); if (Cost > CostRemaining) return false; @@ -297,7 +304,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -306,15 +313,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { +static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); - if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) + if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy()) return CI; // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType())); + IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -331,7 +338,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { return cast<ConstantInt> (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); } - return 0; + return nullptr; } /// GatherConstantCompares - Given a potentially 'or'd or 'and'd together @@ -340,13 +347,13 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { /// Values vector. static Value * GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, - const DataLayout *TD, bool isEQ, unsigned &UsedICmps) { + const DataLayout *DL, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return 0; + if (!I) return nullptr; // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { - if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { + if (ConstantInt *C = GetConstantInt(I->getOperand(1), DL)) { Value *RHSVal; ConstantInt *RHSC; @@ -389,27 +396,27 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, // If there are a ton of values, we don't want to make a ginormous switch. if (Span.getSetSize().ugt(8) || Span.isEmptySet()) - return 0; + return nullptr; for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; return hasAdd ? 
RHSVal : I->getOperand(0); } - return 0; + return nullptr; } // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) - return 0; + return nullptr; unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; - if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, + if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, DL, isEQ, UsedICmps)) { unsigned NumVals = Vals.size(); unsigned UsedICmpsBeforeRHS = UsedICmps; - if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL, isEQ, UsedICmps)) { if (LHS == RHS) return LHS; @@ -419,33 +426,33 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, // The RHS of the or/and can't be folded in and we haven't used "Extra" yet, // set it and return success. - if (Extra == 0 || Extra == I->getOperand(1)) { + if (Extra == nullptr || Extra == I->getOperand(1)) { Extra = I->getOperand(1); return LHS; } Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; - return 0; + return nullptr; } // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. - if (Extra == 0 || Extra == I->getOperand(0)) { + if (Extra == nullptr || Extra == I->getOperand(0)) { Value *OldExtra = Extra; Extra = I->getOperand(0); - if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD, + if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL, isEQ, UsedICmps)) return RHS; assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - return 0; + return nullptr; } static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { - Instruction *Cond = 0; + Instruction *Cond = nullptr; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Cond = dyn_cast<Instruction>(SI->getCondition()); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { @@ -462,7 +469,7 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { /// isValueEqualityComparison - Return true if the specified terminator checks /// to see if a value is equal to constant integer value. Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { - Value *CV = 0; + Value *CV = nullptr; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { // Do not permit merging of large switch instructions into their // predecessors unless there is only one predecessor. @@ -472,14 +479,14 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) - if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD)) + if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL)) CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. 
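A hedged source-level illustration of when that unwrap matters: equality tests of a pointer against integer sentinels can only be merged into a switch after the pointer is converted to an integer (the "magicptr" ptrtoint emitted later in this file).

int kind(void *p) {
  if (p == (void *)0)  return 0; // null participates as integer 0
  if (p == (void *)-1) return 1; // a MAP_FAILED-style sentinel
  return 2;
}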
- if (TD && CV) { + if (DL && CV) { if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { Value *Ptr = PTII->getPointerOperand(); - if (PTII->getType() == TD->getIntPtrType(Ptr->getType())) + if (PTII->getType() == DL->getIntPtrType(Ptr->getType())) CV = Ptr; } } @@ -504,7 +511,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1), - TD), + DL), Succ)); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); } @@ -652,11 +659,11 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. - ConstantInt *TIV = 0; + ConstantInt *TIV = nullptr; BasicBlock *TIBB = TI->getParent(); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == TIBB) { - if (TIV != 0) + if (TIV) return false; // Cannot handle multiple values coming to this block. TIV = PredCases[i].Value; } @@ -664,7 +671,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Okay, we found the one constant that our value can be if we get into TI's // BB. Find out which successor will unconditionally be branched to. - BasicBlock *TheRealDest = 0; + BasicBlock *TheRealDest = nullptr; for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) if (ThisCases[i].Value == TIV) { TheRealDest = ThisCases[i].Dest; @@ -672,7 +679,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, } // If not handled by any explicit cases, it is handled by the default case. - if (TheRealDest == 0) TheRealDest = ThisDef; + if (!TheRealDest) TheRealDest = ThisDef; // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; @@ -680,7 +687,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, if (*SI != CheckEdge) (*SI)->removePredecessor(TIBB); else - CheckEdge = 0; + CheckEdge = nullptr; // Insert the new branch. Instruction *NI = Builder.CreateBr(TheRealDest); @@ -732,8 +739,7 @@ static void GetBranchWeights(TerminatorInst *TI, MDNode* MD = TI->getMetadata(LLVMContext::MD_prof); assert(MD); for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { - ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); - assert(CI); + ConstantInt *CI = cast<ConstantInt>(MD->getOperand(i)); Weights.push_back(CI->getValue().getZExtValue()); } @@ -748,21 +754,14 @@ static void GetBranchWeights(TerminatorInst *TI, } } -/// Sees if any of the weights are too big for a uint32_t, and halves all the -/// weights if any are. +/// Keep halving the weights until all can fit in uint32_t. static void FitWeights(MutableArrayRef<uint64_t> Weights) { - bool Halve = false; - for (unsigned i = 0; i < Weights.size(); ++i) - if (Weights[i] > UINT_MAX) { - Halve = true; - break; - } - - if (! Halve) - return; - - for (unsigned i = 0; i < Weights.size(); ++i) - Weights[i] /= 2; + uint64_t Max = *std::max_element(Weights.begin(), Weights.end()); + if (Max > UINT_MAX) { + unsigned Offset = 32 - countLeadingZeros(Max); + for (uint64_t &I : Weights) + I >>= Offset; + } } /// FoldValueComparisonIntoPredecessors - The specified terminator is a value @@ -929,8 +928,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. 
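Stepping back to the FitWeights rewrite above: the old code halved all weights at most once, which could still leave values above UINT_MAX, while the new code computes a single right shift that makes the largest weight fit in 32 bits. A standalone sketch of the same arithmetic (plain C++; __builtin_clzll assumed available, Weights assumed non-empty):

#include <algorithm>
#include <climits>
#include <cstdint>
#include <vector>

static void fitWeights(std::vector<uint64_t> &Weights) {
  uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
  if (Max > UINT_MAX) {
    // Max has (64 - clz) significant bits; shifting by that minus 32 leaves
    // exactly 32, so every weight now fits in a uint32_t.
    unsigned Offset = 32 - static_cast<unsigned>(__builtin_clzll(Max));
    for (uint64_t &W : Weights)
      W >>= Offset;
  }
}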
if (CV->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), + assert(DL && "Cannot switch on pointer without DataLayout"); + CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()), "magicptr"); } @@ -957,10 +956,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, last check. If BB is still a successor of PSI, then we must // have an infinite loop case. If so, add an infinitely looping block // to handle the case to preserve the behavior of the code. - BasicBlock *InfLoopBlock = 0; + BasicBlock *InfLoopBlock = nullptr; for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) if (NewSI->getSuccessor(i) == BB) { - if (InfLoopBlock == 0) { + if (!InfLoopBlock) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) InfLoopBlock = BasicBlock::Create(BB->getContext(), @@ -999,7 +998,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI) { +static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1073,9 +1072,9 @@ HoistTerminator: if (BB1V == BB2V) continue; - if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL)) return Changed; - if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL)) return Changed; } } @@ -1106,7 +1105,7 @@ HoistTerminator: // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (!SI) SI = cast<SelectInst> (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1151,7 +1150,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // Gather the PHI nodes in BBEnd. std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2; - Instruction *FirstNonPhiInBBEnd = 0; + Instruction *FirstNonPhiInBBEnd = nullptr; for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) { if (PHINode *PN = dyn_cast<PHINode>(I)) { @@ -1229,7 +1228,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // The operands should be either the same or they need to be generated // with a PHI node after sinking. We only handle the case where there is // a single pair of different operands. - Value *DifferentOp1 = 0, *DifferentOp2 = 0; + Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr; unsigned Op1Idx = 0; for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { if (I1->getOperand(I) == I2->getOperand(I)) @@ -1325,11 +1324,11 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB) { StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); if (!StoreToHoist) - return 0; + return nullptr; // Volatile or atomic. 
if (!StoreToHoist->isSimple()) - return 0; + return nullptr; Value *StorePtr = StoreToHoist->getPointerOperand(); @@ -1341,7 +1340,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Could be calling an instruction that effects memory like free(). if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) - return 0; + return nullptr; StoreInst *SI = dyn_cast<StoreInst>(CurI); // Found the previous store make sure it stores to the same location. @@ -1349,10 +1348,10 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Found the previous store, return its value operand. return SI->getValueOperand(); else if (SI) - return 0; // Unknown store. + return nullptr; // Unknown store. } - return 0; + return nullptr; } /// \brief Speculate a conditional basic block flattening the CFG. @@ -1392,7 +1391,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const DataLayout *DL) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa<FCmpInst>(BrCond)) @@ -1418,10 +1418,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; unsigned SpeculationCost = 0; - Value *SpeculatedStoreValue = 0; - StoreInst *SpeculatedStore = 0; + Value *SpeculatedStoreValue = nullptr; + StoreInst *SpeculatedStore = nullptr; for (BasicBlock::iterator BBI = ThenBB->begin(), - BBE = llvm::prior(ThenBB->end()); + BBE = std::prev(ThenBB->end()); BBI != BBE; ++BBI) { Instruction *I = BBI; // Skip debug info. @@ -1435,13 +1435,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I) && + if (!isSafeToSpeculativelyExecute(I, DL) && !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) return false; // Store the store speculation candidate. @@ -1492,11 +1492,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0; if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) return false; @@ -1531,7 +1531,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // Hoist the instructions. BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), - llvm::prior(ThenBB->end())); + std::prev(ThenBB->end())); // Insert selects and rewrite the PHI operands. 
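In source terms, the store speculation enabled by HoistCondStores (hypothetical example): a conditional store preceded by an unconditional store to the same location is flattened into a select, so the branch can go away.

void hoist(bool c, int a, int b, int *p) {
  *p = a;   // the unconditional store found by isSafeToSpeculateStore
  if (c)
    *p = b; // becomes: *p = c ? b : a;
}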
IRBuilder<true, NoFolder> Builder(BI); @@ -1589,10 +1589,9 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { // We can only support instructions that do not define values that are // live outside of the current basic block. - for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); - UI != E; ++UI) { - Instruction *U = cast<Instruction>(*UI); - if (U->getParent() != BB || isa<PHINode>(U)) return false; + for (User *U : BBI->users()) { + Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != BB || isa<PHINode>(UI)) return false; } // Looks ok, continue checking. @@ -1605,7 +1604,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -1628,7 +1627,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); - if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; + if (!CB || !CB->getType()->isIntegerTy(1)) continue; // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. @@ -1674,7 +1673,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { } // Check for trivial simplification. - if (Value *V = SimplifyInstruction(N, TD)) { + if (Value *V = SimplifyInstruction(N, DL)) { TranslateMap[BBI] = V; delete N; // Instruction folded away, don't need actual inst } else { @@ -1695,7 +1694,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { } // Recurse, simplifying any other constants. - return FoldCondBranchOnPHI(BI, TD) | true; + return FoldCondBranchOnPHI(BI, DL) | true; } return false; @@ -1703,7 +1702,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { +static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -1737,23 +1736,23 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); - if (Value *V = SimplifyInstruction(PN, TD)) { + if (Value *V = SimplifyInstruction(PN, DL)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); continue; } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0) || + MaxCostVal0, DL) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1)) + MaxCostVal1, DL)) return false; } // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. 
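In source terms, the if-conversion FoldTwoEntryPHINode performs (hypothetical example; both arms must satisfy DominatesMergePoint's safety and cost budget):

int fold(bool c, int x, int y) {
  int r;
  if (c) r = x | 3; // cheap and safe to speculate
  else   r = y + 2; // likewise
  return r;         // the phi becomes select(c, x | 3, y + 2)
}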
PN = dyn_cast<PHINode>(BB->begin()); - if (PN == 0) return true; + if (!PN) return true; // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. @@ -1767,11 +1766,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not // worth promoting to select instructions. - BasicBlock *DomBlock = 0; + BasicBlock *DomBlock = nullptr; BasicBlock *IfBlock1 = PN->getIncomingBlock(0); BasicBlock *IfBlock2 = PN->getIncomingBlock(1); if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) { - IfBlock1 = 0; + IfBlock1 = nullptr; } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I) @@ -1784,7 +1783,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { } if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { - IfBlock2 = 0; + IfBlock2 = nullptr; } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I) @@ -1964,10 +1963,10 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) { BasicBlock *BB = BI->getParent(); - Instruction *Cond = 0; + Instruction *Cond = nullptr; if (BI->isConditional()) Cond = dyn_cast<Instruction>(BI->getCondition()); else { @@ -1993,12 +1992,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } } - if (Cond == 0) + if (!Cond) return false; } - if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || - Cond->getParent() != BB || !Cond->hasOneUse()) + if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) return false; // Only allow this if the condition is a simple instruction that can be @@ -2013,10 +2012,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // that feeds the branch. We later ensure that any values that _it_ uses // were also live in the predecessor, so that we don't unnecessarily create // register pressure or inhibit out-of-order execution. - Instruction *BonusInst = 0; + Instruction *BonusInst = nullptr; if (&*FrontIt != Cond && - FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond && - isSafeToSpeculativelyExecute(FrontIt)) { + FrontIt->hasOneUse() && FrontIt->user_back() == Cond && + isSafeToSpeculativelyExecute(FrontIt, DL)) { BonusInst = &*FrontIt; ++FrontIt; @@ -2031,7 +2030,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; - // Ingore dbg intrinsics. + // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; if (&*CondIt != BI) @@ -2048,7 +2047,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; + BasicBlock *FalseDest = (BI->isConditional()) ? 
BI->getSuccessor(1) : nullptr; if (TrueDest == BB || FalseDest == BB) return false; @@ -2060,7 +2059,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // the common successor, verify that the same value flows in from both // blocks. SmallVector<PHINode*, 4> PHIs; - if (PBI == 0 || PBI->isUnconditional() || + if (!PBI || PBI->isUnconditional() || (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && @@ -2094,7 +2093,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // instructions that are used by the terminator's condition because it // exposes more merging opportunities. bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() && - *BonusInst->use_begin() == Cond); + BonusInst->user_back() == Cond); if (BonusInst && !UsedByBranch) { // Collect the values used by the bonus inst @@ -2150,9 +2149,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } // If we have a bonus inst, clone it into the predecessor block. - Instruction *NewBonus = 0; + Instruction *NewBonus = nullptr; if (BonusInst) { NewBonus = BonusInst->clone(); + + // If we moved a load, we cannot any longer claim any knowledge about + // its potential value. The previous information might have been valid + // only given the branch precondition. + // For an analogous reason, we must also drop all the metadata whose + // semantics we don't understand. + NewBonus->dropUnknownMetadata(LLVMContext::MD_dbg); + PredBlock->getInstList().insert(PBI, NewBonus); NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); @@ -2218,14 +2225,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { MDBuilder(BI->getContext()). createBranchWeights(MDWeights)); } else - PBI->setMetadata(LLVMContext::MD_prof, NULL); + PBI->setMetadata(LLVMContext::MD_prof, nullptr); } else { // Update PHI nodes in the common successors. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { ConstantInt *PBI_C = cast<ConstantInt>( PHIs[i]->getIncomingValueForBlock(PBI->getParent())); assert(PBI_C->getType()->isIntegerTy(1)); - Instruction *MergedCond = 0; + Instruction *MergedCond = nullptr; if (PBI->getSuccessor(0) == TrueDest) { // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) @@ -2338,7 +2345,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } // If this is a conditional branch in an empty block, and if any - // predecessors is a conditional branch to one of our destinations, + // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. BasicBlock::iterator BBI = BB->begin(); // Ignore dbg intrinsics. @@ -2373,16 +2380,33 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. - BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + // Also do not perform this transformation if any phi node in the common + // destination block can trap when reached by BB or PBB (PR17073). In that + // case, it would be unsafe to hoist the operation into a select instruction. + + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); - isa<PHINode>(II); ++II, ++NumPhis) + isa<PHINode>(II); ++II, ++NumPhis) { if (NumPhis > 2) // Disable this xform. 
return false; + PHINode *PN = cast<PHINode>(II); + Value *BIV = PN->getIncomingValueForBlock(BB); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV)) + if (CE->canTrap()) + return false; + + unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN->getIncomingValue(PBBIdx); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV)) + if (CE->canTrap()) + return false; + } + // Finally, if everything is ok, fold the branches to logical ops. - BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); + BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); @@ -2498,16 +2522,16 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // If TrueBB and FalseBB are equal, only try to preserve one copy of that // successor. BasicBlock *KeepEdge1 = TrueBB; - BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; // Then remove the rest. for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { BasicBlock *Succ = OldTerm->getSuccessor(I); // Make sure only to keep exactly one copy of each edge. if (Succ == KeepEdge1) - KeepEdge1 = 0; + KeepEdge1 = nullptr; else if (Succ == KeepEdge2) - KeepEdge2 = 0; + KeepEdge2 = nullptr; else Succ->removePredecessor(OldTerm->getParent()); } @@ -2516,7 +2540,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); // Insert an appropriate new terminator. - if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) { + if (!KeepEdge1 && !KeepEdge2) { if (TrueBB == FalseBB) // We were only looking for one successor, and it was present. // Create an unconditional branch to it. @@ -2538,7 +2562,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // One of the selected values was a successor, but the other wasn't. // Insert an unconditional branch to the one that was found; // the edge to the one that wasn't must be unreachable. - if (KeepEdge1 == 0) + if (!KeepEdge1) // Only TrueBB was found. Builder.CreateBr(TrueBB); else @@ -2625,7 +2649,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - const DataLayout *TD) { + const DataLayout *DL) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -2639,7 +2663,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); - if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; + if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) return false; SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) @@ -2653,12 +2677,12 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - if (Value *V = SimplifyInstruction(ICI, TD)) { + if (Value *V = SimplifyInstruction(ICI, DL)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. 
- return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -2674,14 +2698,14 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); - PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back()); - if (PHIUse == 0 || PHIUse != &SuccBlock->front() || + PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back()); + if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || isa<PHINode>(++BasicBlock::iterator(PHIUse))) return false; @@ -2730,32 +2754,32 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, +static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (Cond == 0) return false; + if (!Cond) return false; // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. - Value *CompVal = 0; + Value *CompVal = nullptr; std::vector<ConstantInt*> Values; bool TrueWhenEqual = true; - Value *ExtraCase = 0; + Value *ExtraCase = nullptr; unsigned UsedICmps = 0; if (Cond->getOpcode() == Instruction::Or) { - CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, + CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, true, UsedICmps); } else if (Cond->getOpcode() == Instruction::And) { - CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false, + CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, false, UsedICmps); TrueWhenEqual = false; } // If we didn't have a multiply compared value, fail. - if (CompVal == 0) return false; + if (!CompVal) return false; // Avoid turning single icmps into a switch. if (UsedICmps <= 1) @@ -2811,9 +2835,9 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without DataLayout"); + assert(DL && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, - TD->getIntPtrType(CompVal->getType()), + DL->getIntPtrType(CompVal->getType()), "magicptr"); } @@ -3050,7 +3074,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // Find the most popular block. 
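// ---- [illustrative aside, not part of the patch] ----
// What SimplifyBranchOnICmpChain above recognizes, shown at source level
// (a sketch; the pass itself rewrites IR, and these functions are only an
// analogy). An or-chain of equality compares against constants becomes a
// single switch on the shared value:
static bool beforeChainFold(int X) { return X == 0 || X == 1 || X == 7; }
static bool afterChainFold(int X) {
  switch (X) {            // one switch replaces the icmp/or chain
  case 0: case 1: case 7: return true;
  default:                return false;
  }
}
// ---- [end aside] ----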
unsigned MaxPop = 0; unsigned MaxIndex = 0; - BasicBlock *MaxBlock = 0; + BasicBlock *MaxBlock = nullptr; for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { if (I->second.first > MaxPop || @@ -3188,7 +3212,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - ComputeMaskedBits(Cond, KnownZero, KnownOne); + computeKnownBits(Cond, KnownZero, KnownOne); // Gather dead cases. SmallVector<ConstantInt*, 8> DeadCases; @@ -3222,7 +3246,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } - if (HasWeight) { + if (HasWeight && Weights.size() >= 2) { SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); SI->setMetadata(LLVMContext::MD_prof, MDBuilder(SI->getParent()->getContext()). @@ -3241,13 +3265,13 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex) { if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) - return NULL; // BB must be empty to be a candidate for simplification. + return nullptr; // BB must be empty to be a candidate for simplification. if (!BB->getSinglePredecessor()) - return NULL; // BB must be dominated by the switch. + return nullptr; // BB must be dominated by the switch. BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); if (!Branch || !Branch->isUnconditional()) - return NULL; // Terminator must be unconditional branch. + return nullptr; // Terminator must be unconditional branch. BasicBlock *Succ = Branch->getSuccessor(0); @@ -3263,7 +3287,7 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, return PHI; } - return NULL; + return nullptr; } /// ForwardSwitchConditionToPHI - Try to forward the condition of a switch @@ -3306,6 +3330,11 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { /// ValidLookupTableConstant - Return true if the backend will be able to handle /// initializing an array of constants like C. 
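// ---- [illustrative aside, not part of the patch] ----
// The dead-case test used by EliminateDeadSwitchCases above, as a sketch;
// the standalone helper is hypothetical. computeKnownBits (the new name of
// ComputeMaskedBits in this change) reports bits of the condition that are
// provably 0 or 1; a case constant contradicting either mask can never be
// selected, so its edge may be removed.
static bool caseIsDead(const APInt &CaseVal, const APInt &KnownZero,
                       const APInt &KnownOne) {
  return (CaseVal & KnownZero) != 0 ||     // sets a bit known to be zero
         (CaseVal & KnownOne) != KnownOne; // clears a bit known to be one
}
// ---- [end aside] ----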
static bool ValidLookupTableConstant(Constant *C) { + if (C->isThreadDependent()) + return false; + if (C->isDLLImportDependent()) + return false; + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) return CE->isGEPWithNoNotionalOverIndexing(); @@ -3336,12 +3365,12 @@ ConstantFold(Instruction *I, if (SelectInst *Select = dyn_cast<SelectInst>(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) - return 0; + return nullptr; if (A->isAllOnesValue()) return LookupConstant(Select->getTrueValue(), ConstantPool); if (A->isNullValue()) return LookupConstant(Select->getFalseValue(), ConstantPool); - return 0; + return nullptr; } SmallVector<Constant *, 4> COps; @@ -3349,7 +3378,7 @@ ConstantFold(Instruction *I, if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) COps.push_back(A); else - return 0; + return nullptr; } if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) @@ -3428,7 +3457,7 @@ GetCaseResults(SwitchInst *SI, Res.push_back(std::make_pair(PHI, ConstVal)); } - return true; + return Res.size() > 0; } namespace { @@ -3444,7 +3473,7 @@ namespace { ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, - const DataLayout *TD); + const DataLayout *DL); /// BuildLookup - Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. @@ -3452,7 +3481,7 @@ namespace { /// WouldFitInRegister - Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. - static bool WouldFitInRegister(const DataLayout *TD, + static bool WouldFitInRegister(const DataLayout *DL, uint64_t TableSize, const Type *ElementType); @@ -3491,38 +3520,44 @@ SwitchLookupTable::SwitchLookupTable(Module &M, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, - const DataLayout *TD) - : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { + const DataLayout *DL) + : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), + Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. SingleValue = Values.begin()->second; + Type *ValueType = Values.begin()->second->getType(); + // Build up the table contents. SmallVector<Constant*, 64> TableContents(TableSize); for (size_t I = 0, E = Values.size(); I != E; ++I) { ConstantInt *CaseVal = Values[I].first; Constant *CaseRes = Values[I].second; - assert(CaseRes->getType() == DefaultValue->getType()); + assert(CaseRes->getType() == ValueType); uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) .getLimitedValue(); TableContents[Idx] = CaseRes; if (CaseRes != SingleValue) - SingleValue = 0; + SingleValue = nullptr; } // Fill in any holes in the table with the default result. if (Values.size() < TableSize) { + assert(DefaultValue && + "Need a default value to fill the lookup table holes."); + assert(DefaultValue->getType() == ValueType); for (uint64_t I = 0; I < TableSize; ++I) { if (!TableContents[I]) TableContents[I] = DefaultValue; } if (DefaultValue != SingleValue) - SingleValue = 0; + SingleValue = nullptr; } // If each element in the table contains the same value, we only need to store @@ -3533,8 +3568,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } // If the type is integer and the table fits in a register, build a bitmap. 
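// ---- [illustrative aside, not part of the patch] ----
// How the bitmap ("BitMapKind") representation mentioned above works, in
// plain C++ (sketch; names are hypothetical). A small integer-typed table
// is packed into one register-sized constant with element 0 in the low
// bits; entry I is recovered by a shift plus truncation, which the real
// code emits as lshr + trunc. For an i8 table {0x11, 0x22, 0x33} the
// packed constant is 0x332211 and entry 1 reads back 0x22.
static uint8_t lookupPackedTable(uint64_t BitMap, unsigned Index) {
  return static_cast<uint8_t>(BitMap >> (Index * 8)); // 8 = element bits
}
// ---- [end aside] ----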
- if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { - IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + if (WouldFitInRegister(DL, TableSize, ValueType)) { + IntegerType *IT = cast<IntegerType>(ValueType); APInt TableInt(TableSize * IT->getBitWidth(), 0); for (uint64_t I = TableSize; I > 0; --I) { TableInt <<= IT->getBitWidth(); @@ -3552,7 +3587,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } // Store the table in an array. - ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); + ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, @@ -3589,6 +3624,16 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { "switch.masked"); } case ArrayKind: { + // Make sure the table index will not overflow when treated as signed. + IntegerType *IT = cast<IntegerType>(Index->getType()); + uint64_t TableSize = Array->getInitializer()->getType() + ->getArrayNumElements(); + if (TableSize > (1ULL << (IT->getBitWidth() - 1))) + Index = Builder.CreateZExt(Index, + IntegerType::get(IT->getContext(), + IT->getBitWidth() + 1), + "switch.tableidx.zext"); + Value *GEPIndices[] = { Builder.getInt32(0), Index }; Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, "switch.gep"); @@ -3598,10 +3643,10 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { llvm_unreachable("Unknown lookup table kind!"); } -bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, +bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL, uint64_t TableSize, const Type *ElementType) { - if (!TD) + if (!DL) return false; const IntegerType *IT = dyn_cast<IntegerType>(ElementType); if (!IT) @@ -3612,7 +3657,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. if (TableSize >= UINT_MAX/IT->getBitWidth()) return false; - return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); + return DL->fitsInLegalInteger(TableSize * IT->getBitWidth()); } /// ShouldBuildLookupTable - Determine whether a lookup table should be built @@ -3621,7 +3666,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, - const DataLayout *TD, + const DataLayout *DL, const SmallDenseMap<PHINode*, Type*>& ResultTypes) { if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) return false; // TableSize overflowed, or mul below might overflow. @@ -3637,7 +3682,7 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, // Saturate this flag to false. AllTablesFitInRegister = AllTablesFitInRegister && - SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty); + SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); // If both flags saturate, we're done. NOTE: This *only* works with // saturating flags, and all flags have to saturate first due to the @@ -3666,7 +3711,7 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - const DataLayout* TD) { + const DataLayout* DL) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // Only build lookup table when we have a target that supports it. @@ -3680,11 +3725,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, // GEP needs a runtime relocation in PIC code. 
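// ---- [illustrative aside, not part of the patch] ----
// The overall SwitchToLookupTable rewrite, pictured at source level
// (sketch only; the pass transforms IR, and "Table" stands in for the
// switch.table global it creates):
static int beforeTable(unsigned I) {
  switch (I) {
  case 0: return 13; case 1: return 42; case 2: return 7;
  default: return 0;
  }
}
static int afterTable(unsigned I) {
  static const int Table[3] = {13, 42, 7}; // the generated constant array
  return I < 3 ? Table[I] : 0;             // bounds check, then one load
}
// ---- [end aside] ----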
We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases is too small. - // This is similar to the check when building jump tables in - // SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Determine the best cut-off. - if (SI->getNumCases() < 4) + // Ignore switches with less than three cases. Lookup tables will not make them + // faster, so we don't analyze them. + if (SI->getNumCases() < 3) return false; // Figure out the corresponding result for each case value and phi node in the @@ -3694,7 +3737,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, ConstantInt *MinCaseVal = CI.getCaseValue(); ConstantInt *MaxCaseVal = CI.getCaseValue(); - BasicBlock *CommonDest = 0; + BasicBlock *CommonDest = nullptr; typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; SmallDenseMap<PHINode*, ResultListTy> ResultLists; SmallDenseMap<PHINode*, Constant*> DefaultResults; @@ -3712,7 +3755,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, - Results, TD)) + Results, DL)) return false; // Append the result from this case to the list for each phi. @@ -3723,21 +3766,41 @@ static bool SwitchToLookupTable(SwitchInst *SI, } } - // Get the resulting values for the default case. + // Keep track of the result types. + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + ResultTypes[PHI] = ResultLists[PHI][0].second->getType(); + } + + uint64_t NumResults = ResultLists[PHIs[0]].size(); + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + bool TableHasHoles = (NumResults < TableSize); + + // If the table has holes, we need a constant result for the default case + // or a bitmask that fits in a register. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; - if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList, TD)) - return false; + bool HasDefaultResults = false; + if (TableHasHoles) { + HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), + &CommonDest, DefaultResultsList, DL); + } + bool NeedMask = (TableHasHoles && !HasDefaultResults); + if (NeedMask) { + // As an extra penalty for the validity test we require more cases. + if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). + return false; + if (!(DL && DL->fitsInLegalInteger(TableSize))) + return false; + } + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; Constant *Result = DefaultResultsList[I].second; DefaultResults[PHI] = Result; - ResultTypes[PHI] = Result->getType(); } - APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes)) + if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; // Create the BB that does the lookups. @@ -3755,7 +3818,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Compute the maximum table size representable by the integer type we are // switching upon. unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); - uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize; + uint64_t MaxTableSize = CaseSize > 63 ? 
UINT64_MAX : 1ULL << CaseSize; assert(MaxTableSize >= TableSize && "It is impossible for a switch to have more entries than the max " "representable value of its input integer type's size."); @@ -3770,25 +3833,67 @@ static bool SwitchToLookupTable(SwitchInst *SI, SI->getDefaultDest()->removePredecessor(SI->getParent()); } else { Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( - MinCaseVal->getType(), TableSize)); + MinCaseVal->getType(), TableSize)); Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); } // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); + + if (NeedMask) { + // Before doing the lookup we do the hole check. + // The LookupBB is therefore re-purposed to do the hole check + // and we create a new LookupBB. + BasicBlock *MaskBB = LookupBB; + MaskBB->setName("switch.hole_check"); + LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Build bitmask; fill in a 1 bit for every case. + APInt MaskInt(TableSize, 0); + APInt One(TableSize, 1); + const ResultListTy &ResultList = ResultLists[PHIs[0]]; + for (size_t I = 0, E = ResultList.size(); I != E; ++I) { + uint64_t Idx = (ResultList[I].first->getValue() - + MinCaseVal->getValue()).getLimitedValue(); + MaskInt |= One << Idx; + } + ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt); + + // Get the TableIndex'th bit of the bitmask. + // If this bit is 0 (meaning hole) jump to the default destination, + // else continue with table lookup. + IntegerType *MapTy = TableMask->getType(); + Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy, + "switch.maskindex"); + Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, + "switch.shifted"); + Value *LoBit = Builder.CreateTrunc(Shifted, + Type::getInt1Ty(Mod.getContext()), + "switch.lobit"); + Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); + + Builder.SetInsertPoint(LookupBB); + AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent()); + } + bool ReturnedEarly = false; for (size_t I = 0, E = PHIs.size(); I != E; ++I) { PHINode *PHI = PHIs[I]; + // If using a bitmask, use any value to fill the lookup table holes. + Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], - DefaultResults[PHI], TD); + DV, DL); Value *Result = Table.BuildLookup(TableIndex, Builder); // If the result is used to return immediately from the function, we want to // do that right here. - if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) && - *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) { + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) && + PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) { Builder.CreateRet(Result); ReturnedEarly = true; break; @@ -3811,6 +3916,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, SI->eraseFromParent(); ++NumLookupTables; + if (NeedMask) + ++NumLookupTablesHoles; return true; } @@ -3822,12 +3929,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. 
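// ---- [illustrative aside, not part of the patch] ----
// The hole check built above, reduced to scalar code (sketch; the helper
// is hypothetical). The mask carries a 1 bit per populated table slot, so
// testing bit TableIndex sends holes to the switch's original default
// destination instead of loading a meaningless filler value:
static bool tableSlotIsLive(uint64_t TableMask, unsigned TableIndex) {
  return (TableMask >> TableIndex) & 1; // 0 means hole: branch to default
}
// ---- [end aside] ----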
if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -3837,22 +3944,22 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; // Remove unreachable cases. if (EliminateDeadSwitchCases(SI)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; - if (SwitchToLookupTable(SI, Builder, TTI, TD)) - return SimplifyCFG(BB, TTI, TD) | true; + if (SwitchToLookupTable(SI, Builder, TTI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; return false; } @@ -3889,7 +3996,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } return Changed; } @@ -3913,7 +4020,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD)) + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, DL)) return true; } @@ -3921,8 +4028,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB, TTI, TD) | true; + if (FoldBranchToCommonDest(BI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; return false; } @@ -3937,7 +4044,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -3947,67 +4054,67 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } else if (&*I == cast<Instruction>(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; } } // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. 
- if (SimplifyBranchOnICmpChain(BI, TD, Builder)) + if (SimplifyBranchOnICmpChain(BI, DL, Builder)) return true; // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB, TTI, TD) | true; + if (FoldBranchToCommonDest(BI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we // can hoist it up to the branching block. - if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { - if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { - if (HoistThenElseCodeToIf(BI)) - return SimplifyCFG(BB, TTI, TD) | true; + if (BI->getSuccessor(0)->getSinglePredecessor()) { + if (BI->getSuccessor(1)->getSinglePredecessor()) { + if (HoistThenElseCodeToIf(BI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally - // execute Successor #0 if it branches to successor #1. + // execute Successor #0 if it branches to Successor #1. TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) - return SimplifyCFG(BB, TTI, TD) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL)) + return SimplifyCFG(BB, TTI, DL) | true; } - } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally - // execute Successor #1 if it branches to successor #0. + // execute Successor #1 if it branches to Successor #0. TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) - return SimplifyCFG(BB, TTI, TD) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL)) + return SimplifyCFG(BB, TTI, DL) | true; } // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, TD)) - return SimplifyCFG(BB, TTI, TD) | true; + if (FoldCondBranchOnPHI(BI, DL)) + return SimplifyCFG(BB, TTI, DL) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB, TTI, TD) | true; + return SimplifyCFG(BB, TTI, DL) | true; return false; } @@ -4023,7 +4130,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { if (C->isNullValue()) { // Only look at the first use, avoid hurting compile time with long uselists - User *Use = *I->use_begin(); + User *Use = *I->user_begin(); // Now make sure that there are no instructions in between that can alter // control flow (eg. 
calls) @@ -4119,7 +4226,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // eliminate it, do so now. if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, TD); + Changed |= FoldTwoEntryPHINode(PN, DL); Builder.SetInsertPoint(BB->getTerminator()); if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -4151,6 +4258,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - const DataLayout *TD) { - return SimplifyCFGOpt(TTI, TD).run(BB); + const DataLayout *DL) { + return SimplifyCFGOpt(TTI, DL).run(BB); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index bf3442a..b284e6f 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -13,9 +13,8 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "indvars" - #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/IVUsers.h" @@ -23,13 +22,18 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "indvars" + STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); @@ -44,7 +48,7 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *TD; // May be NULL + const DataLayout *DL; // May be NULL SmallVectorImpl<WeakVH> &DeadInsts; @@ -52,13 +56,14 @@ namespace { public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) : + SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) : L(Loop), LI(LPM->getAnalysisIfAvailable<LoopInfo>()), SE(SE), - TD(LPM->getAnalysisIfAvailable<DataLayout>()), DeadInsts(Dead), Changed(false) { + DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; assert(LI && "IV simplification requires LoopInfo"); } @@ -67,7 +72,7 @@ namespace { /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies /// all simplicitions to users of an IV. - void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL); + void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); @@ -75,6 +80,9 @@ namespace { void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, bool IsSigned); + + Instruction *splitOverflowIntrinsic(Instruction *IVUser, + const DominatorTree *DT); }; } @@ -87,25 +95,25 @@ namespace { /// be folded (in case more folding opportunities have been exposed). /// Otherwise return null. 
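// ---- [illustrative aside, not part of the patch] ----
// The kind of rewrite foldIVUser (below) performs, at source level
// (sketch). If SCEV can prove the IV is always a multiple of 4, then
// (I + 1) >> 2 equals I >> 2: the low two bits of I are zero, so the +1
// never carries into the shifted-out range and the add becomes dead.
static unsigned beforeIVFold(unsigned N) { unsigned I = 4 * N; return (I + 1) >> 2; }
static unsigned afterIVFold(unsigned N)  { unsigned I = 4 * N; return I >> 2; }
// ---- [end aside] ----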
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { - Value *IVSrc = 0; + Value *IVSrc = nullptr; unsigned OperIdx = 0; - const SCEV *FoldedExpr = 0; + const SCEV *FoldedExpr = nullptr; switch (UseInst->getOpcode()) { default: - return 0; + return nullptr; case Instruction::UDiv: case Instruction::LShr: // We're only interested in the case where we know something about // the numerator and have a constant denominator. if (IVOperand != UseInst->getOperand(OperIdx) || !isa<ConstantInt>(UseInst->getOperand(1))) - return 0; + return nullptr; // Attempt to fold a binary operator with constant operand. // e.g. ((I + 1) >> 2) => I >> 2 if (!isa<BinaryOperator>(IVOperand) || !isa<ConstantInt>(IVOperand->getOperand(1))) - return 0; + return nullptr; IVSrc = IVOperand->getOperand(0); // IVSrc must be the (SCEVable) IV, since the other operand is const. @@ -116,7 +124,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) // Get a constant for the divisor. See createSCEV. uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); if (D->getValue().uge(BitWidth)) - return 0; + return nullptr; D = ConstantInt::get(UseInst->getContext(), APInt::getOneBitSet(BitWidth, D->getZExtValue())); @@ -125,11 +133,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) } // We have something that might fold it's operand. Compare SCEVs. if (!SE->isSCEVable(UseInst->getType())) - return 0; + return nullptr; // Bypass the operand if SCEV can prove it has no effect. if (SE->getSCEV(UseInst) != FoldedExpr) - return 0; + return nullptr; DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand << " -> " << *UseInst << '\n'); @@ -263,6 +271,69 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, return true; } +/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow +/// analysis and optimization. +/// +/// \return A new value representing the non-overflowing add if possible, +/// otherwise return the original value. +Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser, + const DominatorTree *DT) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser); + if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow) + return IVUser; + + // Find a branch guarded by the overflow check. + BranchInst *Branch = nullptr; + Instruction *AddVal = nullptr; + for (User *U : II->users()) { + if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) { + if (ExtractInst->getNumIndices() != 1) + continue; + if (ExtractInst->getIndices()[0] == 0) + AddVal = ExtractInst; + else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse()) + Branch = dyn_cast<BranchInst>(ExtractInst->user_back()); + } + } + if (!AddVal || !Branch) + return IVUser; + + BasicBlock *ContinueBB = Branch->getSuccessor(1); + if (std::next(pred_begin(ContinueBB)) != pred_end(ContinueBB)) + return IVUser; + + // Check if all users of the add are provably NSW. + bool AllNSW = true; + for (Use &U : AddVal->uses()) { + if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser())) { + BasicBlock *UseBB = UseInst->getParent(); + if (PHINode *PHI = dyn_cast<PHINode>(UseInst)) + UseBB = PHI->getIncomingBlock(U); + if (!DT->dominates(ContinueBB, UseBB)) { + AllNSW = false; + break; + } + } + } + if (!AllNSW) + return IVUser; + + // Go for it... 
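// ---- [illustrative aside, not part of the patch] ----
// The pattern splitOverflowIntrinsic matches, in C terms. This is a sketch
// using the Clang/GCC __builtin_sadd_overflow builtin as a stand-in for
// llvm.sadd.with.overflow; the function is hypothetical. Every use of Sum
// past the check is dominated by the no-overflow path, so the pass may
// recompute it as a plain add carrying the nsw flag, which SCEV and the
// indvar logic can then reason about.
#include <cstdlib>
static int guardedAdd(int A, int B) {
  int Sum;
  if (__builtin_sadd_overflow(A, B, &Sum)) // extractvalue of the i1 flag
    abort();                               // overflow path never continues
  return Sum;                              // provably no signed wrap here
}
// ---- [end aside] ----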
+ IRBuilder<> Builder(IVUser); + Instruction *AddInst = dyn_cast<Instruction>( + Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1))); + + // The caller expects the new add to have the same form as the intrinsic. The + // IV operand position must be the same. + assert((AddInst->getOpcode() == Instruction::Add && + AddInst->getOperand(0) == II->getOperand(0)) && + "Bad add instruction created from overflow intrinsic."); + + AddVal->replaceAllUsesWith(AddInst); + DeadInsts.push_back(AddVal); + return AddInst; +} + /// pushIVUsers - Add all uses of Def to the current IV's worklist. /// static void pushIVUsers( @@ -270,16 +341,15 @@ static void pushIVUsers( SmallPtrSet<Instruction*,16> &Simplified, SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { - for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); + for (User *U : Def->users()) { + Instruction *UI = cast<Instruction>(U); // Avoid infinite or exponential worklist processing. // Also ensure unique worklist users. // If Def is a LoopPhi, it may not be in the Simplified set, so check for // self edges first. - if (User != Def && Simplified.insert(User)) - SimpleIVUsers.push_back(std::make_pair(User, Def)); + if (UI != Def && Simplified.insert(UI)) + SimpleIVUsers.push_back(std::make_pair(UI, Def)); } } @@ -334,8 +404,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { while (!SimpleIVUsers.empty()) { std::pair<Instruction*, Instruction*> UseOper = SimpleIVUsers.pop_back_val(); + Instruction *UseInst = UseOper.first; + // Bypass back edges to avoid extra work. - if (UseOper.first == CurrIV) continue; + if (UseInst == CurrIV) continue; + + if (V && V->shouldSplitOverflowInstrinsics()) { + UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree()); + if (!UseInst) + continue; + } Instruction *IVOperand = UseOper.second; for (unsigned N = 0; IVOperand; ++N) { diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp index f9687e4d..33b3637 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -14,14 +14,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -29,6 +28,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "instsimplify" + STATISTIC(NumSimplified, "Number of redundant instructions removed"); namespace { @@ -38,23 +39,27 @@ namespace { initializeInstSimplifierPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<TargetLibraryInfo>(); } /// runOnFunction - Remove instructions that simplify. 
- bool runOnFunction(Function &F) { - const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); + bool runOnFunction(Function &F) override { + const DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; do { - for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) - for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) + // Here be subtlety: the iterator must be incremented before the loop + // body (not sure why), so a range-for loop won't work here. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not @@ -63,16 +68,23 @@ namespace { continue; // Don't waste time simplifying unused instructions. if (!I->use_empty()) - if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { + if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) { // Mark all uses for resimplification next time round the loop. - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) - Next->insert(cast<Instruction>(*UI)); + for (User *U : I->users()) + Next->insert(cast<Instruction>(U)); I->replaceAllUsesWith(V); ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + if (res) { + // RecursivelyDeleteTriviallyDeadInstruction can remove + // more than one instruction, so simply incrementing the + // iterator does not work. When instructions get deleted + // re-iterate instead. + BI = BB->begin(); BE = BB->end(); + Changed |= res; + } } // Place the list of instructions to simplify on the next loop iteration diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 15b3e66..3b61bb5 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -43,7 +44,7 @@ namespace { class LibCallOptimization { protected: Function *Caller; - const DataLayout *TD; + const DataLayout *DL; const TargetLibraryInfo *TLI; const LibCallSimplifier *LCS; LLVMContext* Context; @@ -63,11 +64,11 @@ public: /// change the calling convention. 
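// ---- [illustrative aside, not part of the patch] ----
// On the "not sure why" comment in the hunk above: the increment must come
// before the loop body because the body may erase *I, and advancing an
// iterator that points at an erased instruction is a use-after-free. The
// safe shape is:
//   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
//     Instruction *I = BI++;  // step past I before it can be deleted
//     ...simplify, and possibly erase, I...
//   }
// A range-for performs its increment after the body runs, so it cannot be
// used safely when the body deletes the current element.
// ---- [end aside] ----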
virtual bool ignoreCallingConv() { return false; } - Value *optimizeCall(CallInst *CI, const DataLayout *TD, + Value *optimizeCall(CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); - this->TD = TD; + this->DL = DL; this->TLI = TLI; this->LCS = LCS; if (CI->getCalledFunction()) @@ -75,7 +76,7 @@ public: // We never change the calling convention. if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C) - return NULL; + return nullptr; return callOptimizer(CI->getCalledFunction(), CI, B); } @@ -88,9 +89,8 @@ public: /// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. static bool isOnlyUsedInZeroEqualityComparison(Value *V) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality()) if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) @@ -104,9 +104,8 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) { /// isOnlyUsedInEqualityComparison - Return true if it is only used in equality /// comparisons with With. static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (User *U : V->users()) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality() && IC->getOperand(1) == With) continue; // Unknown instruction. @@ -152,7 +151,8 @@ protected: struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization { CallInst *CI; - bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, + bool isString) const override { if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) return true; if (ConstantInt *SizeCI = @@ -175,7 +175,8 @@ struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization { }; struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); LLVMContext &Context = CI->getParent()->getContext(); @@ -184,21 +185,22 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(Context) || + FT->getParamType(3) != DL->getIntPtrType(Context)) + return nullptr; if (isFoldable(3, 2, false)) { B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); LLVMContext &Context = CI->getParent()->getContext(); @@ -207,21 +209,22 @@ 
struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(Context) || + FT->getParamType(3) != DL->getIntPtrType(Context)) + return nullptr; if (isFoldable(3, 2, false)) { B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; struct MemSetChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); LLVMContext &Context = CI->getParent()->getContext(); @@ -230,9 +233,9 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(Context) || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(Context) || + FT->getParamType(3) != DL->getIntPtrType(Context)) + return nullptr; if (isFoldable(3, 2, false)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), @@ -240,12 +243,13 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; StringRef Name = Callee->getName(); FunctionType *FT = Callee->getFunctionType(); @@ -256,8 +260,8 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(Context)) + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // __strcpy_chk(x,x) -> x return Src; @@ -269,28 +273,29 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. if (isFoldable(2, 1, true)) { - Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6)); return Ret; } else { // Maybe we can still fold __strcpy_chk to __memcpy_chk. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // This optimization requires DataLayout.
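// ---- [illustrative aside, not part of the patch] ----
// The common shape of the *_chk folds in this file, at source level
// (sketch; the helper is hypothetical). When isFoldable proves that the
// known object size covers the copy length, the fortified call collapses
// to the plain libc operation and the runtime check disappears:
#include <cstring>
static void *memcpyChkFolded(void *Dst, const void *Src, size_t Len) {
  // __memcpy_chk(Dst, Src, Len, ObjSize) with a proven ObjSize >= Len:
  return memcpy(Dst, Src, Len);
}
// ---- [end aside] ----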
- if (!TD) return 0; + if (!DL) return nullptr; Value *Ret = EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(Context), Len), - CI->getArgOperand(2), B, TD, TLI); + ConstantInt::get(DL->getIntPtrType(Context), Len), + CI->getArgOperand(2), B, DL, TLI); return Ret; } - return 0; + return nullptr; } }; struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; StringRef Name = Callee->getName(); FunctionType *FT = Callee->getFunctionType(); @@ -301,13 +306,13 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) - return 0; + FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) - Value *StrLen = EmitStrLen(Src, B, TD, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + Value *StrLen = EmitStrLen(Src, B, DL, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } // If a) we don't have any length information, or b) we know this will @@ -316,31 +321,32 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. if (isFoldable(2, 1, true)) { - Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6)); return Ret; } else { // Maybe we can still fold __stpcpy_chk to __memcpy_chk. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // This optimization requires DataLayout.
- if (!TD) return 0; + if (!DL) return nullptr; Type *PT = FT->getParamType(0); - Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); Value *DstEnd = B.CreateGEP(Dst, - ConstantInt::get(TD->getIntPtrType(PT), + ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); - if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI)) - return 0; + if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI)) + return nullptr; return DstEnd; } - return 0; + return nullptr; } }; struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { this->CI = CI; StringRef Name = Callee->getName(); FunctionType *FT = Callee->getFunctionType(); @@ -351,16 +357,16 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || !FT->getParamType(2)->isIntegerTy() || - FT->getParamType(3) != TD->getIntPtrType(Context)) - return 0; + FT->getParamType(3) != DL->getIntPtrType(Context)) + return nullptr; if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TD, TLI, + CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7)); return Ret; } - return 0; + return nullptr; } }; @@ -369,14 +375,15 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { //===----------------------------------------------------------------------===// struct StrCatOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strcat" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) - return 0; + return nullptr; // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); @@ -384,7 +391,7 @@ struct StrCatOpt : public LibCallOptimization { // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; --Len; // Unbias length. // Handle the simple, do-nothing case: strcat(x, "") -> x @@ -392,7 +399,7 @@ struct StrCatOpt : public LibCallOptimization { return Dst; // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; return emitStrLenMemCpy(Src, Dst, Len, B); } @@ -401,9 +408,9 @@ struct StrCatOpt : public LibCallOptimization { IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + Value *DstLen = EmitStrLen(Dst, B, DL, TLI); if (!DstLen) - return 0; + return nullptr; // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -413,13 +420,14 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
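// ---- [illustrative aside, not part of the patch] ----
// emitStrLenMemCpy's effect in plain C (sketch; the helper is
// hypothetical). With the source length a known constant, strcat becomes
// one strlen of the destination plus a memcpy of SrcLen + 1 bytes, the +1
// carrying the nul terminator along:
#include <cstring>
static char *strcatFolded(char *Dst, const char *Src, size_t SrcLen) {
  memcpy(Dst + strlen(Dst), Src, SrcLen + 1);
  return Dst;
}
// ---- [end aside] ----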
B.CreateMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + ConstantInt::get(DL->getIntPtrType(*Context), Len + 1), 1); return Dst; } }; struct StrNCatOpt : public StrCatOpt { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strncat" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || @@ -427,7 +435,7 @@ struct StrNCatOpt : public StrCatOpt { FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); @@ -438,11 +446,11 @@ struct StrNCatOpt : public StrCatOpt { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else - return 0; + return nullptr; // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; + if (SrcLen == 0) return nullptr; --SrcLen; // Unbias length. // Handle the simple, do-nothing cases: @@ -451,10 +459,10 @@ struct StrNCatOpt : public StrCatOpt { if (SrcLen == 0 || Len == 0) return Dst; // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // We don't optimize this case - if (Len < SrcLen) return 0; + if (Len < SrcLen) return nullptr; // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further @@ -463,38 +471,42 @@ struct StrNCatOpt : public StrCatOpt { }; struct StrChrOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strchr" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || !FT->getParamType(1)->isIntegerTy(32)) - return 0; + return nullptr; Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (CharC == 0) { + if (!CharC) { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; uint64_t Len = GetStringLength(SrcStr); if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. - return 0; + return nullptr; return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. - ConstantInt::get(TD->getIntPtrType(*Context), Len), - B, TD, TLI); + ConstantInt::get(DL->getIntPtrType(*Context), Len), + B, DL, TLI); } // Otherwise, the character is a constant, see if the first argument is // a string literal. If so, we can constant fold. StringRef Str; - if (!getConstantStringInfo(SrcStr, Str)) - return 0; + if (!getConstantStringInfo(SrcStr, Str)) { + if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p) + return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); + return nullptr; + } // Compute the offset, make sure to handle the case when we're searching for // zero (a weird way to spell strlen). 
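// ---- [illustrative aside, not part of the patch] ----
// The new fold added above for a non-constant haystack, in plain C
// (sketch): searching a string for its terminator is exactly strlen.
#include <cstring>
static char *strchrZeroFolded(char *P) {
  return P + strlen(P); // strchr(p, '\0') == p + strlen(p)
}
// ---- [end aside] ----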
@@ -509,28 +521,29 @@ struct StrChrOpt : public LibCallOptimization { }; struct StrRChrOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strrchr" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || !FT->getParamType(1)->isIntegerTy(32)) - return 0; + return nullptr; Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); // Cannot fold anything if we're not looking for a constant. if (!CharC) - return 0; + return nullptr; StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) - if (TD && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, TD, TLI); - return 0; + if (DL && CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, DL, TLI); + return nullptr; } // Compute the offset. @@ -545,14 +558,15 @@ struct StrRChrOpt : public LibCallOptimization { }; struct StrCmpOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strcmp" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 @@ -578,19 +592,20 @@ struct StrCmpOpt : public LibCallOptimization { uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B, TD, TLI); + ConstantInt::get(DL->getIntPtrType(*Context), + std::min(Len1, Len2)), B, DL, TLI); } - return 0; + return nullptr; } }; struct StrNCmpOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strncmp" function prototype. 
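// ---- [illustrative aside, not part of the patch] ----
// Why the strcmp -> memcmp fold above may compare only min(Len1, Len2)
// bytes, where the lengths include the nul: if one string is a proper
// prefix of the other, its terminator lies inside the compared span and
// forces a difference, so the shorter span already decides the ordering.
// Sketch (hypothetical helper):
#include <cstring>
static int strcmpAsMemcmp(const char *A, size_t LenA,   // LenA = strlen(A)+1
                          const char *B, size_t LenB) { // LenB = strlen(B)+1
  return memcmp(A, B, LenA < LenB ? LenA : LenB);
}
// ---- [end aside] ----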
FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || @@ -598,7 +613,7 @@ struct StrNCmpOpt : public LibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 @@ -609,13 +624,13 @@ struct StrNCmpOpt : public LibCallOptimization { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Length = LengthArg->getZExtValue(); else - return 0; + return nullptr; if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); - if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); + if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -635,66 +650,68 @@ struct StrNCmpOpt : public LibCallOptimization { if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - return 0; + return nullptr; } }; struct StrCpyOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "strcpy" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + ConstantInt::get(DL->getIntPtrType(*Context), Len), 1); return Dst; } }; struct StpCpyOpt: public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Verify the "stpcpy" function prototype. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) - Value *StrLen = EmitStrLen(Src, B, TD, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + Value *StrLen = EmitStrLen(Src, B, DL, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } // See if we can get the length of the input string. 
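Both copy optimizers above require DataLayout for the intptr type; once the source length is known, the copy degrades to a fixed-size memcpy that includes the nul. Illustrative effect (stpcpy is POSIX, not ISO C):

    #include <cstring>

    void copy_lit(char *dst) { strcpy(dst, "hi"); }   // -> memcpy(dst, "hi", 3)
    char *stp_self(char *x)  { return stpcpy(x, x); } // -> x + strlen(x)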
uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; Type *PT = FT->getParamType(0); - Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); Value *DstEnd = B.CreateGEP(Dst, - ConstantInt::get(TD->getIntPtrType(PT), + ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the @@ -705,13 +722,14 @@ struct StpCpyOpt: public LibCallOptimization { }; struct StrNCpyOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); @@ -719,7 +737,7 @@ struct StrNCpyOpt : public LibCallOptimization { // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; + if (SrcLen == 0) return nullptr; --SrcLen; if (SrcLen == 0) { @@ -732,33 +750,34 @@ struct StrNCpyOpt : public LibCallOptimization { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) Len = LengthArg->getZExtValue(); else - return 0; + return nullptr; if (Len == 0) return Dst; // strncpy(x, y, 0) -> x // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // Let strncpy handle the zero padding - if (Len > SrcLen+1) return 0; + if (Len > SrcLen+1) return nullptr; Type *PT = FT->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(PT), Len), 1); + ConstantInt::get(DL->getIntPtrType(PT), Len), 1); return Dst; } }; struct StrLenOpt : public LibCallOptimization { - virtual bool ignoreCallingConv() { return true; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + bool ignoreCallingConv() override { return true; } + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; Value *Src = CI->getArgOperand(0); @@ -766,22 +785,38 @@ struct StrLenOpt : public LibCallOptimization { if (uint64_t Len = GetStringLength(Src)) return ConstantInt::get(CI->getType(), Len-1); + // strlen(x?"foo":"bars") --> x ? 
3 : 4 + if (SelectInst *SI = dyn_cast<SelectInst>(Src)) { + uint64_t LenTrue = GetStringLength(SI->getTrueValue()); + uint64_t LenFalse = GetStringLength(SI->getFalseValue()); + if (LenTrue && LenFalse) { + emitOptimizationRemark(*Context, "simplify-libcalls", *Caller, + SI->getDebugLoc(), + "folded strlen(select) to select of constants"); + return B.CreateSelect(SI->getCondition(), + ConstantInt::get(CI->getType(), LenTrue-1), + ConstantInt::get(CI->getType(), LenFalse-1)); + } + } + // strlen(x) != 0 --> *x != 0 // strlen(x) == 0 --> *x == 0 if (isOnlyUsedInZeroEqualityComparison(CI)) return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); - return 0; + + return nullptr; } }; struct StrPBrkOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || FT->getReturnType() != FT->getParamType(0)) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -802,20 +837,21 @@ struct StrPBrkOpt : public LibCallOptimization { } // strpbrk(s, "a") -> strchr(s, 'a') - if (TD && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); + if (DL && HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI); - return 0; + return nullptr; } }; struct StrToOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy()) - return 0; + return nullptr; Value *EndPtr = CI->getArgOperand(1); if (isa<ConstantPointerNull>(EndPtr)) { @@ -824,18 +860,19 @@ struct StrToOpt : public LibCallOptimization { CI->addAttribute(1, Attribute::NoCapture); } - return 0; + return nullptr; } }; struct StrSpnOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -853,18 +890,19 @@ struct StrSpnOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), Pos); } - return 0; + return nullptr; } }; struct StrCSpnOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -882,37 +920,37 @@ struct StrCSpnOpt : public LibCallOptimization { } // strcspn(s, "") -> strlen(s) - if (TD && 
HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); + if (DL && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, DL, TLI); - return 0; + return nullptr; } }; struct StrStrOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isPointerTy()) - return 0; + return nullptr; // fold strstr(x, x) -> x. if (CI->getArgOperand(0) == CI->getArgOperand(1)) return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI); if (!StrLen) - return 0; + return nullptr; Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD, TLI); + StrLen, B, DL, TLI); if (!StrNCmp) - return 0; - for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); - UI != UE; ) { + return nullptr; + for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) { ICmpInst *Old = cast<ICmpInst>(*UI++); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, ConstantInt::getNullValue(StrNCmp->getType()), @@ -946,20 +984,21 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } - return 0; + return nullptr; } }; struct MemCmpOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy(32)) - return 0; + return nullptr; Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); @@ -968,7 +1007,7 @@ struct MemCmpOpt : public LibCallOptimization { // Make sure we have a constant length. ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!LenC) return 0; + if (!LenC) return nullptr; uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 @@ -989,7 +1028,7 @@ struct MemCmpOpt : public LibCallOptimization { getConstantStringInfo(RHS, RHSStr)) { // Make sure we're not reading out-of-bounds memory. if (Len > LHSStr.size() || Len > RHSStr.size()) - return 0; + return nullptr; // Fold the memcmp and normalize the result. This way we get consistent // results across multiple platforms. 
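A hedged source-level view of the StrCSpnOpt and StrStrOpt folds above (names invented for the example):

    #include <cstring>

    size_t none_of(const char *s) { return strcspn(s, ""); }       // -> strlen(s)
    const char *first_y(const char *x) { return strstr(x, "y"); }  // -> strchr(x, 'y')
    bool starts_with(const char *a, const char *b) {
      // Result only used in an equality compare against 'a', so per the
      // hunk above this becomes strncmp(a, b, strlen(b)) == 0:
      return strstr(a, b) == a;
    }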
uint64_t Ret = 0; @@ -1001,21 +1040,22 @@ struct MemCmpOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), Ret); } - return 0; + return nullptr; } }; struct MemCpyOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(*Context)) + return nullptr; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1025,16 +1065,17 @@ struct MemCpyOpt : public LibCallOptimization { }; struct MemMoveOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; + FT->getParamType(2) != DL->getIntPtrType(*Context)) + return nullptr; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1044,16 +1085,17 @@ struct MemMoveOpt : public LibCallOptimization { }; struct MemSetOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) - return 0; + FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) + return nullptr; // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); @@ -1072,26 +1114,26 @@ struct MemSetOpt : public LibCallOptimization { struct UnaryDoubleFPOpt : public LibCallOptimization { bool CheckRetType; UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || !FT->getParamType(0)->isDoubleTy()) - return 0; + return nullptr; if (CheckRetType) { // Check if all the uses for function like 'sin' are converted to float. 
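The three mem* optimizers above simply re-emit the libcall as the matching LLVM intrinsic through IRBuilder. A minimal sketch of that pattern, mirroring MemSetOpt (fragment, not self-contained; assumes the in-scope IRBuilder<> B and the CallInst *CI being visited):

    // memset(p, v, n) -> llvm.memset(p, v, n, align 1)
    Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
    B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
    return CI->getArgOperand(0);  // memset returns its first argument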
- for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); - ++UseI) { - FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); - if (Cast == 0 || !Cast->getType()->isFloatTy()) - return 0; + for (User *U : CI->users()) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; } } // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; + if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy()) + return nullptr; // floor((double)floatval) -> (double)floorf(floatval) Value *V = Cast->getOperand(0); @@ -1100,6 +1142,49 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { } }; +// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' +struct BinaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + BinaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return nullptr; + + if (CheckRetType) { + // Check if all the uses for function like 'fmin/fmax' are converted to + // float. + for (User *U : CI->users()) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; + } + } + + // If this is something like 'fmin((double)floatval1, (double)floatval2)', + // we convert it to fminf. 
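The new BinaryDoubleFPOpt extends the unary shrinking above to two-argument calls: if both operands are fpext from float and every use of the double result is an fptrunc back to float, the call is replaced by the float variant. Caller-side illustration:

    #include <cmath>

    float min2(float a, float b) {
      // (float)fmin((double)a, (double)b) shrinks to fminf(a, b):
      return (float)fmin((double)a, (double)b);
    }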
+ FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1)); + if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() || + !Cast2 || !Cast2->getOperand(0)->getType()->isFloatTy()) + return nullptr; + + // fmin((double)floatval1, (double)floatval2) + // -> (double)fmin(floatval1, floatval2) + Value *V = nullptr; + Value *V1 = Cast1->getOperand(0); + Value *V2 = Cast2->getOperand(0); + V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, + Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; + struct UnsafeFPLibCallOptimization : public LibCallOptimization { bool UnsafeFPShrink; UnsafeFPLibCallOptimization(bool UnsafeFPShrink) { @@ -1109,8 +1194,9 @@ struct UnsafeFPLibCallOptimization : public LibCallOptimization { struct CosOpt : public UnsafeFPLibCallOptimization { CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1136,8 +1222,9 @@ struct CosOpt : public UnsafeFPLibCallOptimization { struct PowOpt : public UnsafeFPLibCallOptimization { PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1162,10 +1249,16 @@ struct PowOpt : public UnsafeFPLibCallOptimization { hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, LibFunc::exp2l)) return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + // pow(10.0, x) -> exp10(x) + if (Op1C->isExactlyValue(10.0) && + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, + LibFunc::exp10l)) + return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B, + Callee->getAttributes()); } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return Ret; + if (!Op2C) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -1198,14 +1291,15 @@ struct PowOpt : public UnsafeFPLibCallOptimization { if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); - return 0; + return nullptr; } }; struct Exp2Opt : public UnsafeFPLibCallOptimization { Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - Value *Ret = NULL; + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "exp2" && TLI->has(LibFunc::exp2f)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1222,37 +1316,37 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 - Value *LdExpArg = 0; - if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { - if 
(OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); - } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); - } + LibFunc::Func LdExp = LibFunc::ldexpl; + if (Op->getType()->isFloatTy()) + LdExp = LibFunc::ldexpf; + else if (Op->getType()->isDoubleTy()) + LdExp = LibFunc::ldexp; + + if (TLI->has(LdExp)) { + Value *LdExpArg = nullptr; + if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); + } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); + } - if (LdExpArg) { - const char *Name; - if (Op->getType()->isFloatTy()) - Name = "ldexpf"; - else if (Op->getType()->isDoubleTy()) - Name = "ldexp"; - else - Name = "ldexpl"; - - Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); - if (!Op->getType()->isFloatTy()) - One = ConstantExpr::getFPExtend(One, Op->getType()); - - Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), - B.getInt32Ty(), NULL); - CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); - if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); + if (LdExpArg) { + Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); + if (!Op->getType()->isFloatTy()) + One = ConstantExpr::getFPExtend(One, Op->getType()); - return CI; + Module *M = Caller->getParent(); + Value *Callee = + M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), + Op->getType(), B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; + } } return Ret; } @@ -1261,11 +1355,12 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { struct SinCosPiOpt : public LibCallOptimization { SinCosPiOpt() {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Make sure the prototype is as expected, otherwise the rest of the // function is probably invalid and likely to abort. if (!isTrigLibCall(CI)) - return 0; + return nullptr; Value *Arg = CI->getArgOperand(0); SmallVector<CallInst *, 1> SinCalls; @@ -1277,14 +1372,13 @@ struct SinCosPiOpt : public LibCallOptimization { // Look for all compatible sinpi, cospi and sincospi calls with the same // argument. If there are enough (in some sense) we can make the // substitution. - for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ++UI) - classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls, + for (User *U : Arg->users()) + classifyArgUse(U, CI->getParent(), IsFloat, SinCalls, CosCalls, SinCosCalls); // It's only worthwhile if both sinpi and cospi are actually used. 
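The rewritten Exp2Opt above now asks TargetLibraryInfo for the right ldexp flavor (ldexpf/ldexp/ldexpl) and only fires when the target actually has it; the fold itself is unchanged. Illustratively, for a signed integer of at most 32 bits:

    #include <cmath>

    double pow2(int i) {
      return exp2((double)i);   // -> ldexp(1.0, i)
    }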
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) - return 0; + return nullptr; Value *Sin, *Cos, *SinCos; insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, @@ -1294,7 +1388,7 @@ struct SinCosPiOpt : public LibCallOptimization { replaceTrigInsts(CosCalls, Cos); replaceTrigInsts(SinCosCalls, SinCos); - return 0; + return nullptr; } bool isTrigLibCall(CallInst *CI) { @@ -1334,7 +1428,7 @@ struct SinCosPiOpt : public LibCallOptimization { SinCalls.push_back(CI); else if (Func == LibFunc::cospif) CosCalls.push_back(CI); - else if (Func == LibFunc::sincospi_stretf) + else if (Func == LibFunc::sincospif_stret) SinCosCalls.push_back(CI); } else { if (Func == LibFunc::sinpi) @@ -1363,7 +1457,7 @@ struct SinCosPiOpt : public LibCallOptimization { Triple T(OrigCallee->getParent()->getTargetTriple()); if (UseFloat) { - Name = "__sincospi_stretf"; + Name = "__sincospif_stret"; assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); // x86_64 can't use {float, float} since that would be returned in both @@ -1412,14 +1506,15 @@ struct SinCosPiOpt : public LibCallOptimization { //===----------------------------------------------------------------------===// struct FFSOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) || !FT->getParamType(0)->isIntegerTy()) - return 0; + return nullptr; Value *Op = CI->getArgOperand(0); @@ -1445,13 +1540,14 @@ struct FFSOpt : public LibCallOptimization { }; struct AbsOpt : public LibCallOptimization { - virtual bool ignoreCallingConv() { return true; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + bool ignoreCallingConv() override { return true; } + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || FT->getParamType(0) != FT->getReturnType()) - return 0; + return nullptr; // abs(x) -> x >s -1 ? 
x : -x Value *Op = CI->getArgOperand(0); @@ -1463,12 +1559,13 @@ struct AbsOpt : public LibCallOptimization { }; struct IsDigitOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // isdigit(c) -> (c-'0') <u 10 Value *Op = CI->getArgOperand(0); @@ -1479,12 +1576,13 @@ struct IsDigitOpt : public LibCallOptimization { }; struct IsAsciiOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // isascii(c) -> c <u 128 Value *Op = CI->getArgOperand(0); @@ -1494,12 +1592,13 @@ struct IsAsciiOpt : public LibCallOptimization { }; struct ToAsciiOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // toascii(c) -> c & 0x7f return B.CreateAnd(CI->getArgOperand(0), @@ -1514,7 +1613,8 @@ struct ToAsciiOpt : public LibCallOptimization { struct ErrorReportingOpt : public LibCallOptimization { ErrorReportingOpt(int S = -1) : StreamArg(S) {} - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &) override { // Error reporting calls should be cold, mark them as such. // This applies even to non-builtin calls: it is only a hint and applies to // functions that the frontend might not understand as builtins. @@ -1528,7 +1628,7 @@ struct ErrorReportingOpt : public LibCallOptimization { CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); } - return 0; + return nullptr; } protected: @@ -1565,7 +1665,7 @@ struct PrintFOpt : public LibCallOptimization { // Check for a fixed format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) - return 0; + return nullptr; // Empty format string -> noop. if (FormatStr.empty()) // Tolerate printf's declared void. @@ -1576,11 +1676,11 @@ struct PrintFOpt : public LibCallOptimization { // is used, in general the printf return value is not compatible with either // putchar() or puts(). if (!CI->use_empty()) - return 0; + return nullptr; // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1592,7 +1692,7 @@ struct PrintFOpt : public LibCallOptimization { // pass to be run after this pass, to merge duplicate strings. 
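Several of the integer folds above are simple strength reductions; together with the first printf case they look like this from the caller's side (illustrative):

    #include <cctype>
    #include <cstdio>

    int d(int c) { return isdigit(c); }   // -> (unsigned)(c - '0') < 10
    int a(int c) { return isascii(c); }   // -> (unsigned)c < 128
    int t(int c) { return toascii(c); }   // -> c & 0x7f
    void x()     { printf("x"); }         // -> putchar('x'), result unused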
FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - Value *NewCI = EmitPutS(GV, B, TD, TLI); + Value *NewCI = EmitPutS(GV, B, DL, TLI); return (CI->use_empty() || !NewCI) ? NewCI : ConstantInt::get(CI->getType(), FormatStr.size()+1); @@ -1602,7 +1702,7 @@ struct PrintFOpt : public LibCallOptimization { // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -1611,18 +1711,19 @@ struct PrintFOpt : public LibCallOptimization { // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - return EmitPutS(CI->getArgOperand(1), B, TD, TLI); + return EmitPutS(CI->getArgOperand(1), B, DL, TLI); } - return 0; + return nullptr; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Require one fixed pointer argument and an integer/void result. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return 0; + return nullptr; if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1639,7 +1740,7 @@ struct PrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1649,7 +1750,7 @@ struct SPrintFOpt : public LibCallOptimization { // Check for a fixed format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; + return nullptr; // If we just have a format string (nothing else crazy) transform it. if (CI->getNumArgOperands() == 2) { @@ -1657,14 +1758,14 @@ struct SPrintFOpt : public LibCallOptimization { // %% -> % in the future if we cared. for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') - return 0; // we found a format specifier, bail out. + return nullptr; // we found a format specifier, bail out. // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + ConstantInt::get(DL->getIntPtrType(*Context), // Copy the FormatStr.size() + 1), 1); // nul byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1673,12 +1774,12 @@ struct SPrintFOpt : public LibCallOptimization { // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumArgOperands() < 3) - return 0; + return nullptr; // Decode the second character of the format string. 
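The format-string-only sprintf case above needs DataLayout and lowers to a memcpy of the literal plus its nul, with the return value folded to the length. Sketch:

    #include <cstdio>

    int fill(char *dst) {
      return sprintf(dst, "done");  // -> memcpy(dst, "done", 5); result folds to 4
    }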
if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); @@ -1690,14 +1791,14 @@ struct SPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 's') { // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI); if (!Len) - return 0; + return nullptr; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1706,16 +1807,17 @@ struct SPrintFOpt : public LibCallOptimization { // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); } - return 0; + return nullptr; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Require two fixed pointer arguments and an integer result. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1732,7 +1834,7 @@ struct SPrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1745,58 +1847,59 @@ struct FPrintFOpt : public LibCallOptimization { // All the optimizations depend on the format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; + return nullptr; // Do not do any of the following transformations if the fprintf return // value is used, in general the fprintf return value is not compatible // with fwrite(), fputc() or fputs(). if (!CI->use_empty()) - return 0; + return nullptr; // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. - return 0; // We found a format specifier. + return nullptr; // We found a format specifier. // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; return EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), + ConstantInt::get(DL->getIntPtrType(*Context), FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); + CI->getArgOperand(0), B, DL, TLI); } // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumArgOperands() < 3) - return 0; + return nullptr; // Decode the second character of the format string. 
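Source-level view of the two sprintf specifier cases above (illustrative):

    #include <cstdio>
    #include <cstring>

    int one_char(char *dst, int c) {
      return sprintf(dst, "%c", c);  // -> store (char)c, then a nul; result 1
    }
    int copy_str(char *dst, const char *s) {
      return sprintf(dst, "%s", s);  // -> memcpy(dst, s, strlen(s)+1); result strlen(s)
    }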
if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) - return 0; - return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + return nullptr; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); } - return 0; + return nullptr; } - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Require two fixed paramters as pointers and integer result. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1813,12 +1916,13 @@ struct FPrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; struct FWriteOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { ErrorReportingOpt ER(/* StreamArg = */ 3); (void) ER.callOptimizer(Callee, CI, B); @@ -1829,12 +1933,12 @@ struct FWriteOpt : public LibCallOptimization { !FT->getParamType(2)->isIntegerTy() || !FT->getParamType(3)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; // Get the element size and count. ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!SizeC || !CountC) return 0; + if (!SizeC || !CountC) return nullptr; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); // If this is writing zero records, remove the call (it's a noop). @@ -1845,61 +1949,63 @@ struct FWriteOpt : public LibCallOptimization { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } - return 0; + return nullptr; } }; struct FPutsOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { ErrorReportingOpt ER(/* StreamArg = */ 1); (void) ER.callOptimizer(Callee, CI, B); // These optimizations require DataLayout. - if (!TD) return 0; + if (!DL) return nullptr; // Require two pointers. Also, we can't optimize if return value is used. 
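The fprintf and fwrite folds above only apply when the call's result is unused, which void contexts guarantee; illustrative:

    #include <cstdio>

    void log_char(FILE *f, int c)         { fprintf(f, "%c", c); }  // -> fputc(c, f)
    void log_str(FILE *f, const char *s)  { fprintf(f, "%s", s); }  // -> fputs(s, f)
    void one_byte(FILE *f, const char *s) { fwrite(s, 1, 1, f); }   // -> fputc(s[0], f)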
FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) - return 0; + return nullptr; // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); - if (!Len) return 0; + if (!Len) return nullptr; // Known to have no uses (see above). return EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); + ConstantInt::get(DL->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, DL, TLI); } }; struct PutsOpt : public LibCallOptimization { - virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *callOptimizer(Function *Callee, CallInst *CI, + IRBuilder<> &B) override { // Require one fixed pointer argument and an integer/void result. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return 0; + return nullptr; // Check for a constant string. StringRef Str; if (!getConstantStringInfo(CI->getArgOperand(0), Str)) - return 0; + return nullptr; if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } - return 0; + return nullptr; } }; @@ -1908,7 +2014,7 @@ struct PutsOpt : public LibCallOptimization { namespace llvm { class LibCallSimplifierImpl { - const DataLayout *TD; + const DataLayout *DL; const TargetLibraryInfo *TLI; const LibCallSimplifier *LCS; bool UnsafeFPShrink; @@ -1918,11 +2024,11 @@ class LibCallSimplifierImpl { PowOpt Pow; Exp2Opt Exp2; public: - LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, + LibCallSimplifierImpl(const DataLayout *DL, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, bool UnsafeFPShrink = false) : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { - this->TD = TD; + this->DL = DL; this->TLI = TLI; this->LCS = LCS; this->UnsafeFPShrink = UnsafeFPShrink; @@ -1975,6 +2081,7 @@ static MemSetOpt MemSet; // Math library call optimizations. 
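All of the optimizers above are driven through the pimpl'd LibCallSimplifier whose DataLayout member this hunk renames. A hedged sketch of the typical driver pattern (DL, TLI, and CI are assumed to come from the enclosing pass; this is not code from the patch):

    LibCallSimplifier Simplifier(DL, TLI, /*UnsafeFPShrink=*/false);
    if (Value *With = Simplifier.optimizeCall(CI)) {
      CI->replaceAllUsesWith(With);  // plain Value RAUW; a pass can instead go
      CI->eraseFromParent();         // through LibCallSimplifier::replaceAllUsesWith
    }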
static UnaryDoubleFPOpt UnaryDoubleFP(false); +static BinaryDoubleFPOpt BinaryDoubleFP(false); static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); static SinCosPiOpt SinCosPi; @@ -2009,7 +2116,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case Intrinsic::exp2: return &Exp2; default: - return 0; + return nullptr; } } @@ -2119,7 +2226,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::trunc: if (hasFloatVersion(FuncName)) return &UnaryDoubleFP; - return 0; + return nullptr; case LibFunc::acos: case LibFunc::acosh: case LibFunc::asin: @@ -2143,11 +2250,16 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::tanh: if (UnsafeFPShrink && hasFloatVersion(FuncName)) return &UnsafeUnaryDoubleFP; - return 0; + return nullptr; + case LibFunc::fmin: + case LibFunc::fmax: + if (hasFloatVersion(FuncName)) + return &BinaryDoubleFP; + return nullptr; case LibFunc::memcpy_chk: return &MemCpyChk; default: - return 0; + return nullptr; } } @@ -2167,7 +2279,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { return &StrNCpyChk; } - return 0; + return nullptr; } @@ -2175,15 +2287,15 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { LibCallOptimization *LCO = lookupOptimization(CI); if (LCO) { IRBuilder<> Builder(CI); - return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); + return LCO->optimizeCall(CI, DL, TLI, LCS, Builder); } - return 0; + return nullptr; } -LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, +LibCallSimplifier::LibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI, bool UnsafeFPShrink) { - Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink); + Impl = new LibCallSimplifierImpl(DL, TLI, this, UnsafeFPShrink); } LibCallSimplifier::~LibCallSimplifier() { @@ -2191,7 +2303,7 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->isNoBuiltin()) return 0; + if (CI->isNoBuiltin()) return nullptr; return Impl->optimizeCall(CI); } @@ -2242,8 +2354,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { // * sqrt(Nroot(x)) -> pow(x,1/(2*N)) // * sqrt(pow(x,y)) -> pow(|x|,y*0.5) // -// strchr: -// * strchr(p, 0) -> strlen(p) // tan, tanf, tanl: // * tan(atan(x)) -> x // diff --git a/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp deleted file mode 100644 index 2ef692c..0000000 --- a/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp +++ /dev/null @@ -1,222 +0,0 @@ -//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions or global -// variables, or to instrument some functions or global variables in a specific -// way, based on a user-supplied list. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SpecialCaseList.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -#include <string> -#include <utility> - -namespace llvm { - -/// Represents a set of regular expressions. Regular expressions which are -/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all -/// others are represented as a single pipe-separated regex in RegEx. The -/// reason for doing so is efficiency; StringSet is much faster at matching -/// literal strings than Regex. -struct SpecialCaseList::Entry { - StringSet<> Strings; - Regex *RegEx; - - Entry() : RegEx(0) {} - - bool match(StringRef Query) const { - return Strings.count(Query) || (RegEx && RegEx->match(Query)); - } -}; - -SpecialCaseList::SpecialCaseList() : Entries() {} - -SpecialCaseList *SpecialCaseList::create( - const StringRef Path, std::string &Error) { - if (Path.empty()) - return new SpecialCaseList(); - OwningPtr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFile(Path, File)) { - Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); - return 0; - } - return create(File.get(), Error); -} - -SpecialCaseList *SpecialCaseList::create( - const MemoryBuffer *MB, std::string &Error) { - OwningPtr<SpecialCaseList> SCL(new SpecialCaseList()); - if (!SCL->parse(MB, Error)) - return 0; - return SCL.take(); -} - -SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) { - std::string Error; - if (SpecialCaseList *SCL = create(Path, Error)) - return SCL; - report_fatal_error(Error); -} - -bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { - // Iterate through each line in the blacklist file. - SmallVector<StringRef, 16> Lines; - SplitString(MB->getBuffer(), Lines, "\n\r"); - StringMap<StringMap<std::string> > Regexps; - assert(Entries.empty() && - "parse() should be called on an empty SpecialCaseList"); - int LineNo = 1; - for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); - I != E; ++I, ++LineNo) { - // Ignore empty lines and lines starting with "#" - if (I->empty() || I->startswith("#")) - continue; - // Get our prefix and unparsed regexp. - std::pair<StringRef, StringRef> SplitLine = I->split(":"); - StringRef Prefix = SplitLine.first; - if (SplitLine.second.empty()) { - // Missing ':' in the line. - Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" + - SplitLine.first + "'").str(); - return false; - } - - std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); - std::string Regexp = SplitRegexp.first; - StringRef Category = SplitRegexp.second; - - // Backwards compatibility. - if (Prefix == "global-init") { - Prefix = "global"; - Category = "init"; - } else if (Prefix == "global-init-type") { - Prefix = "type"; - Category = "init"; - } else if (Prefix == "global-init-src") { - Prefix = "src"; - Category = "init"; - } - - // See if we can store Regexp in Strings. 
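For context on the factory interface of the file being deleted here: create() returned null and filled Error on failure, and createOrDie() wrapped that in report_fatal_error. Typical usage was roughly (path is illustrative):

    std::string Error;
    SpecialCaseList *SCL = SpecialCaseList::create("blacklist.txt", Error);
    if (!SCL)
      report_fatal_error(Error);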
- if (Regex::isLiteralERE(Regexp)) { - Entries[Prefix][Category].Strings.insert(Regexp); - continue; - } - - // Replace * with .* - for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; - pos += strlen(".*")) { - Regexp.replace(pos, strlen("*"), ".*"); - } - - // Check that the regexp is valid. - Regex CheckRE(Regexp); - std::string REError; - if (!CheckRE.isValid(REError)) { - Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" + - SplitLine.second + "': " + REError).str(); - return false; - } - - // Add this regexp into the proper group by its prefix. - if (!Regexps[Prefix][Category].empty()) - Regexps[Prefix][Category] += "|"; - Regexps[Prefix][Category] += "^" + Regexp + "$"; - } - - // Iterate through each of the prefixes, and create Regexs for them. - for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(), - E = Regexps.end(); - I != E; ++I) { - for (StringMap<std::string>::const_iterator II = I->second.begin(), - IE = I->second.end(); - II != IE; ++II) { - Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); - } - } - return true; -} - -SpecialCaseList::~SpecialCaseList() { - for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), - E = Entries.end(); - I != E; ++I) { - for (StringMap<Entry>::const_iterator II = I->second.begin(), - IE = I->second.end(); - II != IE; ++II) { - delete II->second.RegEx; - } - } -} - -bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { - return isIn(*F.getParent(), Category) || - inSectionCategory("fun", F.getName(), Category); -} - -static StringRef GetGlobalTypeString(const GlobalValue &G) { - // Types of GlobalVariables are always pointer types. - Type *GType = G.getType()->getElementType(); - // For now we support blacklisting struct types only. 
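The parse() being removed above accepted one entry per line in the form prefix:wildcard-or-regexp with an optional =category suffix, '#' comment lines, and '*' expanded to '.*'. An illustrative list it would have accepted:

    # Don't instrument these:
    fun:*no_sanitize*
    src:third_party/.*
    global:known_racy_global=init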
- if (StructType *SGType = dyn_cast<StructType>(GType)) { - if (!SGType->isLiteral()) - return SGType->getName(); - } - return "<unknown type>"; -} - -bool SpecialCaseList::isIn(const GlobalVariable &G, - const StringRef Category) const { - return isIn(*G.getParent(), Category) || - inSectionCategory("global", G.getName(), Category) || - inSectionCategory("type", GetGlobalTypeString(G), Category); -} - -bool SpecialCaseList::isIn(const GlobalAlias &GA, - const StringRef Category) const { - if (isIn(*GA.getParent(), Category)) - return true; - - if (isa<FunctionType>(GA.getType()->getElementType())) - return inSectionCategory("fun", GA.getName(), Category); - - return inSectionCategory("global", GA.getName(), Category) || - inSectionCategory("type", GetGlobalTypeString(GA), Category); -} - -bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { - return inSectionCategory("src", M.getModuleIdentifier(), Category); -} - -bool SpecialCaseList::inSectionCategory(const StringRef Section, - const StringRef Query, - const StringRef Category) const { - StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); - if (I == Entries.end()) return false; - StringMap<Entry>::const_iterator II = I->second.find(Category); - if (II == I->second.end()) return false; - - return II->getValue().match(Query); -} - -} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 560f581..0c2fc0a 100644 --- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -59,7 +59,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Then unreachable blocks. if (UnreachableBlocks.empty()) { - UnreachableBlock = 0; + UnreachableBlock = nullptr; } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { @@ -77,7 +77,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Now handle return blocks. if (ReturningBlocks.empty()) { - ReturnBlock = 0; + ReturnBlock = nullptr; return false; // No blocks return } else if (ReturningBlocks.size() == 1) { ReturnBlock = ReturningBlocks.front(); // Already has a single return block @@ -91,9 +91,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); - PHINode *PN = 0; + PHINode *PN = nullptr; if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), NULL, NewRetBlock); + ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp index c3df215..ed4f45c 100644 --- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" -#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" +#include "llvm/PassRegistry.h" using namespace llvm; /// initializeTransformUtils - Initialize all passes in the TransformUtils /// library. 
void llvm::initializeTransformUtils(PassRegistry &Registry) { + initializeAddDiscriminatorsPass(Registry); initializeBreakCriticalEdgesPass(Registry); initializeInstNamerPass(Registry); initializeLCSSAPass(Registry); diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 457fc80..0f20e6d 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -71,12 +71,12 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0) continue; + if (!OP) continue; Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. if (Mapped_OP == OP || - (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries))) + (Mapped_OP == nullptr && (Flags & RF_IgnoreMissingEntries))) continue; // Ok, at least one operand needs remapping. @@ -84,13 +84,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); - if (Op == 0) - Elts.push_back(0); + if (!Op) + Elts.push_back(nullptr); else { Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. - if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) + if (Mapped_Op == nullptr && (Flags & RF_IgnoreMissingEntries)) Mapped_Op = Op; Elts.push_back(Mapped_Op); } @@ -112,8 +112,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Okay, this either must be a constant (which may or may not be mappable) or // is something that is not in the mapping table. Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); - if (C == 0) - return 0; + if (!C) + return nullptr; if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { Function *F = @@ -126,7 +126,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Otherwise, we have some other constant to remap. Start by checking to see // if all operands have an identity remapping. unsigned OpNo = 0, NumOperands = C->getNumOperands(); - Value *Mapped = 0; + Value *Mapped = nullptr; for (; OpNo != NumOperands; ++OpNo) { Value *Op = C->getOperand(OpNo); Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer); @@ -187,7 +187,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer); // If we aren't ignoring missing entries, assert that something happened. - if (V != 0) + if (V) *op = V; else assert((Flags & RF_IgnoreMissingEntries) && @@ -199,7 +199,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); // If we aren't ignoring missing entries, assert that something happened. - if (V != 0) + if (V) PN->setIncomingBlock(i, cast<BasicBlock>(V)); else assert((Flags & RF_IgnoreMissingEntries) && |
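A hedged sketch of how cloning utilities typically drive the remapping entry points touched above (NewF, OldV, and NewV are hypothetical; flags as in the hunk):

    ValueToValueMapTy VMap;
    VMap[OldV] = NewV;  // mappings seeded by the caller
    for (BasicBlock &BB : *NewF)
      for (Instruction &I : BB)
        RemapInstruction(&I, VMap, RF_IgnoreMissingEntries);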