Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
30 files changed, 2204 insertions, 1752 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp index cce016a..03c3a80 100644 --- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/MathExtras.h" #include <algorithm> namespace llvm { @@ -33,11 +34,6 @@ static inline bool CompareVars(const ASanStackVariableDescription &a, // with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars. static const size_t kMinAlignment = 16; -static size_t RoundUpTo(size_t X, size_t RoundTo) { - assert((RoundTo & (RoundTo - 1)) == 0); - return (X + RoundTo - 1) & ~(RoundTo - 1); -} - // The larger the variable Size the larger is the redzone. // The resulting frame size is a multiple of Alignment. static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) { @@ -48,7 +44,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) { else if (Size <= 512) Res = Size + 64; else if (Size <= 4096) Res = Size + 128; else Res = Size + 256; - return RoundUpTo(Res, Alignment); + return RoundUpToAlignment(Res, Alignment); } void diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp index 820544b..e9f6239 100644 --- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp @@ -174,42 +174,51 @@ bool AddDiscriminators::runOnFunction(Function &F) { for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *B = I; TerminatorInst *Last = B->getTerminator(); - DebugLoc LastLoc = Last->getDebugLoc(); - if (LastLoc.isUnknown()) continue; - DILocation LastDIL(LastLoc.getAsMDNode(Ctx)); + const DILocation *LastDIL = Last->getDebugLoc(); + if (!LastDIL) + continue; for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { BasicBlock *Succ = Last->getSuccessor(I); Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); - DebugLoc FirstLoc = First->getDebugLoc(); - if (FirstLoc.isUnknown()) continue; - DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx)); + const DILocation *FirstDIL = First->getDebugLoc(); + if (!FirstDIL) + continue; // If the first instruction (First) of Succ is at the same file // location as B's last instruction (Last), add a new // discriminator for First's location and all the instructions // in Succ that share the same location with First. - if (FirstDIL.atSameLineAs(LastDIL)) { + if (!FirstDIL->canDiscriminate(*LastDIL)) { // Create a new lexical scope and compute a new discriminator // number for it. - StringRef Filename = FirstDIL.getFilename(); - DIScope Scope = FirstDIL.getScope(); - DIFile File = Builder.createFile(Filename, Scope.getDirectory()); - unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx); - DILexicalBlockFile NewScope = + StringRef Filename = FirstDIL->getFilename(); + auto *Scope = FirstDIL->getScope(); + auto *File = Builder.createFile(Filename, Scope->getDirectory()); + + // FIXME: Calculate the discriminator here, based on local information, + // and delete DILocation::computeNewDiscriminator(). The current + // solution gives different results depending on other modules in the + // same context. All we really need is to discriminate between + // FirstDIL and LastDIL -- a local map would suffice. 
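For context, the RoundUpTo helper deleted in the ASanStackFrameLayout hunk above is the classic power-of-two round-up that llvm/Support/MathExtras.h already provides as RoundUpToAlignment. A self-contained sketch of the same idiom, mirroring the removed code rather than quoting the MathExtras implementation:

#include <cassert>
#include <cstddef>

// Round X up to the next multiple of RoundTo, which must be a power of two.
// E.g. roundUp(37, 16) == 48: adding RoundTo - 1 and masking clears the low bits.
static size_t roundUp(size_t X, size_t RoundTo) {
  assert((RoundTo & (RoundTo - 1)) == 0 && "RoundTo must be a power of two");
  return (X + RoundTo - 1) & ~(RoundTo - 1);
}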
+ unsigned Discriminator = FirstDIL->computeNewDiscriminator(); + auto *NewScope = Builder.createLexicalBlockFile(Scope, File, Discriminator); - DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope); - DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL); + auto *NewDIL = + DILocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(), + NewScope, FirstDIL->getInlinedAt()); + DebugLoc newDebugLoc = NewDIL; // Attach this new debug location to First and every // instruction following First that shares the same location. for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; ++I1) { - if (I1->getDebugLoc() != FirstLoc) break; + if (I1->getDebugLoc().get() != FirstDIL) + break; I1->setDebugLoc(newDebugLoc); - DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber() - << ":" << NewDIL.getColumnNumber() << ":" - << NewDIL.getDiscriminator() << *I1 << "\n"); + DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine() + << ":" << NewDIL->getColumn() << ":" + << NewDIL->getDiscriminator() << *I1 << "\n"); } DEBUG(dbgs() << "\n"); Changed = true; diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 983f025..f3c8013 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -65,16 +65,10 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { /// any single-entry PHI nodes in it, fold them away. This handles the case /// when all entries to the PHI nodes in a block are guaranteed equal, such as /// when the block has exactly one predecessor. -void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { +void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA, + MemoryDependenceAnalysis *MemDep) { if (!isa<PHINode>(BB->begin())) return; - AliasAnalysis *AA = nullptr; - MemoryDependenceAnalysis *MemDep = nullptr; - if (P) { - AA = P->getAnalysisIfAvailable<AliasAnalysis>(); - MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); - } - while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { if (PN->getIncomingValue(0) != PN) PN->replaceAllUsesWith(PN->getIncomingValue(0)); @@ -113,7 +107,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, /// if possible. The return value indicates success or failure. -bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { +bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, + LoopInfo *LI, AliasAnalysis *AA, + MemoryDependenceAnalysis *MemDep) { // Don't merge away blocks who have their address taken. if (BB->hasAddressTaken()) return false; @@ -140,8 +136,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Can't merge if there is PHI loop. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { if (PHINode *PN = dyn_cast<PHINode>(BI)) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == PN) + for (Value *IncValue : PN->incoming_values()) + if (IncValue == PN) return false; } else break; @@ -149,7 +145,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Begin by getting rid of unneeded PHIs. if (isa<PHINode>(BB->front())) - FoldSingleEntryPHINodes(BB, P); + FoldSingleEntryPHINodes(BB, AA, MemDep); // Delete the unconditional branch from the predecessor... 
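The utilities in BasicBlockUtils now take their analyses as explicit parameters instead of fishing them out of a Pass. A minimal caller sketch against the new signature shown above; every analysis pointer may be null, which simply means "nothing to update":

// Try to fold BB into its unique predecessor, keeping DT, LI, AA and
// MemDep consistent when they are provided.
static bool tryMergeBlock(BasicBlock *BB, DominatorTree *DT, LoopInfo *LI,
                          AliasAnalysis *AA,
                          MemoryDependenceAnalysis *MemDep) {
  return MergeBlockIntoPredecessor(BB, DT, LI, AA, MemDep);
}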
PredBB->getInstList().pop_back(); @@ -166,28 +162,23 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { PredBB->takeName(BB); // Finally, erase the old block and update dominator info. - if (P) { - if (DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { - DominatorTree &DT = DTWP->getDomTree(); - if (DomTreeNode *DTN = DT.getNode(BB)) { - DomTreeNode *PredDTN = DT.getNode(PredBB); - SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); - for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), - DE = Children.end(); DI != DE; ++DI) - DT.changeImmediateDominator(*DI, PredDTN); - - DT.eraseNode(BB); - } + if (DT) + if (DomTreeNode *DTN = DT->getNode(BB)) { + DomTreeNode *PredDTN = DT->getNode(PredBB); + SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end()); + for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), + DE = Children.end(); + DI != DE; ++DI) + DT->changeImmediateDominator(*DI, PredDTN); + + DT->eraseNode(BB); + } - if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) - LI->removeBlock(BB); + if (LI) + LI->removeBlock(BB); - if (MemoryDependenceAnalysis *MD = - P->getAnalysisIfAvailable<MemoryDependenceAnalysis>()) - MD->invalidateCachedPredecessors(); - } - } + if (MemDep) + MemDep->invalidateCachedPredecessors(); BB->eraseFromParent(); return true; @@ -240,12 +231,14 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { /// SplitEdge - Split the edge connecting specified block. Pass P must /// not be NULL. -BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, + LoopInfo *LI) { unsigned SuccNum = GetSuccessorNumber(BB, Succ); // If this is a critical edge, let SplitCriticalEdge do it. TerminatorInst *LatchTerm = BB->getTerminator(); - if (SplitCriticalEdge(LatchTerm, SuccNum, P)) + if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI) + .setPreserveLCSSA())) return LatchTerm->getSuccessor(SuccNum); // If the edge isn't critical, then BB has a single successor or Succ has a @@ -255,23 +248,25 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // block. assert(SP == BB && "CFG broken"); SP = nullptr; - return SplitBlock(Succ, Succ->begin(), P); + return SplitBlock(Succ, Succ->begin(), DT, LI); } // Otherwise, if BB has a single successor, split it at the bottom of the // block. assert(BB->getTerminator()->getNumSuccessors() == 1 && "Should have a single succ!"); - return SplitBlock(BB, BB->getTerminator(), P); + return SplitBlock(BB, BB->getTerminator(), DT, LI); } -unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) { +unsigned +llvm::SplitAllCriticalEdges(Function &F, + const CriticalEdgeSplittingOptions &Options) { unsigned NumBroken = 0; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI)) for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (SplitCriticalEdge(TI, i, P)) + if (SplitCriticalEdge(TI, i, Options)) ++NumBroken; } return NumBroken; @@ -282,7 +277,8 @@ unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) { /// to a new block. The two blocks are joined by an unconditional branch and /// the loop info is updated. 
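Likewise, the Pass argument to the edge-splitting entry points gives way to CriticalEdgeSplittingOptions, built inline the way SplitEdge does above. A sketch of the calling pattern, with TI, SuccNum, DT and LI assumed in scope:

// Returns the new block, or null if the edge was not critical to begin with.
BasicBlock *NewBB = SplitCriticalEdge(
    TI, SuccNum, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());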
/// -BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { +BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, + DominatorTree *DT, LoopInfo *LI) { BasicBlock::iterator SplitIt = SplitPt; while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt)) ++SplitIt; @@ -290,26 +286,23 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { // The new block lives in whichever loop the old one did. This preserves // LCSSA as well, because we force the split point to be after any PHI nodes. - if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) + if (LI) if (Loop *L = LI->getLoopFor(Old)) - L->addBasicBlockToLoop(New, LI->getBase()); + L->addBasicBlockToLoop(New, *LI); - if (DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { - DominatorTree &DT = DTWP->getDomTree(); + if (DT) // Old dominates New. New node dominates all other nodes dominated by Old. - if (DomTreeNode *OldNode = DT.getNode(Old)) { + if (DomTreeNode *OldNode = DT->getNode(Old)) { std::vector<DomTreeNode *> Children; for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end(); I != E; ++I) Children.push_back(*I); - DomTreeNode *NewNode = DT.addNewBlock(New, Old); + DomTreeNode *NewNode = DT->addNewBlock(New, Old); for (std::vector<DomTreeNode *>::iterator I = Children.begin(), E = Children.end(); I != E; ++I) - DT.changeImmediateDominator(*I, NewNode); + DT->changeImmediateDominator(*I, NewNode); } - } return New; } @@ -318,45 +311,46 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { /// analysis information. static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, ArrayRef<BasicBlock *> Preds, - Pass *P, bool &HasLoopExit) { - if (!P) return; + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA, bool &HasLoopExit) { + // Update dominator tree if available. + if (DT) + DT->splitBlock(NewBB); + + // The rest of the logic is only relevant for updating the loop structures. + if (!LI) + return; - LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr; + Loop *L = LI->getLoopFor(OldBB); // If we need to preserve loop analyses, collect some information about how // this split will affect loops. bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; - if (LI) { - bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); - for (ArrayRef<BasicBlock*>::iterator - i = Preds.begin(), e = Preds.end(); i != e; ++i) { - BasicBlock *Pred = *i; - - // If we need to preserve LCSSA, determine if any of the preds is a loop - // exit. - if (PreserveLCSSA) - if (Loop *PL = LI->getLoopFor(Pred)) - if (!PL->contains(OldBB)) - HasLoopExit = true; - - // If we need to preserve LoopInfo, note whether any of the preds crosses - // an interesting loop boundary. - if (!L) continue; - if (L->contains(Pred)) - IsLoopEntry = false; - else - SplitMakesNewLoopHeader = true; - } + for (ArrayRef<BasicBlock *>::iterator i = Preds.begin(), e = Preds.end(); + i != e; ++i) { + BasicBlock *Pred = *i; + + // If we need to preserve LCSSA, determine if any of the preds is a loop + // exit. + if (PreserveLCSSA) + if (Loop *PL = LI->getLoopFor(Pred)) + if (!PL->contains(OldBB)) + HasLoopExit = true; + + // If we need to preserve LoopInfo, note whether any of the preds crosses + // an interesting loop boundary. 
+ if (!L) + continue; + if (L->contains(Pred)) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; } - // Update dominator tree if available. - if (DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) - DTWP->getDomTree().splitBlock(NewBB); - - if (!L) return; + // Unless we have a loop for OldBB, nothing else to do here. + if (!L) + return; if (IsLoopEntry) { // Add the new block to the nearest enclosing loop (and not an adjacent @@ -382,9 +376,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, } if (InnermostPredLoop) - InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI); } else { - L->addBasicBlockToLoop(NewBB, LI->getBase()); + L->addBasicBlockToLoop(NewBB, *LI); if (SplitMakesNewLoopHeader) L->moveToHeader(NewBB); } @@ -393,10 +387,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, /// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, - ArrayRef<BasicBlock*> Preds, BranchInst *BI, - Pass *P, bool HasLoopExit) { + ArrayRef<BasicBlock *> Preds, BranchInst *BI, + AliasAnalysis *AA, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr; SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); @@ -461,11 +454,15 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, } } -/// SplitBlockPredecessors - This method transforms BB by introducing a new -/// basic block into the function, and moving some of the predecessors of BB to -/// be predecessors of the new block. The new predecessors are indicated by the -/// Preds array, which has NumPreds elements in it. The new block is given a -/// suffix of 'Suffix'. +/// SplitBlockPredecessors - This method introduces at least one new basic block +/// into the function and moves some of the predecessors of BB to be +/// predecessors of the new block. The new predecessors are indicated by the +/// Preds array. The new block is given a suffix of 'Suffix'. Returns new basic +/// block to which predecessors from Preds are now pointing. +/// +/// If BB is a landingpad block then additional basicblock might be introduced. +/// It will have suffix of 'Suffix'+".split_lp". +/// See SplitLandingPadPredecessors for more details on this case. /// /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, /// LoopInfo, and LCCSA but no other analyses. In particular, it does not @@ -473,8 +470,21 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// of the edges being split is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - ArrayRef<BasicBlock*> Preds, - const char *Suffix, Pass *P) { + ArrayRef<BasicBlock *> Preds, + const char *Suffix, AliasAnalysis *AA, + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { + // For the landingpads we need to act a bit differently. + // Delegate this work to the SplitLandingPadPredecessors. 
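A hedged sketch of calling the updated SplitBlockPredecessors, with BB, AA, DT and LI assumed in scope. Per the comment above, a landing-pad BB transparently delegates to SplitLandingPadPredecessors and returns the first of the two blocks created:

// Split every predecessor of BB off into one dedicated block.
SmallVector<BasicBlock *, 4> Preds(pred_begin(BB), pred_end(BB));
BasicBlock *NewPred = SplitBlockPredecessors(BB, Preds, ".split", AA, DT, LI,
                                             /*PreserveLCSSA=*/true);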
+ if (BB->isLandingPad()) { + SmallVector<BasicBlock*, 2> NewBBs; + std::string NewName = std::string(Suffix) + ".split-lp"; + + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), + NewBBs, AA, DT, LI, PreserveLCSSA); + return NewBBs[0]; + } + // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); @@ -505,10 +515,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit); + UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA, + HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. - UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit); return NewBB; } @@ -526,10 +537,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, /// exits). /// void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, - ArrayRef<BasicBlock*> Preds, + ArrayRef<BasicBlock *> Preds, const char *Suffix1, const char *Suffix2, - Pass *P, - SmallVectorImpl<BasicBlock*> &NewBBs) { + SmallVectorImpl<BasicBlock *> &NewBBs, + AliasAnalysis *AA, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); // Create a new basic block for OrigBB's predecessors listed in Preds. Insert @@ -552,12 +564,12 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1); } - // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit); + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA, + HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. - UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit); // Move the remaining edges from OrigBB to point to NewBB2. SmallVector<BasicBlock*, 8> NewBB2Preds; @@ -589,10 +601,11 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit); + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, + PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. 
- UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit); } LandingPadInst *LPad = OrigBB->getLandingPadInst(); diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index eda22cf..7e83c9e 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -18,6 +18,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CFG.h" @@ -41,14 +42,19 @@ namespace { } bool runOnFunction(Function &F) override { - unsigned N = SplitAllCriticalEdges(F, this); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + unsigned N = + SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI)); NumBroken += N; return N > 0; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<LoopInfo>(); + AU.addPreserved<LoopInfoWrapperPass>(); // No loop canonicalization guarantees are broken by this pass. AU.addPreservedID(LoopSimplifyID); @@ -125,10 +131,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds, /// to. /// BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, - Pass *P, bool MergeIdenticalEdges, - bool DontDeleteUselessPhis, - bool SplitLandingPads) { - if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr; + const CriticalEdgeSplittingOptions &Options) { + if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges)) + return nullptr; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); @@ -179,29 +184,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). - if (MergeIdenticalEdges) { + if (Options.MergeIdenticalEdges) { for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) { if (TI->getSuccessor(i) != DestBB) continue; // Remove an entry for TIBB from DestBB phi nodes. - DestBB->removePredecessor(TIBB, DontDeleteUselessPhis); + DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs); // We found another edge to DestBB, go to NewBB instead. TI->setSuccessor(i, NewBB); } } - - - // If we don't have a pass object, we can't update anything... - if (!P) return NewBB; - - DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - // If we have nothing to update, just return. + auto *AA = Options.AA; + auto *DT = Options.DT; + auto *LI = Options.LI; if (!DT && !LI) return NewBB; @@ -268,13 +266,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. 
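Loop::addBasicBlockToLoop now takes the owning LoopInfo by reference instead of LI->getBase(), as the replacements in this hunk show. The minimal update pattern, with NewBB, OldBB and LI assumed in scope:

// Register NewBB with whatever loop currently contains OldBB, if any.
if (LI)
  if (Loop *L = LI->getLoopFor(OldBB))
    L->addBasicBlockToLoop(NewBB, *LI);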
- DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + DestLoop->addBasicBlockToLoop(NewBB, *LI); } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. - TIL->addBasicBlockToLoop(NewBB, LI->getBase()); + TIL->addBasicBlockToLoop(NewBB, *LI); } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. - DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + DestLoop->addBasicBlockToLoop(NewBB, *LI); } else { // Edge from two loops with no containment relation. Because these // are natural loops, we know that the destination block must be the @@ -283,19 +281,20 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, assert(DestLoop->getHeader() == DestBB && "Should not create irreducible loops!"); if (Loop *P = DestLoop->getParentLoop()) - P->addBasicBlockToLoop(NewBB, LI->getBase()); + P->addBasicBlockToLoop(NewBB, *LI); } } + // If TIBB is in a loop and DestBB is outside of that loop, we may need // to update LoopSimplify form and LCSSA form. - if (!TIL->contains(DestBB) && - P->mustPreserveAnalysisID(LoopSimplifyID)) { + if (!TIL->contains(DestBB)) { assert(!TIL->contains(NewBB) && "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. - if (P->mustPreserveAnalysisID(LCSSAID)) + if (Options.PreserveLCSSA) { createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); + } // The only that we can break LoopSimplify form by splitting a critical // edge is if after the split there exists some edge from TIL to DestBB @@ -322,20 +321,12 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (!LoopPreds.empty()) { assert(!DestBB->isLandingPad() && "We don't split edges to landing pads!"); - BasicBlock *NewExitBB = - SplitBlockPredecessors(DestBB, LoopPreds, "split", P); - if (P->mustPreserveAnalysisID(LCSSAID)) + BasicBlock *NewExitBB = SplitBlockPredecessors( + DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA); + if (Options.PreserveLCSSA) createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } } - // LCSSA form was updated above for the case where LoopSimplify is - // available, which means that all predecessors of loop exit blocks - // are within the loop. Without LoopSimplify form, it would be - // necessary to insert a new phi. - assert((!P->mustPreserveAnalysisID(LCSSAID) || - P->mustPreserveAnalysisID(LoopSimplifyID)) && - "SplitCriticalEdge doesn't know how to update LCCSA form " - "without LoopSimplify!"); } } diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 322485d..8aa7b2a 100644 --- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -21,7 +21,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -33,7 +33,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. 
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) return nullptr; @@ -45,12 +45,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrLen = M->getOrInsertFunction("strlen", - AttributeSet::get(M->getContext(), - AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - nullptr); + Constant *StrLen = M->getOrInsertFunction( + "strlen", AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), nullptr); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -62,7 +59,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, /// specified pointer. Ptr is required to be some pointer type, MaxLen must /// be of size_t type, and the return value has 'intptr_t' type. Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) return nullptr; @@ -73,14 +70,11 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = M->getOrInsertFunction("strnlen", - AttributeSet::get(M->getContext(), - AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - nullptr); - CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); + Constant *StrNLen = + M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), + DL.getIntPtrType(Context), nullptr); + CallInst *CI = B.CreateCall(StrNLen, {CastToCStr(Ptr, B), MaxLen}, "strnlen"); if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -91,7 +85,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) return nullptr; @@ -106,17 +100,16 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, AttributeSet::get(M->getContext(), AS), I8Ptr, I8Ptr, I32Ty, nullptr); - CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), - ConstantInt::get(I32Ty, C), "strchr"); + CallInst *CI = B.CreateCall( + StrChr, {CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr"); if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); return CI; } /// EmitStrNCmp - Emit a call to the strncmp function to the builder. 
-Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) return nullptr; @@ -128,15 +121,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *StrNCmp = M->getOrInsertFunction("strncmp", - AttributeSet::get(M->getContext(), - AS), - B.getInt32Ty(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), nullptr); - CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), - CastToCStr(Ptr2, B), Len, "strncmp"); + Value *StrNCmp = M->getOrInsertFunction( + "strncmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr); + CallInst *CI = B.CreateCall( + StrNCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "strncmp"); if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -147,8 +136,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI, - StringRef Name) { + const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) return nullptr; @@ -161,8 +149,8 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, Value *StrCpy = M->getOrInsertFunction(Name, AttributeSet::get(M->getContext(), AS), I8Ptr, I8Ptr, I8Ptr, nullptr); - CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), - Name); + CallInst *CI = + B.CreateCall(StrCpy, {CastToCStr(Dst, B), CastToCStr(Src, B)}, Name); if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); return CI; @@ -170,8 +158,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. -Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) return nullptr; @@ -187,8 +174,8 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, AS), I8Ptr, I8Ptr, I8Ptr, Len->getType(), nullptr); - CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), - Len, "strncpy"); + CallInst *CI = B.CreateCall( + StrNCpy, {CastToCStr(Dst, B), CastToCStr(Src, B), Len}, "strncpy"); if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); return CI; @@ -198,7 +185,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. 
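All of these emitters now take the DataLayout by reference rather than through a possibly-null pointer; callers can pull it straight off the module. Sketch, with B, Ptr and TLI assumed in scope:

const DataLayout &DL = B.GetInsertBlock()->getModule()->getDataLayout();
// Returns null when the target library does not provide strlen.
Value *Len = EmitStrLen(Ptr, B, DL, TLI);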
Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const DataLayout *TD, + IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) return nullptr; @@ -208,16 +195,13 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttributeSet::get(M->getContext(), AS), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), nullptr); + Value *MemCpy = M->getOrInsertFunction( + "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), nullptr); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); - CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize); + CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize}); if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); return CI; @@ -225,9 +209,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. -Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) return nullptr; @@ -236,14 +219,10 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemChr = M->getOrInsertFunction("memchr", - AttributeSet::get(M->getContext(), AS), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - B.getInt32Ty(), - TD->getIntPtrType(Context), - nullptr); - CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + Value *MemChr = M->getOrInsertFunction( + "memchr", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context), nullptr); + CallInst *CI = B.CreateCall(MemChr, {CastToCStr(Ptr, B), Val, Len}, "memchr"); if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -252,9 +231,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, } /// EmitMemCmp - Emit a call to the memcmp function. 
-Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) return nullptr; @@ -266,14 +244,11 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCmp = M->getOrInsertFunction("memcmp", - AttributeSet::get(M->getContext(), AS), - B.getInt32Ty(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), nullptr); - CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), - Len, "memcmp"); + Value *MemCmp = M->getOrInsertFunction( + "memcmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr); + CallInst *CI = B.CreateCall( + MemCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "memcmp"); if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -329,7 +304,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(), Op2->getType(), nullptr); - CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name); + CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name); CI->setAttributes(Attrs); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -339,7 +314,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) return nullptr; @@ -361,7 +336,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. -Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) return nullptr; @@ -386,7 +361,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) return nullptr; @@ -409,7 +384,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, File->getType(), nullptr); Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, "chari"); - CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); + CallInst *CI = B.CreateCall(F, {Char, File}, "fputc"); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); @@ -419,7 +394,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. 
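The other mechanical change running through this file replaces the fixed-arity IRBuilder::CreateCall2/3/4 helpers with the single CreateCall overload taking a braced argument list. Schematically, with Callee, Arg0 and Arg1 as placeholders:

// Old form: CallInst *CI = B.CreateCall2(Callee, Arg0, Arg1, "name");
CallInst *CI = B.CreateCall(Callee, {Arg0, Arg1}, "name");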
Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) return nullptr; @@ -441,7 +416,7 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), nullptr); - CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + CallInst *CI = B.CreateCall(F, {CastToCStr(Str, B), File}, "fputs"); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); @@ -450,9 +425,8 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. -Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) return nullptr; @@ -466,21 +440,18 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FWriteName, - AttributeSet::get(M->getContext(), AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), - File->getType(), nullptr); + F = M->getOrInsertFunction( + FWriteName, AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), File->getType(), nullptr); else - F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), - File->getType(), nullptr); - CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(Context), 1), File); + F = M->getOrInsertFunction(FWriteName, DL.getIntPtrType(Context), + B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), File->getType(), + nullptr); + CallInst *CI = + B.CreateCall(F, {CastToCStr(Ptr, B), Size, + ConstantInt::get(DL.getIntPtrType(Context), 1), File}); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 96a763f..4f8d1df 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -34,7 +34,7 @@ #include <map> using namespace llvm; -// CloneBasicBlock - See comments in Cloning.h +/// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -154,23 +154,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, TypeMapper, Materializer); } -// Find the MDNode which corresponds to the DISubprogram data that described F. -static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { - for (DISubprogram Subprogram : Finder.subprograms()) { - if (Subprogram.describes(F)) return Subprogram; +// Find the MDNode which corresponds to the subprogram data that described F. 
+static DISubprogram *FindSubprogram(const Function *F, + DebugInfoFinder &Finder) { + for (DISubprogram *Subprogram : Finder.subprograms()) { + if (Subprogram->describes(F)) + return Subprogram; } return nullptr; } // Add an operand to an existing MDNode. The new operand will be added at the // back of the operand list. -static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) { +static void AddOperand(DICompileUnit *CU, DISubprogramArray SPs, + Metadata *NewSP) { SmallVector<Metadata *, 16> NewSPs; - NewSPs.reserve(SPs->getNumOperands() + 1); - for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I) - NewSPs.push_back(SPs->getOperand(I)); + NewSPs.reserve(SPs.size() + 1); + for (auto *SP : SPs) + NewSPs.push_back(SP); NewSPs.push_back(NewSP); - CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs))); + CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs)); } // Clone the module-level debug info associated with OldFunc. The cloned data @@ -180,21 +183,21 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, DebugInfoFinder Finder; Finder.processModule(*OldFunc->getParent()); - const MDNode *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder); + const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder); if (!OldSubprogramMDNode) return; // Ensure that OldFunc appears in the map. // (if it's already there it must point to NewFunc anyway) VMap[OldFunc] = NewFunc; - DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap)); - - for (DICompileUnit CU : Finder.compile_units()) { - DIArray Subprograms(CU.getSubprograms()); + auto *NewSubprogram = + cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap)); + for (auto *CU : Finder.compile_units()) { + auto Subprograms = CU->getSubprograms(); // If the compile unit's function list contains the old function, it should // also contain the new one. - for (unsigned i = 0; i < Subprograms.getNumElements(); i++) { - if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) { + for (auto *SP : Subprograms) { + if (SP == OldSubprogramMDNode) { AddOperand(CU, Subprograms, NewSubprogram); break; } @@ -202,7 +205,7 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, } } -/// CloneFunction - Return a copy of the specified function, but without +/// Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified /// in the VMap are changed to refer to their mapped value instead of the /// original one. If any of the arguments to the function are in the VMap, @@ -250,8 +253,7 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, namespace { - /// PruningFunctionCloner - This class is a private class used to implement - /// the CloneAndPruneFunctionInto method. + /// This is a private class used to implement CloneAndPruneFunctionInto. 
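The pruning cloner now consults an optional CloningDirector for every instruction it visits. An illustrative no-op director under the interface exercised below; only StopCloningBB, SkipInstruction and CloneSuccessors appear in this diff, so the CloneInstruction "continue normally" action is an assumption:

// A do-nothing director: every instruction is cloned verbatim.
class IdentityCloningDirector : public CloningDirector {
  CloningAction handleInstruction(ValueToValueMapTy &VMap,
                                  const Instruction *Inst,
                                  BasicBlock *NewBB) override {
    return CloneInstruction; // assumed default action, see note above
  }
};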
struct PruningFunctionCloner { Function *NewFunc; const Function *OldFunc; @@ -259,29 +261,40 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - const DataLayout *DL; + CloningDirector *Director; + ValueMapTypeRemapper *TypeMapper; + ValueMaterializer *Materializer; + public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, - ValueToValueMapTy &valueMap, - bool moduleLevelChanges, - const char *nameSuffix, - ClonedCodeInfo *codeInfo, - const DataLayout *DL) - : NewFunc(newFunc), OldFunc(oldFunc), - VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), - NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) { + ValueToValueMapTy &valueMap, bool moduleLevelChanges, + const char *nameSuffix, ClonedCodeInfo *codeInfo, + CloningDirector *Director) + : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), + ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), + CodeInfo(codeInfo), Director(Director) { + // These are optional components. The Director may return null. + if (Director) { + TypeMapper = Director->getTypeRemapper(); + Materializer = Director->getValueMaterializer(); + } else { + TypeMapper = nullptr; + Materializer = nullptr; + } } - /// CloneBlock - The specified block is found to be reachable, clone it and + /// The specified block is found to be reachable, clone it and /// anything that it can reach. - void CloneBlock(const BasicBlock *BB, + void CloneBlock(const BasicBlock *BB, + BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone); }; } -/// CloneBlock - The specified block is found to be reachable, clone it and +/// The specified block is found to be reachable, clone it and /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, + BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone){ WeakVH &BBEntry = VMap[BB]; @@ -307,26 +320,45 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, const_cast<BasicBlock*>(BB)); VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); } - bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - + // Loop over all instructions, and copy them over, DCE'ing as we go. This // loop doesn't include the terminator. - for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); + for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE; ++II) { + // If the "Director" remaps the instruction, don't clone it. + if (Director) { + CloningDirector::CloningAction Action + = Director->handleInstruction(VMap, II, NewBB); + // If the cloning director says stop, we want to stop everything, not + // just break out of the loop (which would cause the terminator to be + // cloned). The cloning director is responsible for inserting a proper + // terminator into the new basic block in this case. + if (Action == CloningDirector::StopCloningBB) + return; + // If the cloning director says skip, continue to the next instruction. + // In this case, the cloning director is responsible for mapping the + // skipped instruction to some value that is defined in the new + // basic block. + if (Action == CloningDirector::SkipInstruction) + continue; + } + Instruction *NewInst = II->clone(); // Eagerly remap operands to the newly cloned instruction, except for PHI // nodes for which we defer processing until we update the CFG. if (!isa<PHINode>(NewInst)) { RemapInstruction(NewInst, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + ModuleLevelChanges ? 
RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into // the basic block. - if (Value *V = SimplifyInstruction(NewInst, DL)) { + if (Value *V = + SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. if (Value *MappedV = VMap.lookup(V)) @@ -354,6 +386,26 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Finally, clone over the terminator. const TerminatorInst *OldTI = BB->getTerminator(); bool TerminatorDone = false; + if (Director) { + CloningDirector::CloningAction Action + = Director->handleInstruction(VMap, OldTI, NewBB); + // If the cloning director says stop, we want to stop everything, not + // just break out of the loop (which would cause the terminator to be + // cloned). The cloning director is responsible for inserting a proper + // terminator into the new basic block in this case. + if (Action == CloningDirector::StopCloningBB) + return; + if (Action == CloningDirector::CloneSuccessors) { + // If the director says to skip with a terminate instruction, we still + // need to clone this block's successors. + const TerminatorInst *TI = NewBB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + ToClone.push_back(TI->getSuccessor(i)); + return; + } + assert(Action != CloningDirector::SkipInstruction && + "SkipInstruction is not valid for terminators."); + } if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) { if (BI->isConditional()) { // If the condition was a known constant in the callee... @@ -409,39 +461,53 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } } -/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, -/// except that it does some simple constant prop and DCE on the fly. The -/// effect of this is to copy significantly less code in cases where (for -/// example) a function call with constant arguments is inlined, and those -/// constant arguments cause a significant amount of code in the callee to be -/// dead. Since this doesn't produce an exact copy of the input, it can't be -/// used for things like CloneFunction or CloneModule. -void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, +/// This works like CloneAndPruneFunctionInto, except that it does not clone the +/// entire function. Instead it starts at an instruction provided by the caller +/// and copies (and prunes) only the code reachable from that instruction. 
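A hedged usage sketch of the new entry point, with NewF, OldF, Start and VMap assumed in scope and no director supplied:

SmallVector<ReturnInst *, 8> Returns;
CloneAndPruneIntoFromInst(NewF, OldF, Start, VMap,
                          /*ModuleLevelChanges=*/false, Returns, ".clone",
                          /*CodeInfo=*/nullptr, /*Director=*/nullptr);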
+void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, + const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, - SmallVectorImpl<ReturnInst*> &Returns, + SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const DataLayout *DL, - Instruction *TheCall) { + CloningDirector *Director) { assert(NameSuffix && "NameSuffix cannot be null!"); - + + ValueMapTypeRemapper *TypeMapper = nullptr; + ValueMaterializer *Materializer = nullptr; + + if (Director) { + TypeMapper = Director->getTypeRemapper(); + Materializer = Director->getValueMaterializer(); + } + #ifndef NDEBUG - for (Function::const_arg_iterator II = OldFunc->arg_begin(), - E = OldFunc->arg_end(); II != E; ++II) - assert(VMap.count(II) && "No mapping from source argument specified!"); + // If the cloning starts at the begining of the function, verify that + // the function arguments are mapped. + if (!StartingInst) + for (Function::const_arg_iterator II = OldFunc->arg_begin(), + E = OldFunc->arg_end(); II != E; ++II) + assert(VMap.count(II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo, DL); + NameSuffix, CodeInfo, Director); + const BasicBlock *StartingBB; + if (StartingInst) + StartingBB = StartingInst->getParent(); + else { + StartingBB = &OldFunc->getEntryBlock(); + StartingInst = StartingBB->begin(); + } // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; - CloneWorklist.push_back(&OldFunc->getEntryBlock()); + PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); - PFC.CloneBlock(BB, CloneWorklist); + PFC.CloneBlock(BB, BB->begin(), CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was @@ -461,16 +527,24 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // Handle PHI nodes specially, as we have to remove references to dead // blocks. - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) - if (const PHINode *PN = dyn_cast<PHINode>(I)) - PHIToResolve.push_back(PN); - else + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { + // PHI nodes may have been remapped to non-PHI nodes by the caller or + // during the cloning process. + if (const PHINode *PN = dyn_cast<PHINode>(I)) { + if (isa<PHINode>(VMap[PN])) + PHIToResolve.push_back(PN); + else + break; + } else { break; + } + } // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. RemapInstruction(NewBB->getTerminator(), VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); } // Defer PHI resolution until rest of function is resolved, PHI resolution @@ -563,13 +637,13 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) - recursivelySimplifyInstruction(PN, DL); + recursivelySimplifyInstruction(PN); // Now that the inlined function body has been fully constructed, go through - // and zap unconditional fall-through branches. 
This happen all the time when + // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]); + Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other @@ -617,12 +691,32 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // Do not increment I, iteratively merge all things this block branches to. } - // Make a final pass over the basic blocks from theh old function to gather + // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. - for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]), + for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) Returns.push_back(RI); } + + +/// This works exactly like CloneFunctionInto, +/// except that it does some simple constant prop and DCE on the fly. The +/// effect of this is to copy significantly less code in cases where (for +/// example) a function call with constant arguments is inlined, and those +/// constant arguments cause a significant amount of code in the callee to be +/// dead. Since this doesn't produce an exact copy of the input, it can't be +/// used for things like CloneFunction or CloneModule. +void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + SmallVectorImpl<ReturnInst*> &Returns, + const char *NameSuffix, + ClonedCodeInfo *CodeInfo, + Instruction *TheCall) { + CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap, + ModuleLevelChanges, Returns, NameSuffix, CodeInfo, + nullptr); +} diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp index fae9ff5..2693322 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -69,9 +69,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { auto *PTy = cast<PointerType>(I->getType()); - auto *GA = - GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), - I->getLinkage(), I->getName(), New); + auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New); GA->copyAttributesFrom(I); VMap[I] = GA; } diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index e70a7d6..ab89b41 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -332,11 +332,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, DEBUG(dbgs() << **i << ", "); DEBUG(dbgs() << ")\n"); + StructType *StructTy; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - PointerType *StructPtr = - PointerType::getUnqual(StructType::get(M->getContext(), paramTy)); + StructTy = StructType::get(M->getContext(), paramTy); paramTy.clear(); - paramTy.push_back(StructPtr); + 
paramTy.push_back(PointerType::getUnqual(StructTy)); } FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); @@ -364,8 +364,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = AI++; @@ -447,6 +447,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } } + StructType *StructArgTy = nullptr; AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector<Type*> ArgTypes; @@ -455,7 +456,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ArgTypes.push_back((*v)->getType()); // Allocate a struct at the beginning of this function - Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = new AllocaInst(StructArgTy, nullptr, "structArg", codeReplacer->getParent()->begin()->begin()); @@ -465,9 +466,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(Struct, Idx, - "gep_" + StructValues[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); codeReplacer->getInstList().push_back(GEP); StoreInst *SI = new StoreInst(StructValues[i], GEP); codeReplacer->getInstList().push_back(SI); @@ -491,9 +491,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP - = GetElementPtrInst::Create(Struct, Idx, - "gep_reload_" + outputs[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; } else { @@ -606,10 +605,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut+out); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(OAI, Idx, - "gep_" + outputs[out]->getName(), - NTRet); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(), + NTRet); new StoreInst(outputs[out], GEP, NTRet); } else { new StoreInst(outputs[out], OAI, NTRet); diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp index 26875e8..dc95089 100644 --- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/BitVector.h" #include "llvm/Transforms/Utils/CtorUtils.h" +#include 
"llvm/ADT/BitVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "ctor_utils" diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 9972b22..003da58 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -39,6 +39,19 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, F->getEntryBlock().begin()); } + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. Therefore, split the critical edge and create a basic block + // into which the store can be inserted. + if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { + if (!II->getNormalDest()->getSinglePredecessor()) { + unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest()); + assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!"); + BasicBlock *BB = SplitCriticalEdge(II, SuccNum); + assert(BB && "Unable to split critical edge."); + (void)BB; + } + } + // Change all of the users of the instruction to read from the stack slot. while (!I.use_empty()) { Instruction *U = cast<Instruction>(I.user_back()); @@ -71,7 +84,6 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, } } - // Insert stores of the computed value into the stack slot. We have to be // careful if I is an invoke instruction, because we can't insert the store // AFTER the terminator instruction. @@ -79,27 +91,13 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, if (!isa<TerminatorInst>(I)) { InsertPt = &I; ++InsertPt; + for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + /* empty */; // Don't insert before PHI nodes or landingpad instrs. } else { InvokeInst &II = cast<InvokeInst>(I); - if (II.getNormalDest()->getSinglePredecessor()) - InsertPt = II.getNormalDest()->getFirstInsertionPt(); - else { - // We cannot demote invoke instructions to the stack if their normal edge - // is critical. Therefore, split the critical edge and insert the store - // in the newly created basic block. - unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest()); - TerminatorInst *TI = &cast<TerminatorInst>(I); - assert (isCriticalEdge(TI, SuccNum) && - "Expected a critical edge!"); - BasicBlock *BB = SplitCriticalEdge(TI, SuccNum); - assert (BB && "Unable to split critical edge."); - InsertPt = BB->getFirstInsertionPt(); - } + InsertPt = II.getNormalDest()->getFirstInsertionPt(); } - for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) - /* empty */; // Don't insert before PHI nodes or landingpad instrs. 
- new StoreInst(&I, Slot, InsertPt); return Slot; } diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp index 52e2d59..44b7d25 100644 --- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -150,7 +150,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (MSI->isVolatile()) return true; GS.StoredType = GlobalStatus::Stored; - } else if (ImmutableCallSite C = I) { + } else if (auto C = ImmutableCallSite(I)) { if (!C.isCallee(&U)) return true; GS.IsLoaded = true; diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 2a86eb5..ddeaff0 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -88,7 +89,7 @@ namespace { CallerLPad = cast<LandingPadInst>(I); } - /// getOuterResumeDest - The outer unwind destination is the target of + /// The outer unwind destination is the target of /// unwind edges introduced for calls within the inlined function. BasicBlock *getOuterResumeDest() const { return OuterResumeDest; @@ -98,17 +99,16 @@ namespace { LandingPadInst *getLandingPadInst() const { return CallerLPad; } - /// forwardResume - Forward the 'resume' instruction to the caller's landing - /// pad block. When the landing pad block has only one predecessor, this is + /// Forward the 'resume' instruction to the caller's landing pad block. + /// When the landing pad block has only one predecessor, this is /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. void forwardResume(ResumeInst *RI, SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); - /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind - /// destination block for the given basic block, using the values for the - /// original invoke's source block. + /// Add incoming-PHI values to the unwind destination block for the given + /// basic block, using the values for the original invoke's source block. void addIncomingPHIValuesFor(BasicBlock *BB) const { addIncomingPHIValuesForInto(BB, OuterResumeDest); } @@ -123,7 +123,7 @@ namespace { }; } -/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts. +/// Get or create a target for the branch from ResumeInsts. BasicBlock *InvokeInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; @@ -158,8 +158,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { return InnerResumeDest; } -/// forwardResume - Forward the 'resume' instruction to the caller's landing pad -/// block. When the landing pad block has only one predecessor, this is a simple +/// Forward the 'resume' instruction to the caller's landing pad block. +/// When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. 
void InvokeInliningInfo::forwardResume(ResumeInst *RI, @@ -177,9 +177,9 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, RI->eraseFromParent(); } -/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into -/// an invoke, we have to turn all of the calls that can throw into -/// invokes. This function analyze BB to see if there are any calls, and if so, +/// When we inline a basic block into an invoke, +/// we have to turn all of the calls that can throw into invokes. +/// This function analyzes BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, @@ -227,7 +227,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, } } -/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls +/// If we inlined an invoke site, we need to convert calls /// in the body of the inlined function into invokes. /// /// II is the invoke instruction being inlined. FirstNewBlock is the first @@ -278,8 +278,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); } -/// CloneAliasScopeMetadata - When inlining a function that contains noalias -/// scope metadata, this metadata needs to be cloned so that the inlined blocks +/// When inlining a function that contains noalias scope metadata, +/// this metadata needs to be cloned so that the inlined blocks /// have different "unique scopes" at every call site. Were this not done, then /// aliasing scopes from a function inlined into a caller multiple times could /// not be differentiated (and this would lead to miscompiles because the @@ -319,13 +319,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // Now we have a complete set of all metadata in the chains used to specify // the noalias scopes and the lists of those scopes. - SmallVector<MDNode *, 16> DummyNodes; + SmallVector<TempMDTuple, 16> DummyNodes; DenseMap<const MDNode *, TrackingMDNodeRef> MDMap; for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end(); I != IE; ++I) { - MDNode *Dummy = MDNode::getTemporary(CalledFunc->getContext(), None); - DummyNodes.push_back(Dummy); - MDMap[*I].reset(Dummy); + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[*I].reset(DummyNodes.back().get()); } // Create new metadata nodes to replace the dummy nodes, replacing old @@ -343,7 +342,8 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { } MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); - MDNodeFwdDecl *TempM = cast<MDNodeFwdDecl>(MDMap[*I]); + MDTuple *TempM = cast<MDTuple>(MDMap[*I]); + assert(TempM->isTemporary() && "Expected temporary node"); TempM->replaceAllUsesWith(NewM); } @@ -388,18 +388,14 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { NI->setMetadata(LLVMContext::MD_noalias, M); } } - - // Now that everything has been replaced, delete the dummy nodes. 
- for (unsigned i = 0, ie = DummyNodes.size(); i != ie; ++i) - MDNode::deleteTemporary(DummyNodes[i]); } -/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then -/// add new alias scopes for each noalias argument, tag the mapped noalias +/// If the inlined function has noalias arguments, +/// then add new alias scopes for each noalias argument, tag the mapped noalias /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, - const DataLayout *DL, AliasAnalysis *AA) { + const DataLayout &DL, AliasAnalysis *AA) { if (!EnableNoAliasConversion) return; @@ -625,8 +621,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information. static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { - if (!PreserveAlignmentAssumptions || !IFI.DL) + if (!PreserveAlignmentAssumptions) return; + auto &DL = CS.getCaller()->getParent()->getDataLayout(); // To avoid inserting redundant assumptions, we should check for assumptions // already in the caller. To do this, we might need a DT of the caller. @@ -648,20 +645,20 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. Value *Arg = CS.getArgument(I->getArgNo()); - if (getKnownAlignment(Arg, IFI.DL, + if (getKnownAlignment(Arg, DL, CS.getInstruction(), &IFI.ACT->getAssumptionCache(*CalledFunc), - CS.getInstruction(), &DT) >= Align) + &DT) >= Align) continue; - IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg, - Align); + IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, Arg, Align); } } } -/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee -/// into the caller, update the specified callgraph to reflect the changes we -/// made. Note that it's possible that not all code was copied over, so only +/// Once we have cloned code over from a callee into the caller, +/// update the specified callgraph to reflect the changes we made. +/// Note that it's possible that not all code was copied over, so only /// some edges of the callgraph may remain. static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, @@ -696,8 +693,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. Instruction *NewCall = dyn_cast<Instruction>(VMI->second); - if (!NewCall) continue; + if (!NewCall) + continue; + // We do not treat intrinsic calls like real function calls because we + // expect them to become inline code; do not add an edge for an intrinsic. + CallSite CS = CallSite(NewCall); + if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic()) + continue; + // Remember that this call site got inlined for the client of // InlineFunction. 
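
In the CloneAliasScopeMetadata hunk above, the raw MDNode::getTemporary()/deleteTemporary() pairing is replaced by TempMDTuple, an owning handle, which is why the manual cleanup loop at the end of the function is deleted. A minimal sketch of the forward-declare-then-replace pattern under that API; the function name and the trivial "real" node here are illustrative, not part of the diff:

#include "llvm/ADT/None.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

static void forwardDeclareNode(LLVMContext &Ctx) {
  // Owning temporary node; destroyed automatically when Temp leaves scope.
  TempMDTuple Temp = MDTuple::getTemporary(Ctx, None);
  // ... record Temp.get() wherever a placeholder operand is needed ...
  MDNode *Real = MDNode::get(Ctx, None);
  // Point every user of the placeholder at the finished node.
  Temp->replaceAllUsesWith(Real);
} // No MDNode::deleteTemporary() call is needed here.
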
IFI.InlinedCalls.push_back(NewCall); @@ -729,11 +733,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); IRBuilder<> Builder(InsertBlock->begin()); - Value *Size; - if (IFI.DL == nullptr) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = Builder.getInt64(IFI.DL->getTypeStoreSize(AggTy)); + Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer @@ -741,7 +741,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); } -/// HandleByValArgument - When inlining a call site that has a byval argument, +/// When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, @@ -762,11 +762,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. return Arg; + const DataLayout &DL = Caller->getParent()->getDataLayout(); + // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.DL, - &IFI.ACT->getAssumptionCache(*Caller), - TheCall) >= ByValAlignment) + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, + &IFI.ACT->getAssumptionCache(*Caller)) >= + ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad @@ -774,10 +776,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, } // Create the alloca. If we have DataLayout, use nice alignment. - unsigned Align = 1; - if (IFI.DL) - Align = IFI.DL->getPrefTypeAlignment(AggTy); - + unsigned Align = + Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); + // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). @@ -792,8 +793,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, return NewAlloca; } -// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime -// intrinsic. +// Check whether this Value is used by a lifetime intrinsic. static bool isUsedByLifetimeMarker(Value *V) { for (User *U : V->users()) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { @@ -808,7 +808,7 @@ static bool isUsedByLifetimeMarker(Value *V) { return false; } -// hasLifetimeMarkers - Check whether the given alloca already has +// Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { Type *Ty = AI->getType(); @@ -827,35 +827,69 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { return false; } -/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to -/// recursively update InlinedAtEntry of a DebugLoc. 
-static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, - const DebugLoc &InlinedAtDL, - LLVMContext &Ctx) { - if (MDNode *IA = DL.getInlinedAt(Ctx)) { - DebugLoc NewInlinedAtDL - = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx); - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), - NewInlinedAtDL.getAsMDNode(Ctx)); +/// Rebuild the entire inlined-at chain for this instruction so that the top of +/// the chain now is inlined-at the new call site. +static DebugLoc +updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, + DenseMap<const DILocation *, DILocation *> &IANodes) { + SmallVector<DILocation *, 3> InlinedAtLocations; + DILocation *Last = InlinedAtNode; + DILocation *CurInlinedAt = DL; + + // Gather all the inlined-at nodes + while (DILocation *IA = CurInlinedAt->getInlinedAt()) { + // Skip any we've already built nodes for + if (DILocation *Found = IANodes[IA]) { + Last = Found; + break; + } + + InlinedAtLocations.push_back(IA); + CurInlinedAt = IA; } - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), - InlinedAtDL.getAsMDNode(Ctx)); + // Starting from the top, rebuild the nodes to point to the new inlined-at + // location (then rebuilding the rest of the chain behind it) and update the + // map of already-constructed inlined-at nodes. + for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend(); + I != E; ++I) { + const DILocation *MD = *I; + Last = IANodes[MD] = DILocation::getDistinct( + Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); + } + + // And finally create the normal location for this instruction, referring to + // the new inlined-at chain. + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); } -/// fixupLineNumbers - Update inlined instructions' line numbers to +/// Update inlined instructions' line numbers to /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { DebugLoc TheCallDL = TheCall->getDebugLoc(); - if (TheCallDL.isUnknown()) + if (!TheCallDL) return; + auto &Ctx = Fn->getContext(); + DILocation *InlinedAtNode = TheCallDL; + + // Create a unique call site, not to be confused with any other call from the + // same location. + InlinedAtNode = DILocation::getDistinct( + Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), + InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); + + // Cache the inlined-at nodes as they're built so they are reused, without + // this every instruction's inlined-at chain would become distinct from each + // other. + DenseMap<const DILocation *, DILocation *> IANodes; + for (; FI != Fn->end(); ++FI) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (DL.isUnknown()) { + if (!DL) { // If the inlined instruction has no line number, make it look as if it // originates from the call location. 
This is important for // ((__always_inline__, __nodebug__)) functions which must use caller @@ -868,29 +902,15 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, BI->setDebugLoc(TheCallDL); } else { - BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); - if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DVI->setOperand(2, MetadataAsValue::get( - Ctx, createInlinedVariable(DVI->getVariable(), - InlinedAt, Ctx))); - } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DDI->setOperand(1, MetadataAsValue::get( - Ctx, createInlinedVariable(DDI->getVariable(), - InlinedAt, Ctx))); - } + BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); } } } } -/// InlineFunction - This function inlines the called function into the basic -/// block of the caller. This returns false if it is not possible to inline -/// this call. The program is still in a well defined state if this occurs -/// though. +/// This function inlines the called function into the basic block of the +/// caller. This returns false if it is not possible to inline this call. +/// The program is still in a well defined state if this occurs though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now @@ -975,6 +995,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Keep a list of pair (dst, src) to emit byval initializations. SmallVector<std::pair<Value*, Value*>, 4> ByValInit; + auto &DL = Caller->getParent()->getDataLayout(); + assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -1009,9 +1031,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, IFI.DL, TheCall); + &InlinedFunctionInfo, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -1032,7 +1054,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAliasScopeMetadata(CS, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, IFI.DL, IFI.AA); + AddAliasScopeMetadata(CS, VMap, DL, IFI.AA); // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. @@ -1079,6 +1101,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, FirstNewBlock->getInstList(), AI, I); } + // Move any dbg.declares describing the allocas into the entry basic block. 
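
An aside on the updateInlinedAtInfo rewrite earlier in this hunk, before the dbg.declare relocation code that follows: it rebuilds an instruction's entire inlined-at chain so that the outermost entry points at the newly created call-site location, caching rebuilt links in IANodes so instructions sharing a chain suffix also share the rebuilt nodes. The standalone sketch below shows the same memoized bottom-up rebuild on a toy location type; the Loc struct and the rebuild helper are illustrations of the algorithm, not LLVM API:

#include <map>
#include <memory>
#include <vector>

// Toy stand-in for DILocation: a line number plus an optional
// inlined-at parent (null at the top of the chain).
struct Loc {
  int Line;
  const Loc *InlinedAt;
};

// Re-issue DL so that the top of its inlined-at chain becomes NewSite.
// Memo caches rebuilt links; Storage owns the nodes we create.
static const Loc *rebuild(const Loc *DL, const Loc *NewSite,
                          std::map<const Loc *, const Loc *> &Memo,
                          std::vector<std::unique_ptr<Loc>> &Storage) {
  // Gather the chain, stopping early at any already-rebuilt link.
  std::vector<const Loc *> Chain;
  const Loc *Last = NewSite;
  for (const Loc *IA = DL->InlinedAt; IA; IA = IA->InlinedAt) {
    auto Found = Memo.find(IA);
    if (Found != Memo.end()) { Last = Found->second; break; }
    Chain.push_back(IA);
  }
  // Rebuild from the outermost gathered link down, memoizing as we go.
  for (auto I = Chain.rbegin(), E = Chain.rend(); I != E; ++I) {
    Storage.push_back(std::make_unique<Loc>(Loc{(*I)->Line, Last}));
    Last = Memo[*I] = Storage.back().get();
  }
  // Finally, re-issue DL itself against the rebuilt chain.
  Storage.push_back(std::make_unique<Loc>(Loc{DL->Line, Last}));
  return Storage.back().get();
}

int main() {
  Loc Outer{10, nullptr}, Inner{20, &Outer}, Use{30, &Inner};
  Loc NewSite{99, nullptr};
  std::map<const Loc *, const Loc *> Memo;
  std::vector<std::unique_ptr<Loc>> Storage;
  const Loc *R = rebuild(&Use, &NewSite, Memo, Storage);
  // R's chain is now 30 -> 20 -> 10 -> 99 (the new call site).
  return R->InlinedAt->InlinedAt->InlinedAt == &NewSite ? 0 : 1;
}
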
+ DIBuilder DIB(*Caller->getParent()); + for (auto &AI : IFI.StaticAllocas) + replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); } bool InlinedMustTailCalls = false; @@ -1136,18 +1162,21 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { - if (IFI.DL) { - Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType); - uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); - assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); - // Check that array size doesn't saturate uint64_t and doesn't - // overflow when it's multiplied by type size. - if (AllocaArraySize != ~0ULL && - UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { - AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), - AllocaArraySize * AllocaTypeSize); - } + auto &DL = Caller->getParent()->getDataLayout(); + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + + // Don't add markers for zero-sized allocas. + if (AllocaArraySize == 0) + continue; + + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); } } @@ -1173,7 +1202,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) - .CreateCall(StackSave, "savedstack"); + .CreateCall(StackSave, {}, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. @@ -1408,7 +1437,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. 
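
A note on the lifetime-marker hunk above, before the PHI cleanup that follows: the product AllocaArraySize * AllocaTypeSize is only formed after checking that it cannot wrap, using the standard division trick, and a saturated getLimitedValue() result (~0ULL) is rejected outright since the true count may be larger still. A standalone illustration of the guard (not LLVM code; the helper name is mine):

#include <cassert>
#include <cstdint>

// For nonzero A, A * B wraps in 64 bits exactly when B > UINT64_MAX / A,
// so this division is a wrap-free way to validate the multiplication.
static bool mulFitsU64(uint64_t A, uint64_t B) {
  assert(A != 0 && "caller filters out zero-sized allocas");
  return UINT64_MAX / A >= B;
}

int main() {
  assert(mulFitsU64(1u << 20, 8));        // a few MiB of elements: fine
  assert(!mulFitsU64(UINT64_MAX / 2, 3)); // would wrap
  return 0;
}
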
if (PHI) { - if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr, + auto &DL = Caller->getParent()->getDataLayout(); + if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, &IFI.ACT->getAssumptionCache(*Caller))) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 0ae746c..30edf3b 100644 --- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -252,8 +252,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); - Value *Tmp0 = Builder.CreateCall2(CTLZ, Divisor, True); - Value *Tmp1 = Builder.CreateCall2(CTLZ, Dividend, True); + Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True}); + Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True}); Value *SR = Builder.CreateSub(Tmp0, Tmp1); Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 3f9b702..9d40b69 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -112,17 +112,17 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - PHINode *PN = PHINode::Create(Inst.getType(), PredCache.GetNumPreds(ExitBB), + PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB), Inst.getName() + ".lcssa", ExitBB->begin()); // Add inputs from inside the loop for this PHI. - for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { - PN->addIncoming(&Inst, *PI); + for (BasicBlock *Pred : PredCache.get(ExitBB)) { + PN->addIncoming(&Inst, Pred); // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. - if (!L.contains(*PI)) + if (!L.contains(Pred)) UsesToRewrite.push_back( &PN->getOperandUse(PN->getOperandNumForIncomingValue( PN->getNumIncomingValues() - 1))); @@ -294,21 +294,18 @@ struct LCSSA : public FunctionPass { AU.setPreservesCFG(); AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfo>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.addPreservedID(LoopSimplifyID); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); } - -private: - void verifyAnalysis() const override; }; } char LCSSA::ID = 0; INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) Pass *llvm::createLCSSAPass() { return new LCSSA(); } @@ -318,7 +315,7 @@ char &llvm::LCSSAID = LCSSA::ID; /// Process all loops in the function, inner-most out. 
bool LCSSA::runOnFunction(Function &F) { bool Changed = false; - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = getAnalysisIfAvailable<ScalarEvolution>(); @@ -329,18 +326,3 @@ bool LCSSA::runOnFunction(Function &F) { return Changed; } -static void verifyLoop(Loop &L, DominatorTree &DT) { - // Recurse depth-first through inner loops. - for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) - verifyLoop(**LI, DT); - - // Check the special guarantees that LCSSA makes. - //assert(L.isLCSSAForm(DT) && "LCSSA form not preserved!"); -} - -void LCSSA::verifyAnalysis() const { - // Verify each loop nest in the function, assuming LI still points at that - // function's loop info. - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - verifyLoop(**I, *DT); -} diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 2a84d7e..70c77b0 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -110,11 +111,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, } if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) { - // If we are switching on a constant, we can convert the switch into a - // single branch instruction! + // If we are switching on a constant, we can convert the switch to an + // unconditional branch. ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition()); - BasicBlock *TheOnlyDest = SI->getDefaultDest(); - BasicBlock *DefaultDest = TheOnlyDest; + BasicBlock *DefaultDest = SI->getDefaultDest(); + BasicBlock *TheOnlyDest = DefaultDest; + + // If the default is unreachable, ignore it when searching for TheOnlyDest. + if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) && + SI->getNumCases() > 0) { + TheOnlyDest = SI->case_begin().getCaseSuccessor(); + } // Figure out which case it goes to. for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); @@ -410,7 +417,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. -bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool MadeChange = false; @@ -427,7 +434,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, Instruction *Inst = BI++; WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TD, TLI)) { + if (recursivelySimplifyInstruction(Inst, TLI)) { MadeChange = true; if (BIHandle != BI) BI = BB->begin(); @@ -457,8 +464,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, /// /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the and to 0. -void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, - DataLayout *TD) { +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // This only adjusts blocks with PHI nodes. 
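
The ConstantFoldTerminator hunk above adds one refinement to the switch fold, worth isolating before the RemovePredecessorAndSimplify body continues below: a default destination that immediately hits unreachable can never be taken, so the scan for a single surviving destination may seed itself from the first real case instead. A sketch of just that seeding step, using the accessors from the hunk (the helper name seedOnlyDest is mine; the rest of the fold is assumed around it):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Choose the starting candidate for "the only destination" of SI.
// A default block that leads straight to unreachable is ignored.
static BasicBlock *seedOnlyDest(SwitchInst *SI) {
  BasicBlock *DefaultDest = SI->getDefaultDest();
  if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
      SI->getNumCases() > 0)
    return SI->case_begin().getCaseSuccessor();
  return DefaultDest;
}
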
if (!isa<PHINode>(BB->begin())) return; @@ -473,7 +479,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); Value *OldPhiIt = PhiIt; - if (!recursivelySimplifyInstruction(PN, TD)) + if (!recursivelySimplifyInstruction(PN)) continue; // If recursive simplification ended up deleting the next PHI node we would @@ -489,7 +495,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. /// -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); @@ -525,14 +531,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { if (PredBB == &DestBB->getParent()->getEntryBlock()) DestBB->moveAfter(PredBB); - if (P) { - if (DominatorTreeWrapperPass *DTWP = - P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) { - DominatorTree &DT = DTWP->getDomTree(); - BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock(); - DT.changeImmediateDominator(DestBB, PredBBIDom); - DT.eraseNode(PredBB); - } + if (DT) { + BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); + DT->changeImmediateDominator(DestBB, PredBBIDom); + DT->eraseNode(PredBB); } // Nuke BB. PredBB->eraseFromParent(); @@ -897,13 +899,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// their preferred alignment from the beginning. /// static unsigned enforceKnownAlignment(Value *V, unsigned Align, - unsigned PrefAlign, const DataLayout *TD) { + unsigned PrefAlign, + const DataLayout &DL) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { // If the preferred alignment is greater than the natural stack alignment // then don't round up. This avoids dynamic stack realignment. - if (TD && TD->exceedsNaturalStackAlignment(PrefAlign)) + if (DL.exceedsNaturalStackAlignment(PrefAlign)) return Align; // If there is a requested alignment and if this is an alloca, round up. if (AI->getAlignment() >= PrefAlign) @@ -942,13 +945,13 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, /// and it is more than the alignment of the ultimate object, see if we can /// increase the alignment of the ultimate object, making this check succeed. unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, - const DataLayout *DL, - AssumptionCache *AC, + const DataLayout &DL, const Instruction *CxtI, + AssumptionCache *AC, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64; + unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); @@ -975,7 +978,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, /// /// See if there is a dbg.value intrinsic for DIVar before I. 
-static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { +static bool LdStHasDebugValue(const DILocalVariable *DIVar, Instruction *I) { // Since we can't guarantee that the original dbg.declare instrinsic // is removed by LowerDbgDeclare(), we need to make sure that we are // not inserting the same dbg.value intrinsic over and over. @@ -995,17 +998,13 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar) - return false; + auto *DIVar = DDI->getVariable(); + auto *DIExpr = DDI->getExpression(); + assert(DIVar && "Missing variable"); if (LdStHasDebugValue(DIVar, SI)) return true; - Instruction *DbgVal = nullptr; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. Argument *ExtendedArg = nullptr; @@ -1014,11 +1013,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); if (ExtendedArg) - DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI); + Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); else - DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, - DIExpr, SI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); return true; } @@ -1026,19 +1025,15 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar) - return false; + auto *DIVar = DDI->getVariable(); + auto *DIExpr = DDI->getExpression(); + assert(DIVar && "Missing variable"); if (LdStHasDebugValue(DIVar, LI)) return true; - Instruction *DbgVal = - Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), LI); return true; } @@ -1080,10 +1075,9 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. 
- auto DbgVal = DIB.insertDbgValueIntrinsic( - AI, 0, DIVariable(DDI->getVariable()), - DIExpression(DDI->getExpression()), CI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), + DDI->getExpression(), DDI->getDebugLoc(), + CI); } DDI->eraseFromParent(); } @@ -1104,32 +1098,31 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder) { + DIBuilder &Builder, bool Deref) { DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); if (!DDI) return false; - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar) - return false; - - // Create a copy of the original DIDescriptor for user variable, prepending - // "deref" operation to a list of address elements, as new llvm.dbg.declare - // will take a value storing address of the memory for variable, not - // alloca itself. - SmallVector<int64_t, 4> NewDIExpr; - NewDIExpr.push_back(dwarf::DW_OP_deref); - if (DIExpr) - for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i) - NewDIExpr.push_back(DIExpr.getElement(i)); + DebugLoc Loc = DDI->getDebugLoc(); + auto *DIVar = DDI->getVariable(); + auto *DIExpr = DDI->getExpression(); + assert(DIVar && "Missing variable"); + + if (Deref) { + // Create a copy of the original DIDescriptor for user variable, prepending + // "deref" operation to a list of address elements, as new llvm.dbg.declare + // will take a value storing address of the memory for variable, not + // alloca itself. + SmallVector<uint64_t, 4> NewDIExpr; + NewDIExpr.push_back(dwarf::DW_OP_deref); + if (DIExpr) + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); + DIExpr = Builder.createExpression(NewDIExpr); + } // Insert llvm.dbg.declare in the same basic block as the original alloca, // and remove old llvm.dbg.declare. BasicBlock *BB = AI->getParent(); - Builder.insertDeclare(NewAllocaAddress, DIVar, - Builder.createExpression(NewDIExpr), BB); + Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB); DDI->eraseFromParent(); return true; } @@ -1254,7 +1247,7 @@ static bool markAliveBlocks(BasicBlock *BB, if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { changeToUnreachable(II, true); Changed = true; - } else if (II->doesNotThrow()) { + } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) { if (II->use_empty() && II->onlyReadsMemory()) { // jump to the normal destination branch. 
BranchInst::Create(II->getNormalDest(), II); @@ -1350,3 +1343,23 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign } } } + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlockEdge &Root) { + assert(From->getType() == To->getType()); + + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE; ) { + Use &U = *UI++; + if (DT.dominates(Root, U)) { + U.set(To); + DEBUG(dbgs() << "Replace dominated use of '" + << From->getName() << "' as " + << *To << " in " << *U << "\n"); + ++Count; + } + } + return Count; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index c832a4b..90dfaba 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -57,8 +57,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -113,6 +115,14 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { BasicBlock *Header = L->getHeader(); + // Get analyses that we try to update. + auto *AA = PP->getAnalysisIfAvailable<AliasAnalysis>(); + auto *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = PP->getAnalysisIfAvailable<LoopInfoWrapperPass>(); + auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); + // Compute the set of predecessors of the loop that are not in the loop. SmallVector<BasicBlock*, 8> OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); @@ -131,15 +141,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // Split out the loop pre-header. BasicBlock *PreheaderBB; - if (!Header->isLandingPad()) { - PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", - PP); - } else { - SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", - ".split-lp", PP, NewBBs); - PreheaderBB = NewBBs[0]; - } + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", + AA, DT, LI, PreserveLCSSA); PreheaderBB->getTerminator()->setDebugLoc( Header->getFirstNonPHI()->getDebugLoc()); @@ -157,7 +160,9 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { /// /// This method is used to split exit blocks that have predecessors outside of /// the loop. 
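
The new replaceDominatedUsesWith utility at the end of the Local.cpp diff above is the generic form of GVN-style edge propagation: once a branch proves a fact such as X == C along one edge, any use of X dominated by that edge can be rewritten. A hypothetical call site follows (the helper and block names are assumptions, and the declaration is assumed to be exposed from Transforms/Utils/Local.h in this tree); the rewriteLoopExitBlock diff resumes after it:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// After 'br i1 (icmp eq X, C), TrueBB, ...' in BB, rewrite the uses of
// X that can only execute once the true edge has been taken.
static unsigned propagateEquality(Value *X, Value *C, BasicBlock *BB,
                                  BasicBlock *TrueBB, DominatorTree &DT) {
  BasicBlockEdge TrueEdge(BB, TrueBB);
  return replaceDominatedUsesWith(X, C, DT, TrueEdge);
}
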
-static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) { +static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, + AliasAnalysis *AA, DominatorTree *DT, + LoopInfo *LI, Pass *PP) { SmallVector<BasicBlock*, 8> LoopBlocks; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; @@ -172,15 +177,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) { assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewExitBB = nullptr; - if (Exit->isLandingPad()) { - SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(Exit, LoopBlocks, - ".loopexit", ".nonloopexit", - PP, NewBBs); - NewExitBB = NewBBs[0]; - } else { - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP); - } + bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); + + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT, + LI, PreserveLCSSA); DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " << NewExitBB->getName() << "\n"); @@ -211,10 +211,11 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, DominatorTree *DT, AssumptionCache *AC) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -287,9 +288,11 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (SE) SE->forgetLoop(L); + bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); + BasicBlock *Header = L->getHeader(); - BasicBlock *NewBB = - SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP); + BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", + AA, DT, LI, PreserveLCSSA); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -460,7 +463,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // Update Loop Information - we know that this block is now in the current // loop and all parent loops. - L->addBasicBlockToLoop(BEBlock, LI->getBase()); + L->addBasicBlockToLoop(BEBlock, *LI); // Update dominator information DT->splitBlock(BEBlock); @@ -476,7 +479,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, /// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP, const DataLayout *DL, + ScalarEvolution *SE, Pass *PP, AssumptionCache *AC) { bool Changed = false; ReprocessLoop: @@ -567,7 +570,7 @@ ReprocessLoop: // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. if (!L->contains(*PI)) { - if (rewriteLoopExitBlock(L, ExitBlock, PP)) { + if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) { ++NumInserted; Changed = true; } @@ -608,13 +611,15 @@ ReprocessLoop: } } + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + // Scan over the PHI nodes in the loop header. 
Since they now have only two // incoming values (the loop is canonicalized), we may have simplified the PHI // down to 'X = phi [X, Y]', which should be replaced with 'Y'. PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -676,7 +681,8 @@ ReprocessLoop: // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI, DL)) continue; + if (!FoldBranchToCommonDest(BI)) + continue; // Success. The block is now dead, so remove it from the loop, // update the dominator tree and delete it. @@ -714,7 +720,7 @@ ReprocessLoop: bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, AliasAnalysis *AA, ScalarEvolution *SE, - const DataLayout *DL, AssumptionCache *AC) { + AssumptionCache *AC) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. @@ -726,13 +732,12 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, // order. We can use this simple process because loops form a tree. for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { Loop *L2 = Worklist[Idx]; - for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I) - Worklist.push_back(*I); + Worklist.append(L2->begin(), L2->end()); } while (!Worklist.empty()) Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, - SE, PP, DL, AC); + SE, PP, AC); return Changed; } @@ -750,7 +755,6 @@ namespace { DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; AssumptionCache *AC; bool runOnFunction(Function &F) override; @@ -762,8 +766,8 @@ namespace { AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); @@ -781,7 +785,7 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) @@ -795,16 +799,14 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } bool LoopSimplify::runOnFunction(Function &F) { bool Changed = false; AA = getAnalysisIfAvailable<AliasAnalysis>(); - LI = &getAnalysis<LoopInfo>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = getAnalysisIfAvailable<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); // Simplify each loop nest in the function. 
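
Many of the LoopSimplify and LCSSA hunks above are mechanical fallout from LoopInfo moving behind LoopInfoWrapperPass: a pass now requires the wrapper and unwraps the result. A stripped-down pass skeleton showing the pattern (ExampleLoopPass is hypothetical, and registration boilerplate such as INITIALIZE_PASS is omitted); the runOnFunction loop of LoopSimplify continues below:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct ExampleLoopPass : public FunctionPass {
  static char ID;
  ExampleLoopPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Depend on the wrapper pass, not on LoopInfo directly.
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override {
    // Unwrap the analysis result held by the wrapper.
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    (void)LI; // A real pass would walk LI's loops here.
    return false;
  }
};
}
char ExampleLoopPass::ID = 0;
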
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AC); + Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC); return Changed; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 5745920..1dbce47 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -26,8 +26,8 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// +/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that +/// have a runtime (i.e. not compile time constant) trip count. Unrolling these +/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count" +/// iterations before branching into the unrolled loop. UnrollLoop will not +/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and +/// AllowExpensiveTripCount is false. +/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be @@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, - bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI, - Pass *PP, LPPassManager *LPM, AssumptionCache *AC) { + bool AllowRuntime, bool AllowExpensiveTripCount, + unsigned TripMultiple, LoopInfo *LI, Pass *PP, + LPPassManager *LPM, AssumptionCache *AC) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); - if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) + if (RuntimeTripCount && + !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM)) return false; // Notify ScalarEvolution that the loop will be substantially changed, @@ -311,7 +320,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Tell LI about New. if (*BB == Header) { assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); - L->addBasicBlockToLoop(New, LI->getBase()); + L->addBasicBlockToLoop(New, *LI); } else { // Figure out which loop New is in. const Loop *OldLoop = LI->getLoopFor(*BB); @@ -333,7 +342,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (SE) SE->forgetLoop(OldLoop); } - NewLoop->addBasicBlockToLoop(New, LI->getBase()); + NewLoop->addBasicBlockToLoop(New, *LI); } if (*BB == Header) @@ -500,6 +509,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // At this point, the code is well formed. 
We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. + const DataLayout &DL = Header->getModule()->getDataLayout(); const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) @@ -508,7 +518,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); - else if (Value *V = SimplifyInstruction(Inst)) + else if (Value *V = SimplifyInstruction(Inst, DL)) if (LI->replacementPreservesLCSSAForm(Inst, V)) { Inst->replaceAllUsesWith(V); (*BB)->getInstList().erase(Inst); @@ -531,9 +541,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; - simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC); // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after @@ -549,3 +557,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, return true; } + +/// Given an llvm.loop loop id metadata node, returns the loop hint metadata +/// node with the given name (for example, "llvm.loop.unroll.count"). If no +/// such metadata node exists, then nullptr is returned. +MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { + // First operand should refer to the loop id itself. + assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); + + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (!MD) + continue; + + MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + if (!S) + continue; + + if (Name.equals(S->getString())) + return MD; + } + return nullptr; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 8a32215..d1774df 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -23,14 +23,18 @@ #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include <algorithm> @@ -58,7 +62,8 @@ STATISTIC(NumRuntimeUnrolled, static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *LastPrologBB, BasicBlock *PrologEnd, BasicBlock *OrigPH, BasicBlock *NewPH, - ValueToValueMapTy &VMap, Pass *P) { + ValueToValueMapTy &VMap, AliasAnalysis *AA, + DominatorTree *DT, LoopInfo *LI, Pass *P) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); @@ -122,13 
+127,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); - if (!Exit->isLandingPad()) { - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P); - } else { - SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa", - P, NewBBs); - } + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI, + P->mustPreserveAnalysisID(LCSSAID)); // Add the branch to the exit block (around the unrolled loop) BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt); InsertPt->eraseFromParent(); @@ -167,9 +167,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, NewBlocks.push_back(NewBB); if (NewLoop) - NewLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + NewLoop->addBasicBlockToLoop(NewBB, *LI); else if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; if (Header == *BB) { @@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, /// ... /// End: /// -bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, +bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, + bool AllowExpensiveTripCount, LoopInfo *LI, LPPassManager *LPM) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) @@ -312,15 +313,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; + BasicBlock *Header = L->getHeader(); + const DataLayout &DL = Header->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "loop-unroll"); + if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L)) + return false; + // We only handle cases when the unroll factor is a power of 2. // Count is the loop unroll factor, the number of extra copies added + 1. if (!isPowerOf2_32(Count)) return false; // This constraint lets us deal with an overflowing trip count easily; see the - // comment on ModVal below. This check is equivalent to `Log2(Count) < - // BEWidth`. - if (static_cast<uint64_t>(Count) > (1ULL << BEWidth)) + // comment on ModVal below. + if (Log2_32(Count) > BEWidth) return false; // If this loop is nested, then the loop unroller changes the code in @@ -328,18 +334,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); + // Grab analyses that we preserve. + auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; + BasicBlock *PH = L->getLoopPreheader(); - BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to split the original preheader twice, one for the end of the // prolog code and one for a new loop preheader - BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); - BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); + BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI); + BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI); BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % (loop unroll factor + 1) - SCEVExpander Expander(*SE, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), @@ -408,7 +416,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, // PHI functions. BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]); ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, - LPM->getAsPass()); + /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass()); NumRuntimeUnrolled++; return true; } diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp new file mode 100644 index 0000000..a5890c0 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -0,0 +1,499 @@ +//===-- LoopUtils.cpp - Loop Utility functions -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines common loop utility functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Debug.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "loop-utils" + +bool ReductionDescriptor::areAllUsesIn(Instruction *I, + SmallPtrSetImpl<Instruction *> &Set) { + for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) + if (!Set.count(dyn_cast<Instruction>(*Use))) + return false; + return true; +} + +bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind, + Loop *TheLoop, bool HasFunNoNaNAttr, + ReductionDescriptor &RedDes) { + if (Phi->getNumIncomingValues() != 2) + return false; + + // Reduction variables are only found in the loop header block. + if (Phi->getParent() != TheLoop->getHeader()) + return false; + + // Obtain the reduction start value from the value that comes from the loop + // preheader. + Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); + + // ExitInstruction is the single value which is used outside the loop. + // We only allow for a single reduction value to be used outside the loop. + // This includes users of the reduction, variables (which form a cycle + // which ends in the phi node).
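To make the reduction description above concrete, here is a hedged source-level example of a cycle AddReductionVar would classify as an RK_IntegerAdd reduction (the function and variable names are illustrative, not taken from the code):

    // 'sum' plays the role of the header phi: it starts at the preheader
    // value (RdxStart = 0), is updated by a single Add per iteration, and
    // has exactly one out-of-loop user (the return, the ExitInstruction).
    int sum_reduction(const int *a, int n) {
      int sum = 0;
      for (int i = 0; i < n; ++i)
        sum += a[i];
      return sum;
    }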
+ Instruction *ExitInstruction = nullptr; + // Indicates that we found a reduction operation in our scan. + bool FoundReduxOp = false; + + // We start with the PHI node and scan for all of the users of this + // instruction. All users must be instructions that can be used as reduction + // variables (such as ADD). We must have a single out-of-block user. The cycle + // must include the original PHI. + bool FoundStartPHI = false; + + // To recognize min/max patterns formed by an icmp select sequence, we store + // the number of instructions we saw from the recognized min/max pattern, + // to make sure we only see exactly the two instructions. + unsigned NumCmpSelectPatternInst = 0; + ReductionInstDesc ReduxDesc(false, nullptr); + + SmallPtrSet<Instruction *, 8> VisitedInsts; + SmallVector<Instruction *, 8> Worklist; + Worklist.push_back(Phi); + VisitedInsts.insert(Phi); + + // A value in the reduction can be used: + // - By the reduction: + // - Reduction operation: + // - One use of reduction value (safe). + // - Multiple use of reduction value (not safe). + // - PHI: + // - All uses of the PHI must be the reduction (safe). + // - Otherwise, not safe. + // - By one instruction outside of the loop (safe). + // - By further instructions outside of the loop (not safe). + // - By an instruction that is not part of the reduction (not safe). + // This is either: + // * An instruction type other than PHI or the reduction operation. + // * A PHI in the header other than the initial PHI. + while (!Worklist.empty()) { + Instruction *Cur = Worklist.back(); + Worklist.pop_back(); + + // No Users. + // If the instruction has no users then this is a broken chain and can't be + // a reduction variable. + if (Cur->use_empty()) + return false; + + bool IsAPhi = isa<PHINode>(Cur); + + // A header PHI use other than the original PHI. + if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) + return false; + + // Reductions of instructions such as Div and Sub are only possible if the + // LHS is the reduction variable. + if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && + !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && + !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) + return false; + + // Any reduction instruction must be of one of the allowed kinds. + ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); + if (!ReduxDesc.isReduction()) + return false; + + // A reduction operation must only have one use of the reduction value. + if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && + hasMultipleUsesOf(Cur, VisitedInsts)) + return false; + + // All inputs to a PHI node must be a reduction value. + if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) + return false; + + if (Kind == RK_IntegerMinMax && + (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) + ++NumCmpSelectPatternInst; + if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) + ++NumCmpSelectPatternInst; + + // Check whether we found a reduction operator. + FoundReduxOp |= !IsAPhi; + + // Process users of current instruction. Push non-PHI nodes after PHI nodes + // onto the stack. This way we are going to have seen all inputs to PHI + // nodes once we get to them. + SmallVector<Instruction *, 8> NonPHIs; + SmallVector<Instruction *, 8> PHIs; + for (User *U : Cur->users()) { + Instruction *UI = cast<Instruction>(U); + + // Check if we found the exit user.
+ BasicBlock *Parent = UI->getParent(); + if (!TheLoop->contains(Parent)) { + // Exit if you find multiple outside users or if the header phi node is + // being used. In this case the user uses the value of the previous + // iteration, in which case we would lose "VF-1" iterations of the + // reduction operation if we vectorize. + if (ExitInstruction != nullptr || Cur == Phi) + return false; + + // The instruction used by an outside user must be the last instruction + // before we feed back to the reduction phi. Otherwise, we lose VF-1 + // operations on the value. + if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end()) + return false; + + ExitInstruction = Cur; + continue; + } + + // Process instructions only once (termination). Each reduction cycle + // value must only be used once, except by phi nodes and min/max + // reductions which are represented as a cmp followed by a select. + ReductionInstDesc IgnoredVal(false, nullptr); + if (VisitedInsts.insert(UI).second) { + if (isa<PHINode>(UI)) + PHIs.push_back(UI); + else + NonPHIs.push_back(UI); + } else if (!isa<PHINode>(UI) && + ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && + !isa<SelectInst>(UI)) || + !isMinMaxSelectCmpPattern(UI, IgnoredVal).isReduction())) + return false; + + // Remember that we completed the cycle. + if (UI == Phi) + FoundStartPHI = true; + } + Worklist.append(PHIs.begin(), PHIs.end()); + Worklist.append(NonPHIs.begin(), NonPHIs.end()); + } + + // This means we have seen one but not the other instruction of the + // pattern or more than just a select and cmp. + if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && + NumCmpSelectPatternInst != 2) + return false; + + if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) + return false; + + // We found a reduction var if we have reached the original phi node and we + // only have a single instruction with out-of-loop users. + + // The ExitInstruction (the instruction which is allowed to have out-of-loop + // users) is saved as part of the ReductionDescriptor. + + // Save the description of this reduction variable. + ReductionDescriptor RD(RdxStart, ExitInstruction, Kind, + ReduxDesc.getMinMaxKind()); + + RedDes = RD; + + return true; +} + +/// Returns a descriptor recording whether the instruction is a +/// Select(ICmp(X, Y), X, Y) pattern corresponding to a min(X, Y) or max(X, Y). +ReductionInstDesc +ReductionDescriptor::isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev) { + + assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && + "Expect a select instruction"); + Instruction *Cmp = nullptr; + SelectInst *Select = nullptr; + + // We must handle the select(cmp()) as a single instruction. Advance to the + // select. + if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { + if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) + return ReductionInstDesc(false, I); + return ReductionInstDesc(Select, Prev.getMinMaxKind()); + } + + // Only handle single use cases for now. + if (!(Select = dyn_cast<SelectInst>(I))) + return ReductionInstDesc(false, I); + if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && + !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) + return ReductionInstDesc(false, I); + if (!Cmp->hasOneUse()) + return ReductionInstDesc(false, I); + + Value *CmpLeft; + Value *CmpRight; + + // Look for a min/max pattern.
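The min/max handling expects the reduction step to appear as exactly one compare feeding one select (NumCmpSelectPatternInst must end up at 2). A hedged source shape that optimizers typically lower to that icmp-plus-select pair (names illustrative; n >= 1 is assumed):

    int smin_reduction(const int *a, int n) {
      int m = a[0];
      for (int i = 1; i < n; ++i)
        m = a[i] < m ? a[i] : m;  // Select(ICmp(a[i], m), a[i], m)
      return m;
    }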
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, ReductionInstDesc::MRK_UIntMin); + else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, ReductionInstDesc::MRK_UIntMax); + else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, ReductionInstDesc::MRK_SIntMax); + else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, ReductionInstDesc::MRK_SIntMin); + else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMin); + else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMax); + else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMin); + else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMax); + + return ReductionInstDesc(false, I); +} + +ReductionInstDesc ReductionDescriptor::isReductionInstr(Instruction *I, + ReductionKind Kind, + ReductionInstDesc &Prev, + bool HasFunNoNaNAttr) { + bool FP = I->getType()->isFloatingPointTy(); + bool FastMath = FP && I->hasUnsafeAlgebra(); + switch (I->getOpcode()) { + default: + return ReductionInstDesc(false, I); + case Instruction::PHI: + if (FP && + (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax)) + return ReductionInstDesc(false, I); + return ReductionInstDesc(I, Prev.getMinMaxKind()); + case Instruction::Sub: + case Instruction::Add: + return ReductionInstDesc(Kind == RK_IntegerAdd, I); + case Instruction::Mul: + return ReductionInstDesc(Kind == RK_IntegerMult, I); + case Instruction::And: + return ReductionInstDesc(Kind == RK_IntegerAnd, I); + case Instruction::Or: + return ReductionInstDesc(Kind == RK_IntegerOr, I); + case Instruction::Xor: + return ReductionInstDesc(Kind == RK_IntegerXor, I); + case Instruction::FMul: + return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I); + case Instruction::FSub: + case Instruction::FAdd: + return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I); + case Instruction::FCmp: + case Instruction::ICmp: + case Instruction::Select: + if (Kind != RK_IntegerMinMax && + (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) + return ReductionInstDesc(false, I); + return isMinMaxSelectCmpPattern(I, Prev); + } +} + +bool ReductionDescriptor::hasMultipleUsesOf( + Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { + unsigned NumUses = 0; + for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; + ++Use) { + if (Insts.count(dyn_cast<Instruction>(*Use))) + ++NumUses; + if (NumUses > 1) + return true; + } + + return false; +} +bool ReductionDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, + ReductionDescriptor &RedDes) { + + bool HasFunNoNaNAttr = false; + BasicBlock *Header = TheLoop->getHeader(); + Function &F = *Header->getParent(); + if (F.hasFnAttribute("no-nans-fp-math")) + HasFunNoNaNAttr = + F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; + + if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found a MUL reduction PHI." 
<< *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, + RedDes)) { + DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { + DEBUG(dbgs() << "Found a float MINMAX reduction PHI." << *Phi << "\n"); + return true; + } + // Not a reduction of known type. + return false; +} + +/// This function returns the identity element (or neutral element) for +/// the operation K. +Constant *ReductionDescriptor::getReductionIdentity(ReductionKind K, Type *Tp) { + switch (K) { + case RK_IntegerXor: + case RK_IntegerAdd: + case RK_IntegerOr: + // Adding, Xoring, Oring zero to a number does not change it. + return ConstantInt::get(Tp, 0); + case RK_IntegerMult: + // Multiplying a number by 1 does not change it. + return ConstantInt::get(Tp, 1); + case RK_IntegerAnd: + // AND-ing a number with an all-1 value does not change it. + return ConstantInt::get(Tp, -1, true); + case RK_FloatMult: + // Multiplying a number by 1 does not change it. + return ConstantFP::get(Tp, 1.0L); + case RK_FloatAdd: + // Adding zero to a number does not change it. + return ConstantFP::get(Tp, 0.0L); + default: + llvm_unreachable("Unknown reduction kind"); + } +} + +/// This function translates the reduction kind to an LLVM binary operator.
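The identities returned by getReductionIdentity are the usual algebraic neutral elements, which is what lets a vectorizer seed extra accumulator lanes without changing the result. A trivial C++ sanity check of the same facts:

    static_assert((5 + 0) == 5 && (5 | 0) == 5 && (5 ^ 0) == 5,
                  "identity for add/or/xor is 0");
    static_assert((5 * 1) == 5, "identity for mul is 1");
    static_assert((5 & -1) == 5, "identity for and is all-ones");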
+unsigned ReductionDescriptor::getReductionBinOp(ReductionKind Kind) { + switch (Kind) { + case RK_IntegerAdd: + return Instruction::Add; + case RK_IntegerMult: + return Instruction::Mul; + case RK_IntegerOr: + return Instruction::Or; + case RK_IntegerAnd: + return Instruction::And; + case RK_IntegerXor: + return Instruction::Xor; + case RK_FloatMult: + return Instruction::FMul; + case RK_FloatAdd: + return Instruction::FAdd; + case RK_IntegerMinMax: + return Instruction::ICmp; + case RK_FloatMinMax: + return Instruction::FCmp; + default: + llvm_unreachable("Unknown reduction operation"); + } +} + +Value * +ReductionDescriptor::createMinMaxOp(IRBuilder<> &Builder, + ReductionInstDesc::MinMaxReductionKind RK, + Value *Left, Value *Right) { + CmpInst::Predicate P = CmpInst::ICMP_NE; + switch (RK) { + default: + llvm_unreachable("Unknown min/max reduction kind"); + case ReductionInstDesc::MRK_UIntMin: + P = CmpInst::ICMP_ULT; + break; + case ReductionInstDesc::MRK_UIntMax: + P = CmpInst::ICMP_UGT; + break; + case ReductionInstDesc::MRK_SIntMin: + P = CmpInst::ICMP_SLT; + break; + case ReductionInstDesc::MRK_SIntMax: + P = CmpInst::ICMP_SGT; + break; + case ReductionInstDesc::MRK_FloatMin: + P = CmpInst::FCMP_OLT; + break; + case ReductionInstDesc::MRK_FloatMax: + P = CmpInst::FCMP_OGT; + break; + } + + Value *Cmp; + if (RK == ReductionInstDesc::MRK_FloatMin || + RK == ReductionInstDesc::MRK_FloatMax) + Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); + else + Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); + + Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); + return Select; +} + +bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, + ConstantInt *&StepValue) { + Type *PhiTy = Phi->getType(); + // We only handle integer and pointer induction variables. + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) + return false; + + // Check that the PHI is consecutive. + const SCEV *PhiScev = SE->getSCEV(Phi); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); + if (!AR) { + DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); + return false; + } + + const SCEV *Step = AR->getStepRecurrence(*SE); + // Calculate the pointer stride and check if it is consecutive. + const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); + if (!C) + return false; + + ConstantInt *CV = C->getValue(); + if (PhiTy->isIntegerTy()) { + StepValue = CV; + return true; + } + + assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); + Type *PointerElementType = PhiTy->getPointerElementType(); + // The pointer stride cannot be determined if the pointer element type is not + // sized. + if (!PointerElementType->isSized()) + return false; + + const DataLayout &DL = Phi->getModule()->getDataLayout(); + int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); + int64_t CVSize = CV->getSExtValue(); + if (CVSize % Size) + return false; + StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp deleted file mode 100644 index ff89e74..0000000 --- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ /dev/null @@ -1,188 +0,0 @@ -//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details.
-// -//===----------------------------------------------------------------------===// -// -// This pass lowers the 'expect' intrinsic to LLVM metadata. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include <vector> - -using namespace llvm; - -#define DEBUG_TYPE "lower-expect-intrinsic" - -STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); - -static cl::opt<uint32_t> -LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64), - cl::desc("Weight of the branch likely to be taken (default = 64)")); -static cl::opt<uint32_t> -UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4), - cl::desc("Weight of the branch unlikely to be taken (default = 4)")); - -namespace { - - class LowerExpectIntrinsic : public FunctionPass { - - bool HandleSwitchExpect(SwitchInst *SI); - - bool HandleIfExpect(BranchInst *BI); - - public: - static char ID; - LowerExpectIntrinsic() : FunctionPass(ID) { - initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - }; -} - - -bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { - CallInst *CI = dyn_cast<CallInst>(SI->getCondition()); - if (!CI) - return false; - - Function *Fn = CI->getCalledFunction(); - if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) - return false; - - Value *ArgValue = CI->getArgOperand(0); - ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (!ExpectedValue) - return false; - - SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue); - unsigned n = SI->getNumCases(); // +1 for default case. - std::vector<uint32_t> Weights(n + 1); - - Weights[0] = Case == SI->case_default() ? LikelyBranchWeight - : UnlikelyBranchWeight; - for (unsigned i = 0; i != n; ++i) - Weights[i + 1] = i == Case.getCaseIndex() ? 
LikelyBranchWeight - : UnlikelyBranchWeight; - - SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(CI->getContext()).createBranchWeights(Weights)); - - SI->setCondition(ArgValue); - return true; -} - - -bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { - if (BI->isUnconditional()) - return false; - - // Handle non-optimized IR code like: - // %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1) - // %tobool = icmp ne i64 %expval, 0 - // br i1 %tobool, label %if.then, label %if.end - // - // Or the following simpler case: - // %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1) - // br i1 %expval, label %if.then, label %if.end - - CallInst *CI; - - ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition()); - if (!CmpI) { - CI = dyn_cast<CallInst>(BI->getCondition()); - } else { - if (CmpI->getPredicate() != CmpInst::ICMP_NE) - return false; - CI = dyn_cast<CallInst>(CmpI->getOperand(0)); - } - - if (!CI) - return false; - - Function *Fn = CI->getCalledFunction(); - if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) - return false; - - Value *ArgValue = CI->getArgOperand(0); - ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (!ExpectedValue) - return false; - - MDBuilder MDB(CI->getContext()); - MDNode *Node; - - // If expect value is equal to 1 it means that we are more likely to take - // branch 0, in other case more likely is branch 1. - if (ExpectedValue->isOne()) - Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); - else - Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); - - BI->setMetadata(LLVMContext::MD_prof, Node); - - if (CmpI) - CmpI->setOperand(0, ArgValue); - else - BI->setCondition(ArgValue); - return true; -} - - -bool LowerExpectIntrinsic::runOnFunction(Function &F) { - for (Function::iterator I = F.begin(), E = F.end(); I != E;) { - BasicBlock *BB = I++; - - // Create "block_weights" metadata. - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - if (HandleIfExpect(BI)) - IfHandled++; - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { - if (HandleSwitchExpect(SI)) - IfHandled++; - } - - // remove llvm.expect intrinsics. 
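The pass being deleted here is driven from source code via __builtin_expect (assuming the GCC/Clang builtin); a minimal example of the input it consumed, using the default weights quoted above (64 likely vs. 4 unlikely):

    long process(long x) {
      // The front end emits @llvm.expect.i64 for __builtin_expect; the pass
      // strips the call and attaches !prof branch weights of 64 vs. 4.
      if (__builtin_expect(x == 0, 1))
        return 0;      // expected path
      return x * 2;    // unexpected path
    }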
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ) { - CallInst *CI = dyn_cast<CallInst>(BI++); - if (!CI) - continue; - - Function *Fn = CI->getCalledFunction(); - if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) { - Value *Exp = CI->getArgOperand(0); - CI->replaceAllUsesWith(Exp); - CI->eraseFromParent(); - } - } - } - - return false; -} - - -char LowerExpectIntrinsic::ID = 0; -INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' " - "Intrinsics", false, false) - -FunctionPass *llvm::createLowerExpectIntrinsicPass() { - return new LowerExpectIntrinsic(); -} diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 04b9130..e0e0e90 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -14,17 +14,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/CFG.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> using namespace llvm; @@ -32,6 +32,23 @@ using namespace llvm; #define DEBUG_TYPE "lower-switch" namespace { + struct IntRange { + int64_t Low, High; + }; + // Return true iff R is covered by Ranges. + static bool IsInRanges(const IntRange &R, + const std::vector<IntRange> &Ranges) { + // Note: Ranges must be sorted, non-overlapping and non-adjacent. + + // Find the first range whose High field is >= R.High, + // then check if the Low field is <= R.Low. If so, we + // have a Range that covers R. + auto I = std::lower_bound( + Ranges.begin(), Ranges.end(), R, + [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); + return I != Ranges.end() && I->Low <= R.Low; + } + /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch /// instructions. class LowerSwitch : public FunctionPass { @@ -50,13 +67,12 @@ namespace { } struct CaseRange { - Constant* Low; - Constant* High; + ConstantInt* Low; + ConstantInt* High; BasicBlock* BB; - CaseRange(Constant *low = nullptr, Constant *high = nullptr, - BasicBlock *bb = nullptr) : - Low(low), High(high), BB(bb) { } + CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb) : Low(low), High(high), BB(bb) {} }; typedef std::vector<CaseRange> CaseVector; @@ -67,7 +83,8 @@ namespace { BasicBlock *switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, ConstantInt *UpperBound, Value *Val, BasicBlock *Predecessor, - BasicBlock *OrigBlock, BasicBlock *Default); + BasicBlock *OrigBlock, BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges); BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock, BasicBlock *Default); unsigned Clusterify(CaseVector &Cases, SwitchInst *SI); @@ -158,11 +175,16 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, // Remove additional occurrences coming from condensed cases and keep the // number of incoming values equal to the number of branches to SuccBB.
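The index bookkeeping introduced just below exists because removeIncomingValue compacts the PHI's operand list, so any larger saved index goes stale. A self-contained sketch of the same pitfall with a plain vector:

    #include <vector>

    int main() {
      std::vector<int> vals = {10, 20, 30, 40};
      std::vector<unsigned> doomed = {1, 3};  // indices collected in order
      // Erase back to front: removing index 3 first leaves index 1 valid.
      // Front to back, erasing index 1 would shift the elements down and
      // leave the saved index 3 pointing past the end.
      for (auto it = doomed.rbegin(); it != doomed.rend(); ++it)
        vals.erase(vals.begin() + *it);
      return static_cast<int>(vals.size());  // {10, 30} remain
    }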
+ SmallVector<unsigned, 8> Indices; for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) if (PN->getIncomingBlock(Idx) == OrigBB) { - PN->removeIncomingValue(Idx); + Indices.push_back(Idx); LocalNumMergedCases--; } + // Remove incoming values in the reverse order to prevent invalidating + // *successive* index. + for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III) + PN->removeIncomingValue(*III); } } @@ -171,12 +193,12 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, // LowerBound and UpperBound are used to keep track of the bounds for Val // that have already been checked by a block emitted by one of the previous // calls to switchConvert in the call stack. -BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, - ConstantInt *LowerBound, - ConstantInt *UpperBound, Value *Val, - BasicBlock *Predecessor, - BasicBlock *OrigBlock, - BasicBlock *Default) { +BasicBlock * +LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *Predecessor, BasicBlock *OrigBlock, + BasicBlock *Default, + const std::vector<IntRange> &UnreachableRanges) { unsigned Size = End - Begin; if (Size == 1) { @@ -203,32 +225,32 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, CaseRange &Pivot = *(Begin + Mid); DEBUG(dbgs() << "Pivot ==> " - << cast<ConstantInt>(Pivot.Low)->getValue() - << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + << Pivot.Low->getValue() + << " -" << Pivot.High->getValue() << "\n"); // NewLowerBound here should never be the integer minimal value. // This is because it is computed from a case range that is never // the smallest, so there is always a case range that has at least // a smaller value. - ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low); - ConstantInt *NewUpperBound; - - // If we don't have a Default block then it means that we can never - // have a value outside of a case range, so set the UpperBound to the highest - // value in the LHS part of the case ranges. - if (Default != nullptr) { - // Because NewLowerBound is never the smallest representable integer - // it is safe here to subtract one. - NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), - NewLowerBound->getValue() - 1); - } else { - CaseItr LastLHS = LHS.begin() + LHS.size() - 1; - NewUpperBound = cast<ConstantInt>(LastLHS->High); + ConstantInt *NewLowerBound = Pivot.Low; + + // Because NewLowerBound is never the smallest representable integer + // it is safe here to subtract one. + ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(), + NewLowerBound->getValue() - 1); + + if (!UnreachableRanges.empty()) { + // Check if the gap between LHS's highest and NewLowerBound is unreachable. 
+ int64_t GapLow = LHS.back().High->getSExtValue() + 1; + int64_t GapHigh = NewLowerBound->getSExtValue() - 1; + IntRange Gap = { GapLow, GapHigh }; + if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges)) + NewUpperBound = LHS.back().High; } DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) { - dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue(); + dbgs() << LowerBound->getSExtValue(); } else { dbgs() << "NONE"; } @@ -236,7 +258,7 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, dbgs() << "RHS Bounds ==> "; dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) { - dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n"; + dbgs() << UpperBound->getSExtValue() << "\n"; } else { dbgs() << "NONE\n"; }); @@ -251,10 +273,10 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound, NewUpperBound, Val, NewNode, OrigBlock, - Default); + Default, UnreachableRanges); BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound, UpperBound, Val, NewNode, OrigBlock, - Default); + Default, UnreachableRanges); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); @@ -287,11 +309,11 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, Leaf.Low, "SwitchLeaf"); } else { // Make range comparison - if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) { + if (Leaf.Low->isMinValue(true /*isSigned*/)) { // Val >= Min && Val <= Hi --> Val <= Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, "SwitchLeaf"); - } else if (cast<ConstantInt>(Leaf.Low)->isZero()) { + } else if (Leaf.Low->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, "SwitchLeaf"); @@ -316,8 +338,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { PHINode* PN = cast<PHINode>(I); // Remove all but one incoming entries from the cluster - uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() - - cast<ConstantInt>(Leaf.Low)->getSExtValue(); + uint64_t Range = Leaf.High->getSExtValue() - + Leaf.Low->getSExtValue(); for (uint64_t j = 0; j < Range; ++j) { PN->removeIncomingValue(OrigBlock); } @@ -345,8 +367,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { if (Cases.size()>=2) for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); J != Cases.end();) { - int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); - int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); + int64_t nextValue = J->Low->getSExtValue(); + int64_t currentValue = I->High->getSExtValue(); BasicBlock* nextBB = J->BB; BasicBlock* currentBB = I->BB; @@ -379,26 +401,102 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { Value *Val = SI->getCondition(); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); - // If there is only the default destination, don't bother with the code below. + // If there is only the default destination, just branch. if (!SI->getNumCases()) { - BranchInst::Create(SI->getDefaultDest(), CurBlock); - CurBlock->getInstList().erase(SI); + BranchInst::Create(Default, CurBlock); + SI->eraseFromParent(); return; } - const bool DefaultIsUnreachable = - Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator()); + // Prepare cases vector. 
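A hedged example of the switch shape targeted by the rewrite built just below (f and g are placeholder functions; __builtin_unreachable is the GCC/Clang builtin): with an unreachable default, the bounds tighten to the case range and the most popular successor becomes the new default.

    void f();
    void g();

    void lower_switch_example(int x) {
      switch (x) {
      case 1:                 // cases 1 and 2 share a successor block, so its
      case 2:  f(); break;    // popularity is 2 and it becomes the new default
      case 40: g(); break;    // popularity 1
      default: __builtin_unreachable();
      }
      // Bounds tighten to [1, 40]; [INT64_MIN, 0], [3, 39], and
      // [41, INT64_MAX] are recorded as UnreachableRanges.
    }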
+ CaseVector Cases; + unsigned numCmps = Clusterify(Cases, SI); + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << "\n"); + DEBUG(dbgs() << "Cases: " << Cases << "\n"); + (void)numCmps; + + ConstantInt *LowerBound = nullptr; + ConstantInt *UpperBound = nullptr; + std::vector<IntRange> UnreachableRanges; + + if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) { + // Make the bounds tightly fitted around the case value range, because we + // know that the value passed to the switch must be exactly one of the case + // values. + assert(!Cases.empty()); + LowerBound = Cases.front().Low; + UpperBound = Cases.back().High; + + DenseMap<BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + BasicBlock *PopSucc = nullptr; + + IntRange R = { INT64_MIN, INT64_MAX }; + UnreachableRanges.push_back(R); + for (const auto &I : Cases) { + int64_t Low = I.Low->getSExtValue(); + int64_t High = I.High->getSExtValue(); + + IntRange &LastRange = UnreachableRanges.back(); + if (LastRange.Low == Low) { + // There is nothing left of the previous range. + UnreachableRanges.pop_back(); + } else { + // Terminate the previous range. + assert(Low > LastRange.Low); + LastRange.High = Low - 1; + } + if (High != INT64_MAX) { + IntRange R = { High + 1, INT64_MAX }; + UnreachableRanges.push_back(R); + } + + // Count popularity. + int64_t N = High - Low + 1; + unsigned &Pop = Popularity[I.BB]; + if ((Pop += N) > MaxPop) { + MaxPop = Pop; + PopSucc = I.BB; + } + } +#ifndef NDEBUG + /* UnreachableRanges should be sorted and the ranges non-adjacent. */ + for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end(); + I != E; ++I) { + assert(I->Low <= I->High); + auto Next = I + 1; + if (Next != E) { + assert(Next->Low > I->High); + } + } +#endif + + // Use the most popular block as the new default, reducing the number of + // cases. + assert(MaxPop > 0 && PopSucc); + Default = PopSucc; + for (CaseItr I = Cases.begin(); I != Cases.end();) { + if (I->BB == PopSucc) + I = Cases.erase(I); + else + ++I; + } + + // If there are no cases left, just branch. + if (Cases.empty()) { + BranchInst::Create(Default, CurBlock); + SI->eraseFromParent(); + return; + } + } + // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. - // if the default block is set as an unreachable we avoid creating one - // because will never be a valid target. - BasicBlock *NewDefault = nullptr; - if (!DefaultIsUnreachable) { - NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - F->getBasicBlockList().insert(Default, NewDefault); - - BranchInst::Create(Default, NewDefault); - } + BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); + F->getBasicBlockList().insert(Default, NewDefault); + BranchInst::Create(Default, NewDefault); + // If there is an entry in any PHI nodes for the default edge, make sure // to update them as well. for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { @@ -408,40 +506,18 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); } - // Prepare cases vector. - CaseVector Cases; - unsigned numCmps = Clusterify(Cases, SI); - - DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". 
Total compares: " << numCmps << "\n"); - DEBUG(dbgs() << "Cases: " << Cases << "\n"); - (void)numCmps; - - ConstantInt *UpperBound = nullptr; - ConstantInt *LowerBound = nullptr; - - // Optimize the condition where Default is an unreachable block. In this case - // we can make the bounds tightly fitted around the case value ranges, - // because we know that the value passed to the switch should always be - // exactly one of the case values. - if (DefaultIsUnreachable) { - CaseItr LastCase = Cases.begin() + Cases.size() - 1; - UpperBound = cast<ConstantInt>(LastCase->High); - LowerBound = cast<ConstantInt>(Cases.begin()->Low); - } BasicBlock *SwitchBlock = switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, - OrigBlock, OrigBlock, NewDefault); + OrigBlock, OrigBlock, NewDefault, UnreachableRanges); // Branch to our shiny new if-then stuff... BranchInst::Create(SwitchBlock, OrigBlock); // We are now done with the switch instruction, delete it. + BasicBlock *OldDefault = SI->getDefaultDest(); CurBlock->getInstList().erase(SI); - pred_iterator PI = pred_begin(Default), E = pred_end(Default); - // If the Default block has no more predecessors just remove it - if (PI == E) { - DeleteDeadBlock(Default); - } + // If the Default block has no more predecessors just remove it. + if (pred_begin(OldDefault) == pred_end(OldDefault)) + DeleteDeadBlock(OldDefault); } diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 35c701e..d69a81e 100644 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -93,3 +94,34 @@ llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set, } return GV; } + +Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) { + if (isa<Function>(FuncOrBitcast)) + return cast<Function>(FuncOrBitcast); + FuncOrBitcast->dump(); + std::string Err; + raw_string_ostream Stream(Err); + Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast; + report_fatal_error(Err); +} + +std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions( + Module &M, StringRef CtorName, StringRef InitName, + ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs) { + assert(!InitName.empty() && "Expected init function name"); + assert(InitArgs.size() == InitArgTypes.size() && + "Sanitizer's init function expects different number of arguments"); + Function *Ctor = Function::Create( + FunctionType::get(Type::getVoidTy(M.getContext()), false), + GlobalValue::InternalLinkage, CtorName, &M); + BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); + IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB)); + Function *InitFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + InitName, FunctionType::get(IRB.getVoidTy(), InitArgTypes, false), + AttributeSet())); + InitFunction->setLinkage(Function::ExternalLinkage); + IRB.CreateCall(InitFunction, InitArgs); + return std::make_pair(Ctor, InitFunction); +} + diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index dabadb7..623dbc9 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -13,16 +13,6 @@ // traversing the function in depth-first order to rewrite loads and stores as // appropriate. // -// The algorithm used here is based on: -// -// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. -// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of -// Programming Languages -// POPL '95. ACM, New York, NY, 62-73. -// -// It has been modified to not explicitly use the DJ graph data structure and to -// directly compute pruned SSA using per-variable liveness information. -// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -34,6 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -45,9 +36,9 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> -#include <queue> using namespace llvm; #define DEBUG_TYPE "mem2reg" @@ -274,9 +265,6 @@ struct PromoteMem2Reg { /// behavior. DenseMap<BasicBlock *, unsigned> BBNumbers; - /// Maps DomTreeNodes to their level in the dominator tree. - DenseMap<DomTreeNode *, unsigned> DomLevels; - /// Lazily compute the number of predecessors a block has. DenseMap<const BasicBlock *, unsigned> BBNumPreds; @@ -303,8 +291,6 @@ private: return NP - 1; } - void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, - AllocaInfo &Info); void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, const SmallPtrSetImpl<BasicBlock *> &DefBlocks, SmallPtrSetImpl<BasicBlock *> &LiveInBlocks); @@ -531,6 +517,7 @@ void PromoteMem2Reg::run() { AllocaInfo Info; LargeBlockInfo LBI; + IDFCalculator IDF(DT); for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; @@ -578,31 +565,12 @@ void PromoteMem2Reg::run() { continue; } - // If we haven't computed dominator tree levels, do so now. - if (DomLevels.empty()) { - SmallVector<DomTreeNode *, 32> Worklist; - - DomTreeNode *Root = DT.getRootNode(); - DomLevels[Root] = 0; - Worklist.push_back(Root); - - while (!Worklist.empty()) { - DomTreeNode *Node = Worklist.pop_back_val(); - unsigned ChildLevel = DomLevels[Node] + 1; - for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); - CI != CE; ++CI) { - DomLevels[*CI] = ChildLevel; - Worklist.push_back(*CI); - } - } - } - // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { unsigned ID = 0; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - BBNumbers[I] = ID++; + for (auto &BB : F) + BBNumbers[&BB] = ID++; } // If we have an AST to keep updated, remember some pointer value that is @@ -621,7 +589,34 @@ void PromoteMem2Reg::run() { // the standard SSA construction algorithm. Determine which blocks need PHI // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. - DetermineInsertionPoint(AI, AllocaNum, Info); + + + // Unique the set of defining blocks for efficient lookup. + SmallPtrSet<BasicBlock *, 32> DefBlocks; + DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); + + // Determine which blocks the value is live in. These are blocks which lead + // to uses. 
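Before the IDF-based rewrite below, it helps to recall what is being computed. A hedged source-level picture of phi placement for a promoted alloca:

    // 'x' is the alloca being promoted. Both arms of the diamond store to
    // it, so the join block lies on the iterated dominance frontier of the
    // defining blocks and is the one block that needs a phi.
    int diamond(bool c) {
      int x;
      if (c)
        x = 1;     // defining block A
      else
        x = 2;     // defining block B
      return x;    // join block: phi [1, A], [2, B]
    }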
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks; + ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need phi + // nodes and see if we can optimize out some work by avoiding insertion of + // dead phi nodes. + IDF.setLiveInBlocks(LiveInBlocks); + IDF.setDefiningBlocks(DefBlocks); + SmallVector<BasicBlock *, 32> PHIBlocks; + IDF.calculate(PHIBlocks); + if (PHIBlocks.size() > 1) + std::sort(PHIBlocks.begin(), PHIBlocks.end(), + [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.lookup(A) < BBNumbers.lookup(B); + }); + + unsigned CurrentVersion = 0; + for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i) + QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion); } if (Allocas.empty()) @@ -667,6 +662,8 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } + const DataLayout &DL = F.getParent()->getDataLayout(); + // Remove alloca's dbg.declare intrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) @@ -691,7 +688,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -841,95 +838,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks( } } -/// At this point, we're committed to promoting the alloca using IDF's, and the -/// standard SSA construction algorithm. Determine which blocks need phi nodes -/// and see if we can optimize out some work by avoiding insertion of dead phi -/// nodes. -void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, - AllocaInfo &Info) { - // Unique the set of defining blocks for efficient lookup. - SmallPtrSet<BasicBlock *, 32> DefBlocks; - DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); - - // Determine which blocks the value is live in. These are blocks which lead - // to uses. - SmallPtrSet<BasicBlock *, 32> LiveInBlocks; - ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); - - // Use a priority queue keyed on dominator tree level so that inserted nodes - // are handled from the bottom of the dominator tree upwards. - typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; - typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, - less_second> IDFPriorityQueue; - IDFPriorityQueue PQ; - - for (BasicBlock *BB : DefBlocks) { - if (DomTreeNode *Node = DT.getNode(BB)) - PQ.push(std::make_pair(Node, DomLevels[Node])); - } - - SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks; - SmallPtrSet<DomTreeNode *, 32> Visited; - SmallVector<DomTreeNode *, 32> Worklist; - while (!PQ.empty()) { - DomTreeNodePair RootPair = PQ.top(); - PQ.pop(); - DomTreeNode *Root = RootPair.first; - unsigned RootLevel = RootPair.second; - - // Walk all dominator tree children of Root, inspecting their CFG edges with - // targets elsewhere on the dominator tree. Only targets whose level is at - // most Root's level are added to the iterated dominance frontier of the - // definition set.
- - Worklist.clear(); - Worklist.push_back(Root); - - while (!Worklist.empty()) { - DomTreeNode *Node = Worklist.pop_back_val(); - BasicBlock *BB = Node->getBlock(); - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; - ++SI) { - DomTreeNode *SuccNode = DT.getNode(*SI); - - // Quickly skip all CFG edges that are also dominator tree edges instead - // of catching them below. - if (SuccNode->getIDom() == Node) - continue; - - unsigned SuccLevel = DomLevels[SuccNode]; - if (SuccLevel > RootLevel) - continue; - - if (!Visited.insert(SuccNode).second) - continue; - - BasicBlock *SuccBB = SuccNode->getBlock(); - if (!LiveInBlocks.count(SuccBB)) - continue; - - DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); - if (!DefBlocks.count(SuccBB)) - PQ.push(std::make_pair(SuccNode, SuccLevel)); - } - - for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; - ++CI) { - if (!Visited.count(*CI)) - Worklist.push_back(*CI); - } - } - } - - if (DFBlocks.size() > 1) - std::sort(DFBlocks.begin(), DFBlocks.end()); - - unsigned CurrentVersion = 0; - for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) - QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); -} - /// \brief Queue a phi-node to be added to a basic-block for a specific Alloca. /// /// Returns true if there wasn't already a phi-node for that variable diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 3fcb789..88b39dd 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -150,12 +151,13 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { ProtoName, &BB->front()); // Fill in all the predecessors of the PHI. - for (unsigned i = 0, e = PredValues.size(); i != e; ++i) - InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); + for (const auto &PredValue : PredValues) + InsertedPHI->addIncoming(PredValue.second, PredValue.first); // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. - if (Value *V = SimplifyInstruction(InsertedPHI)) { + if (Value *V = + SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { InsertedPHI->eraseFromParent(); return V; } @@ -245,8 +247,7 @@ public: // but it is relatively slow. If we already have PHI nodes in this // block, walk one of them to get the predecessor list instead. 
if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) - Preds->push_back(SomePhi->getIncomingBlock(PI)); + Preds->append(SomePhi->block_begin(), SomePhi->block_end()); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) Preds->push_back(*PI); @@ -321,12 +322,12 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { //===----------------------------------------------------------------------===// LoadAndStorePromoter:: -LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts, +LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, SSAUpdater &S, StringRef BaseName) : SSA(S) { if (Insts.empty()) return; - Value *SomeVal; - if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) + const Value *SomeVal; + if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0])) SomeVal = LI; else SomeVal = cast<StoreInst>(Insts[0])->getOperand(0); @@ -344,20 +345,17 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock; - - for (unsigned i = 0, e = Insts.size(); i != e; ++i) { - Instruction *User = Insts[i]; + + for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); - } // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be deterministic. SmallVector<LoadInst*, 32> LiveInLoads; DenseMap<Value*, Value*> ReplacedLoads; - - for (unsigned i = 0, e = Insts.size(); i != e; ++i) { - Instruction *User = Insts[i]; + + for (Instruction *User : Insts) { BasicBlock *BB = User->getParent(); TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; @@ -380,8 +378,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // Otherwise, check to see if this block is all loads. bool HasStore = false; - for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) { - if (isa<StoreInst>(BlockUses[i])) { + for (Instruction *I : BlockUses) { + if (isa<StoreInst>(I)) { HasStore = true; break; } @@ -391,8 +389,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // efficient way to tell which one is first in the block and don't want to // scan large blocks, so just add all loads as live ins. if (!HasStore) { - for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) - LiveInLoads.push_back(cast<LoadInst>(BlockUses[i])); + for (Instruction *I : BlockUses) + LiveInLoads.push_back(cast<LoadInst>(I)); BlockUses.clear(); continue; } @@ -403,8 +401,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. Value *StoredValue = nullptr; - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (LoadInst *L = dyn_cast<LoadInst>(II)) { + for (Instruction &I : *BB) { + if (LoadInst *L = dyn_cast<LoadInst>(&I)) { // If this is a load from an unrelated pointer, ignore it. if (!isInstInList(L, Insts)) continue; @@ -419,8 +417,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { } continue; } - - if (StoreInst *SI = dyn_cast<StoreInst>(II)) { + + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { // If this is a store to an unrelated pointer, ignore it.
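The linear scan described above rewrites loads within a single block against the most recent local store; only a load that precedes every store consumes the live-in value, and the last store defines the live-out value. A hedged sketch of one such block:

    void block_scan_example(int *p, int a, int b) {
      int v1 = *p;  // load before any store: uses the live-in value
      *p = a;       // store: 'a' becomes the current local value
      int v2 = *p;  // load after a store: rewritten to use 'a' directly
      *p = b;       // last store in the block: 'b' is the live-out value
      (void)v1;
      (void)v2;
    }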
if (!isInstInList(SI, Insts)) continue; updateDebugInfo(SI); @@ -438,8 +436,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. - for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { - LoadInst *ALoad = LiveInLoads[i]; + for (LoadInst *ALoad : LiveInLoads) { Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); replaceLoadWithValue(ALoad, NewVal); @@ -454,9 +451,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. - for (unsigned i = 0, e = Insts.size(); i != e; ++i) { - Instruction *User = Insts[i]; - + for (Instruction *User : Insts) { // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f6867c2..60ac271 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -53,9 +53,13 @@ using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" +// Chosen as 2 so as to be cheap, but still to have enough power to fold +// a select, so the "clamp" idiom (of a min followed by a max) will be caught. +// To catch this, we need to fold a compare and a select, hence '2' being the +// minimum reasonable default. static cl::opt<unsigned> -PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), - cl::desc("Control the amount of phi node folding to perform (default = 1)")); +PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), + cl::desc("Control the amount of phi node folding to perform (default = 2)")); static cl::opt<bool> DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), @@ -106,8 +110,8 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; + const DataLayout &DL; unsigned BonusInstThreshold; - const DataLayout *const DL; AssumptionCache *AC; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, @@ -127,9 +131,9 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold, - const DataLayout *DL, AssumptionCache *AC) - : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AC(AC) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, + unsigned BonusInstThreshold, AssumptionCache *AC) + : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {} bool run(BasicBlock *BB); }; } @@ -216,45 +220,15 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, } /// ComputeSpeculationCost - Compute an abstract "cost" of speculating the -/// given instruction, which is assumed to be safe to speculate. 1 means -/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. -static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { - assert(isSafeToSpeculativelyExecute(I, DL) && +/// given instruction, which is assumed to be safe to speculate. TCC_Free means +/// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively +/// expensive. 
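The new default of 2 quoted above is sized for the clamp idiom: folding the remaining branch into a select means speculating one compare plus one select. A hedged source form of that idiom:

    int clamp(int x, int lo, int hi) {
      int t = x < lo ? lo : x;  // min half: one icmp + one select
      return t > hi ? hi : t;   // max half: compare + select, cost 2
    }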
+static unsigned ComputeSpeculationCost(const User *I, + const TargetTransformInfo &TTI) { + assert(isSafeToSpeculativelyExecute(I) && "Instruction is not safe to speculatively execute!"); - switch (Operator::getOpcode(I)) { - default: - // In doubt, be conservative. - return UINT_MAX; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. - if (!cast<GEPOperator>(I)->hasAllConstantIndices()) - return UINT_MAX; - return 1; - case Instruction::ExtractValue: - case Instruction::Load: - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::ICmp: - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::BitCast: - case Instruction::ExtractElement: - case Instruction::InsertElement: - return 1; // These are all cheap. - - case Instruction::Call: - case Instruction::Select: - return 2; - } + return TTI.getUserCost(I); } - /// DominatesMergePoint - If we have a merge point of an "if condition" as /// accepted above, return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -275,7 +249,7 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl<Instruction*> *AggressiveInsts, unsigned &CostRemaining, - const DataLayout *DL) { + const TargetTransformInfo &TTI) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -308,10 +282,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!isSafeToSpeculativelyExecute(I, DL)) + if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = ComputeSpeculationCost(I, DL); + unsigned Cost = ComputeSpeculationCost(I, TTI); if (Cost > CostRemaining) return false; @@ -321,7 +295,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -330,15 +304,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) { +static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); - if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy()) + if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy()) return CI; // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. 
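ComputeSpeculationCost now defers to TTI, and DominatesMergePoint charges each hoisted instruction against a shared CostRemaining budget. A minimal model of that bookkeeping, with an assumed helper name (the cost constants TCC_Free, TCC_Basic, TCC_Expensive are 0, 1, and 4 in TargetTransformInfo of this era):

// Hypothetical sketch of the budget pattern used above. Returns false
// once the speculation budget is exhausted; operands drawn into the same
// speculation share the same budget.
bool chargeSpeculationBudget(unsigned Cost, unsigned &CostRemaining) {
  if (Cost > CostRemaining)
    return false;          // too expensive: keep the branch
  CostRemaining -= Cost;   // spend from the shared budget
  return true;
}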
- IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType())); + IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -371,23 +345,22 @@ namespace { /// while for a chain of '&&' it will build the set elements that make the test /// fail. struct ConstantComparesGatherer { - + const DataLayout &DL; Value *CompValue; /// Value found for the switch comparison Value *Extra; /// Extra clause to be checked before the switch SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch unsigned UsedICmps; /// Number of comparisons matched in the and/or chain /// Construct and compute the result for the comparison instruction Cond - ConstantComparesGatherer(Instruction *Cond, const DataLayout *DL) - : CompValue(nullptr), Extra(nullptr), UsedICmps(0) { - gather(Cond, DL); + ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) + : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) { + gather(Cond); } /// Prevent copy - ConstantComparesGatherer(const ConstantComparesGatherer &) - LLVM_DELETED_FUNCTION; + ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; ConstantComparesGatherer & - operator=(const ConstantComparesGatherer &) LLVM_DELETED_FUNCTION; + operator=(const ConstantComparesGatherer &) = delete; private: @@ -406,7 +379,7 @@ private: /// against is placed in CompValue. /// If CompValue is already set, the function is expected to fail if a match /// is found but the value compared to is different. - bool matchInstruction(Instruction *I, const DataLayout *DL, bool isEQ) { + bool matchInstruction(Instruction *I, bool isEQ) { // If this is an icmp against a constant, handle this as one of the cases. ICmpInst *ICI; ConstantInt *C; @@ -448,8 +421,8 @@ private: } // If we have "x ult 3", for example, then we can add 0,1,2 to the set. - ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), - C->getValue()); + ConstantRange Span = ConstantRange::makeAllowedICmpRegion( + ICI->getPredicate(), C->getValue()); // Shift the range if the compare is fed by an add. This is the range // compare idiom as emitted by instcombine. @@ -488,7 +461,7 @@ private: /// the value being compared, and stick the list constants into the Vals /// vector. /// One "Extra" case is allowed to differ from the other. - void gather(Value *V, const DataLayout *DL) { + void gather(Value *V) { Instruction *I = dyn_cast<Instruction>(V); bool isEQ = (I->getOpcode() == Instruction::Or); @@ -510,7 +483,7 @@ private: } // Try to match the current instruction - if (matchInstruction(I, DL, isEQ)) + if (matchInstruction(I, isEQ)) // Match succeeded, continue the loop continue; } @@ -558,15 +531,16 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { CV = SI->getCondition(); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL)) CV = ICI->getOperand(0); + } // Unwrap any lossless ptrtoint cast.
- if (DL && CV) { + if (CV) { if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { Value *Ptr = PTII->getPointerOperand(); - if (PTII->getType() == DL->getIntPtrType(Ptr->getType())) + if (PTII->getType() == DL.getIntPtrType(Ptr->getType())) CV = Ptr; } } @@ -1007,8 +981,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { - assert(DL && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()), + CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); } @@ -1079,7 +1052,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { +static bool HoistThenElseCodeToIf(BranchInst *BI, + const TargetTransformInfo &TTI) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1114,6 +1088,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { if (isa<TerminatorInst>(I1)) goto HoistTerminator; + if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) + return Changed; + // For a normal instruction, we just move one to right before the branch, // then replace all uses of the other with the first. Finally, we remove // the now redundant second instruction. @@ -1167,9 +1144,9 @@ HoistTerminator: passingValueIsAlwaysUndefined(BB2V, PN)) return Changed; - if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL)) + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) return Changed; - if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL)) + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) return Changed; } } @@ -1489,7 +1466,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// /// \returns true if the conditional block is removed. static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const DataLayout *DL) { + const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa<FCmpInst>(BrCond)) @@ -1525,20 +1502,20 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (isa<DbgInfoIntrinsic>(I)) continue; - // Only speculatively execution a single instruction (not counting the + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculationCost; if (SpeculationCost > 1) return false; // Don't hoist the instruction if it's unsafe or expensive. 
- if (!isSafeToSpeculativelyExecute(I, DL) && - !(HoistCondStores && - (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, - EndBB)))) + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( + I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) + ComputeSpeculationCost(I, TTI) > + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) return false; // Store the store speculation candidate. @@ -1594,12 +1571,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0; - if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; + unsigned MaxCost = 2 * PHINodeFoldingThreshold * + TargetTransformInfo::TCC_Basic; + if (OrigCost + ThenCost > MaxCost) return false; // Account for the cost of an unfolded ConstantExpr which could end up @@ -1706,7 +1685,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -1804,7 +1783,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { +static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, + const DataLayout &DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. 
Basically, we // are trying to find the condition that is being branched on, which @@ -1835,6 +1815,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { SmallPtrSet<Instruction*, 4> AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; + MaxCostVal0 *= TargetTransformInfo::TCC_Basic; + MaxCostVal1 *= TargetTransformInfo::TCC_Basic; for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); @@ -1845,9 +1827,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0, DL) || + MaxCostVal0, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1, DL)) + MaxCostVal1, TTI)) return false; } @@ -2067,8 +2049,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL, - unsigned BonusInstThreshold) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { BasicBlock *BB = BI->getParent(); Instruction *Cond = nullptr; @@ -2124,7 +2105,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL, // Ignore dbg intrinsics. if (isa<DbgInfoIntrinsic>(I)) continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I, DL)) + if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I)) return false; // I has only one use and can be executed unconditionally. Instruction *User = dyn_cast<Instruction>(I->user_back()); @@ -2536,17 +2517,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // The weight to CommonDest should be PredCommon * SuccTotal + // PredOther * SuccCommon. // The weight to OtherDest should be PredOther * SuccOther. - SmallVector<uint64_t, 2> NewWeights; - NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) + - PredOther * SuccCommon); - NewWeights.push_back(PredOther * SuccOther); + uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon, + PredOther * SuccOther}; // Halve the weights if any of them cannot fit in an uint32_t FitWeights(NewWeights); - SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end()); PBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(BI->getContext()). - createBranchWeights(MDWeights)); + MDBuilder(BI->getContext()) + .createBranchWeights(NewWeights[0], NewWeights[1])); } // OtherDest may have phi nodes. If so, add an entry from PBI's @@ -2719,8 +2698,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. 
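The NewWeights computation in the SimplifyCondBranchToCondBranch hunk above is easiest to see with numbers. A worked example with assumed counts PredCommon=6, PredOther=2, SuccCommon=3, SuccOther=1:

#include <cstdint>
// Weight toward CommonDest: PredCommon*(SuccCommon+SuccOther)
//                         + PredOther*SuccCommon = 6*(3+1) + 2*3 = 30
// Weight toward OtherDest:  PredOther*SuccOther  = 2*1           = 2
uint64_t NewWeightsExample[2] = {6 * (3 + 1) + 2 * 3,  // CommonDest: 30
                                 2 * 1};               // OtherDest:  2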
static bool TryToSimplifyUncondBranchWithICmpInIt( - ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, const DataLayout *DL, AssumptionCache *AC) { + ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL, + const TargetTransformInfo &TTI, unsigned BonusInstThreshold, + AssumptionCache *AC) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -2753,7 +2733,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -2769,7 +2749,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -2825,8 +2805,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, - IRBuilder<> &Builder) { +static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, + const DataLayout &DL) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (!Cond) return false; @@ -2901,10 +2881,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { - assert(DL && "Cannot switch on pointer without DataLayout"); - CompVal = Builder.CreatePtrToInt(CompVal, - DL->getIntPtrType(CompVal->getType()), - "magicptr"); + CompVal = Builder.CreatePtrToInt( + CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr"); } // Create the new switch instruction now. @@ -2949,20 +2927,9 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Turn all invokes that unwind here into calls and delete the basic block. - bool InvokeRequiresTableEntry = false; - bool Changed = false; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); - - if (II->hasFnAttr(Attribute::UWTable)) { - // Don't remove an `invoke' instruction if the ABI requires an entry into - // the table. - InvokeRequiresTableEntry = true; - continue; - } - SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); - // Insert a call instruction before the invoke. CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); @@ -2982,14 +2949,11 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { // Finally, delete the invoke instruction! II->eraseFromParent(); - Changed = true; } - if (!InvokeRequiresTableEntry) - // The landingpad is now unreachable. Zap it. - BB->eraseFromParent(); - - return Changed; + // The landingpad is now unreachable. Zap it. 
+ BB->eraseFromParent(); + return true; } bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { @@ -3121,55 +3085,6 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { --i; --e; Changed = true; } - // If the default value is unreachable, figure out the most popular - // destination and make it the default. - if (SI->getDefaultDest() == BB) { - std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity; - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { - std::pair<unsigned, unsigned> &entry = - Popularity[i.getCaseSuccessor()]; - if (entry.first == 0) { - entry.first = 1; - entry.second = i.getCaseIndex(); - } else { - entry.first++; - } - } - - // Find the most popular block. - unsigned MaxPop = 0; - unsigned MaxIndex = 0; - BasicBlock *MaxBlock = nullptr; - for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator - I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || - (I->second.first == MaxPop && MaxIndex > I->second.second)) { - MaxPop = I->second.first; - MaxIndex = I->second.second; - MaxBlock = I->first; - } - } - if (MaxBlock) { - // Make this the new default, allowing us to delete any explicit - // edges to it. - SI->setDefaultDest(MaxBlock); - Changed = true; - - // If MaxBlock has phinodes in it, remove MaxPop-1 entries from - // it. - if (isa<PHINode>(MaxBlock->begin())) - for (unsigned i = 0; i != MaxPop-1; ++i) - MaxBlock->removePredecessor(SI->getParent()); - - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) - if (i.getCaseSuccessor() == MaxBlock) { - SI->removeCase(i); - --i; --e; - } - } - } } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) { if (II->getUnwindDest() == BB) { // Convert the invoke to a call instruction. This would be a good @@ -3203,70 +3118,122 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { return Changed; } -/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a -/// integer range comparison into a sub, an icmp and a branch. -static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { - assert(SI->getNumCases() > 1 && "Degenerate switch?"); +static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { + assert(Cases.size() >= 1); - // Make sure all cases point to the same destination and gather the values. - SmallVector<ConstantInt *, 16> Cases; - SwitchInst::CaseIt I = SI->case_begin(); - Cases.push_back(I.getCaseValue()); - SwitchInst::CaseIt PrevI = I++; - for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) { - if (PrevI.getCaseSuccessor() != I.getCaseSuccessor()) + array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); + for (size_t I = 1, E = Cases.size(); I != E; ++I) { + if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1) return false; - Cases.push_back(I.getCaseValue()); } - assert(Cases.size() == SI->getNumCases() && "Not all cases gathered"); + return true; +} - // Sort the case values, then check if they form a range we can transform. - array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); - for (unsigned I = 1, E = Cases.size(); I != E; ++I) { - if (Cases[I-1]->getValue() != Cases[I]->getValue()+1) - return false; +/// Turn a switch with two reachable destinations into an integer range +/// comparison and branch. 
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + bool HasDefault = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + + // Partition the cases into two sets with different destinations. + BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; + BasicBlock *DestB = nullptr; + SmallVector<ConstantInt *, 16> CasesA; + SmallVector<ConstantInt *, 16> CasesB; + + for (SwitchInst::CaseIt I : SI->cases()) { + BasicBlock *Dest = I.getCaseSuccessor(); + if (!DestA) DestA = Dest; + if (Dest == DestA) { + CasesA.push_back(I.getCaseValue()); + continue; + } + if (!DestB) DestB = Dest; + if (Dest == DestB) { + CasesB.push_back(I.getCaseValue()); + continue; + } + return false; // More than two destinations. } - Constant *Offset = ConstantExpr::getNeg(Cases.back()); - Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()); + assert(DestA && DestB && "Single-destination switch should have been folded."); + assert(DestA != DestB); + assert(DestB != SI->getDefaultDest()); + assert(!CasesB.empty() && "There must be non-default cases."); + assert(!CasesA.empty() || HasDefault); + + // Figure out if one of the sets of cases forms a contiguous range. + SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr; + BasicBlock *ContiguousDest = nullptr; + BasicBlock *OtherDest = nullptr; + if (!CasesA.empty() && CasesAreContiguous(CasesA)) { + ContiguousCases = &CasesA; + ContiguousDest = DestA; + OtherDest = DestB; + } else if (CasesAreContiguous(CasesB)) { + ContiguousCases = &CasesB; + ContiguousDest = DestB; + OtherDest = DestA; + } else + return false; + + // Start building the compare and branch. + + Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back()); + Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size()); Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) - Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); + Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off"); + Value *Cmp; // If NumCases overflowed, then all possible values jump to the successor. - if (NumCases->isNullValue() && SI->getNumCases() != 0) + if (NumCases->isNullValue() && !ContiguousCases->empty()) Cmp = ConstantInt::getTrue(SI->getContext()); else Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - BranchInst *NewBI = Builder.CreateCondBr( - Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); + BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest); // Update weight for the newly-created conditional branch. - SmallVector<uint64_t, 8> Weights; - bool HasWeights = HasBranchWeights(SI); - if (HasWeights) { + if (HasBranchWeights(SI)) { + SmallVector<uint64_t, 8> Weights; GetBranchWeights(SI, Weights); if (Weights.size() == 1 + SI->getNumCases()) { - // Combine all weights for the cases to be the true weight of NewBI. - // We assume that the sum of all weights for a Terminator can fit into 32 - // bits.
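In source terms, the rewrite built above replaces a contiguous run of cases with one subtraction and one unsigned range check. A hedged before/after sketch (the case values are assumed for illustration):

// Before: three contiguous cases sharing a destination.
int before(unsigned X) {
  switch (X) { case 3: case 4: case 5: return 1; default: return 0; }
}
// After: subtract the low bound, then a single icmp ult against the case
// count, which is exactly what CreateAdd/CreateICmpULT emit above.
int after(unsigned X) {
  return (X - 3u) < 3u ? 1 : 0;
}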
- uint32_t NewTrueWeight = 0; - for (unsigned I = 1, E = Weights.size(); I != E; ++I) - NewTrueWeight += (uint32_t)Weights[I]; + uint64_t TrueWeight = 0; + uint64_t FalseWeight = 0; + for (size_t I = 0, E = Weights.size(); I != E; ++I) { + if (SI->getSuccessor(I) == ContiguousDest) + TrueWeight += Weights[I]; + else + FalseWeight += Weights[I]; + } + while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) { + TrueWeight /= 2; + FalseWeight /= 2; + } NewBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getContext()). - createBranchWeights(NewTrueWeight, - (uint32_t)Weights[0])); + MDBuilder(SI->getContext()).createBranchWeights( + (uint32_t)TrueWeight, (uint32_t)FalseWeight)); } } - // Prune obsolete incoming values off the successor's PHI nodes. - for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin(); - isa<PHINode>(BBI); ++BBI) { - for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I) + // Prune obsolete incoming values off the successors' PHI nodes. + for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) { + unsigned PreviousEdges = ContiguousCases->size(); + if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges; + for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) + cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); + } + for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) { + unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size(); + if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; + for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); } + + // Drop the switch. SI->eraseFromParent(); return true; @@ -3274,8 +3241,8 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch /// and use it to remove dead cases. -static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL, - AssumptionCache *AC) { +static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, + const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); @@ -3426,9 +3393,8 @@ static Constant *LookupConstant(Value *V, /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. static Constant * -ConstantFold(Instruction *I, - const SmallDenseMap<Value *, Constant *> &ConstantPool, - const DataLayout *DL) { +ConstantFold(Instruction *I, const DataLayout &DL, + const SmallDenseMap<Value *, Constant *> &ConstantPool) { if (SelectInst *Select = dyn_cast<SelectInst>(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) @@ -3448,9 +3414,10 @@ ConstantFold(Instruction *I, return nullptr; } - if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], COps[1], DL); + } return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); } @@ -3460,12 +3427,10 @@ ConstantFold( /// destinations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI.
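EliminateDeadSwitchCases, a few hunks above, applies computeKnownBits to the switch condition: any case label that conflicts with a known-zero or known-one bit can never fire. A small assumed illustration of the idea:

// If the condition provably has its low bit clear, every odd case label
// is unreachable and can be removed.
int dispatch(unsigned X) {
  switch (X & ~1u) {
  case 4:  return 1;
  case 5:  return 2; // dead: label 5 has bit 0 set, but bit 0 is known zero
  default: return 0;
  }
}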
static bool -GetCaseResults(SwitchInst *SI, - ConstantInt *CaseVal, - BasicBlock *CaseDest, +GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, - SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res, - const DataLayout *DL) { + SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res, + const DataLayout &DL) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); @@ -3484,7 +3449,7 @@ GetCaseResults(SwitchInst *SI, } else if (isa<DbgInfoIntrinsic>(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { + } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) { // Instruction is side-effect free and constant. // If the instruction has uses outside this block or a phi node slot for @@ -3555,11 +3520,11 @@ static void MapCaseToResult(ConstantInt *CaseVal, // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. -static bool InitializeUniqueCases( - SwitchInst *SI, const DataLayout *DL, PHINode *&PHI, - BasicBlock *&CommonDest, - SwitchCaseResultVectorTy &UniqueResults, - Constant *&DefaultResult) { +static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, + BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, + const DataLayout &DL) { for (auto &I : SI->cases()) { ConstantInt *CaseVal = I.getCaseValue(); @@ -3666,15 +3631,15 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout *DL, AssumptionCache *AC) { + AssumptionCache *AC, const DataLayout &DL) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; BasicBlock *CommonDest = nullptr; Constant *DefaultResult; SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. - if (!InitializeUniqueCases(SI, DL, PHI, CommonDest, UniqueResults, - DefaultResult)) + if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, + DL)) return false; // Selects choose between maximum two values. if (UniqueResults.size() != 2) @@ -3701,12 +3666,10 @@ namespace { /// SwitchLookupTable - Create a lookup table to use as a switch replacement /// with the contents of Values, using DefaultValue to fill any holes in the /// table. - SwitchLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, - Constant *DefaultValue, - const DataLayout *DL); + SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL); /// BuildLookup - Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. @@ -3714,8 +3677,7 @@ namespace { /// WouldFitInRegister - Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. 
- static bool WouldFitInRegister(const DataLayout *DL, - uint64_t TableSize, + static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, const Type *ElementType); private: @@ -3757,12 +3719,10 @@ namespace { }; } -SwitchLookupTable::SwitchLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, - Constant *DefaultValue, - const DataLayout *DL) +SwitchLookupTable::SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL) : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); @@ -3924,19 +3884,17 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { "switch.tableidx.zext"); Value *GEPIndices[] = { Builder.getInt32(0), Index }; - Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, - "switch.gep"); + Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, + GEPIndices, "switch.gep"); return Builder.CreateLoad(GEP, "switch.load"); } } llvm_unreachable("Unknown lookup table kind!"); } -bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL, +bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, const Type *ElementType) { - if (!DL) - return false; const IntegerType *IT = dyn_cast<IntegerType>(ElementType); if (!IT) return false; @@ -3946,17 +3904,16 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL, // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. if (TableSize >= UINT_MAX/IT->getBitWidth()) return false; - return DL->fitsInLegalInteger(TableSize * IT->getBitWidth()); + return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); } /// ShouldBuildLookupTable - Determine whether a lookup table should be built /// for this switch, based on the number of cases, size of the table and the /// types of the results. -static bool ShouldBuildLookupTable(SwitchInst *SI, - uint64_t TableSize, - const TargetTransformInfo &TTI, - const DataLayout *DL, - const SmallDenseMap<PHINode*, Type*>& ResultTypes) { +static bool +ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, + const TargetTransformInfo &TTI, const DataLayout &DL, + const SmallDenseMap<PHINode *, Type *> &ResultTypes) { if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) return false; // TableSize overflowed, or mul below might overflow. @@ -4079,10 +4036,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. -static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder, - const TargetTransformInfo &TTI, - const DataLayout* DL) { +static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // Only build lookup table when we have a target that supports it. @@ -4153,14 +4109,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, // or a bitmask that fits in a register. 
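WouldFitInRegister, shown above, asks whether the entire result table can be packed into one legal integer. A rough model of the check, with the largest legal width passed in explicitly (the helper name and the parameterization are assumptions; on common 64-bit targets the width would be 64):

#include <climits>
#include <cstdint>

// Guard the multiply against overflow, then test whether TableSize
// elements of ElemBits each fit in a target-legal register.
bool wouldFitInRegister(uint64_t TableSize, unsigned ElemBits,
                        unsigned LargestLegalBits) {
  if (TableSize >= UINT_MAX / ElemBits)
    return false;                               // width would overflow
  return TableSize * ElemBits <= LargestLegalBits;
}
// e.g. 32 one-bit results need 32 bits: a bitmap lookup suffices;
// 32 results of i32 need 1024 bits: a constant array is emitted instead.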
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), - &CommonDest, DefaultResultsList, DL); + &CommonDest, DefaultResultsList, DL); bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { // As an extra penalty for the validity test we require more cases. if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). return false; - if (!(DL && DL->fitsInLegalInteger(TableSize))) + if (!DL.fitsInLegalInteger(TableSize)) return false; } @@ -4193,19 +4149,18 @@ static bool SwitchToLookupTable(SwitchInst *SI, "It is impossible for a switch to have more entries than the max " "representable value of its input integer type's size."); - // If we have a fully covered lookup table, unconditionally branch to the - // lookup table BB. Otherwise, check if the condition value is within the case - // range. If it is so, branch to the new BB. Otherwise branch to SI's default - // destination. + // If the default destination is unreachable, or if the lookup table covers + // all values of the conditional variable, branch directly to the lookup table + // BB. Otherwise, check that the condition is within the case range. + const bool DefaultIsReachable = + !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); BranchInst *RangeCheckBranch = nullptr; - const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; - if (GeneratingCoveredLookupTable) { + if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); - // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later, - // do not delete PHINodes here. - SI->getDefaultDest()->removePredecessor(SI->getParent(), - true/*DontDeleteUselessPHIs*/); + // Note: We call removePredecessor later since we need to be able to get the + // PHI value for the default case in case we're using a bit mask. } else { Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( MinCaseVal->getType(), TableSize)); @@ -4257,6 +4212,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent()); } + if (!DefaultIsReachable || GeneratingCoveredLookupTable) { + // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later, + // do not delete PHINodes here. + SI->getDefaultDest()->removePredecessor(SI->getParent(), + /*DontDeleteUselessPHIs=*/true); + } + bool ReturnedEarly = false; for (size_t I = 0, E = PHIs.size(); I != E; ++I) { PHINode *PHI = PHIs[I]; @@ -4317,12 +4279,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // If the block only contains the switch, see if we can fold the block // away into any preds.
@@ -4332,25 +4294,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Remove unreachable cases. - if (EliminateDeadSwitchCases(SI, DL, AC)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (EliminateDeadSwitchCases(SI, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - if (SwitchToSelect(SI, Builder, DL, AC)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SwitchToSelect(SI, Builder, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - if (SwitchToLookupTable(SI, Builder, TTI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SwitchToLookupTable(SI, Builder, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4387,11 +4349,87 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } return Changed; } +/// Given a block with only a single landing pad and an unconditional branch +/// try to find another basic block which this one can be merged with. This +/// handles cases where we have multiple invokes with unique landing pads, but +/// a shared handler. +/// +/// We specifically choose to not worry about merging non-empty blocks +/// here. That is a PRE/scheduling problem and is best solved elsewhere. In +/// practice, the optimizer produces empty landing pad blocks quite frequently +/// when dealing with exception dense code. (see: instcombine, gvn, if-else +/// sinking in this file) +/// +/// This is primarily a code size optimization. We need to avoid performing +/// any transform which might inhibit optimization (such as our ability to +/// specialize a particular handler via tail commoning). We do this by not +/// merging any blocks which require us to introduce a phi. Since the same +/// values are flowing through both blocks, we don't lose any ability to +/// specialize. If anything, we make such specialization more likely. +/// +/// TODO - This transformation could remove entries from a phi in the target +/// block when the inputs in the phi are the same for the two blocks being +/// merged. In some cases, this could result in removal of the PHI entirely. +static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, + BasicBlock *BB) { + auto Succ = BB->getUniqueSuccessor(); + assert(Succ); + // If there's a phi in the successor block, we'd likely have to introduce + // a phi into the merged landing pad block.
+ if (isa<PHINode>(*Succ->begin())) + return false; + + for (BasicBlock *OtherPred : predecessors(Succ)) { + if (BB == OtherPred) + continue; + BasicBlock::iterator I = OtherPred->begin(); + LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I); + if (!LPad2 || !LPad2->isIdenticalTo(LPad)) + continue; + for (++I; isa<DbgInfoIntrinsic>(I); ++I) {} + BranchInst *BI2 = dyn_cast<BranchInst>(I); + if (!BI2 || !BI2->isIdenticalTo(BI)) + continue; + + // We've found an identical block. Update our predecessors to take that + // path instead and make ourselves dead. + SmallSet<BasicBlock *, 16> Preds; + Preds.insert(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : Preds) { + InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); + assert(II->getNormalDest() != BB && + II->getUnwindDest() == BB && "unexpected successor"); + II->setUnwindDest(OtherPred); + } + + // The debug info in OtherPred doesn't cover the merged control flow that + // used to go through BB. We need to delete it or update it. + for (auto I = OtherPred->begin(), E = OtherPred->end(); + I != E;) { + Instruction &Inst = *I; I++; + if (isa<DbgInfoIntrinsic>(Inst)) + Inst.eraseFromParent(); + } + + SmallSet<BasicBlock *, 16> Succs; + Succs.insert(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : Succs) { + Succ->removePredecessor(BB); + } + + IRBuilder<> Builder(BI); + Builder.CreateUnreachable(); + BI->eraseFromParent(); + return true; + } + return false; +} + bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); @@ -4411,17 +4449,26 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, - BonusInstThreshold, DL, AC)) + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, + BonusInstThreshold, AC)) return true; } + // See if we can merge an empty landing pad block with another which is + // equivalent. + if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) {} + if (I->isTerminator() && + TryToMergeLandingPad(LPad, BI, BB)) + return true; + } + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (FoldBranchToCommonDest(BI, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4436,7 +4483,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics.
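Schematically, TryToMergeLandingPad rewires every invoke that unwound to the dead block so it unwinds to the identical block instead (a conceptual picture, assumed for illustration, not IR from the commit):

// Before:                              After:
//   invoke f() unwind to %lpad.A         invoke f() unwind to %lpad.B
//   %lpad.A: landingpad; br %handler     (%lpad.A ends in unreachable)
//   invoke g() unwind to %lpad.B         invoke g() unwind to %lpad.B
//   %lpad.B: landingpad; br %handler     %lpad.B: landingpad; br %handler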
@@ -4446,26 +4493,26 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } else if (&*I == cast<Instruction>(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. - if (SimplifyBranchOnICmpChain(BI, DL, Builder)) + if (SimplifyBranchOnICmpChain(BI, Builder, DL)) return true; // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (FoldBranchToCommonDest(BI, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -4473,16 +4520,16 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (HoistThenElseCodeToIf(BI, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -4490,8 +4537,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // If this is a branch on a phi node in the current block, thread control @@ -4499,14 +4546,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Scan predecessor blocks for conditional branches. 
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4618,7 +4665,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // eliminate it, do so now. if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, DL); + Changed |= FoldTwoEntryPHINode(PN, TTI, DL); Builder.SetInsertPoint(BB->getTerminator()); if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -4650,7 +4697,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, const DataLayout *DL, - AssumptionCache *AC) { - return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AC).run(BB); + unsigned BonusInstThreshold, AssumptionCache *AC) { + return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), + BonusInstThreshold, AC).run(BB); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index d54c09a..3757a80 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -48,22 +47,15 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; // May be NULL SmallVectorImpl<WeakVH> &DeadInsts; bool Changed; public: - SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) : - L(Loop), - LI(LPM->getAnalysisIfAvailable<LoopInfo>()), - SE(SE), - DeadInsts(Dead), - Changed(false) { - DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI, + SmallVectorImpl<WeakVH> &Dead) + : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -277,95 +269,57 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, Value *IVOperand) { - // Currently we only handle instructions of the form "add <indvar> <value>" - unsigned Op = BO->getOpcode(); - if (Op != Instruction::Add) + // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`. 
+ if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) return false; - // If BO is already both nuw and nsw then there is nothing left to do - if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) + const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *, + SCEV::NoWrapFlags); + + switch (BO->getOpcode()) { + default: return false; - IntegerType *IT = cast<IntegerType>(IVOperand->getType()); - Value *OtherOperand = nullptr; - if (BO->getOperand(0) == IVOperand) { - OtherOperand = BO->getOperand(1); - } else { - assert(BO->getOperand(1) == IVOperand && "only other use!"); - OtherOperand = BO->getOperand(0); + case Instruction::Add: + GetExprForBO = &ScalarEvolution::getAddExpr; + break; + + case Instruction::Sub: + GetExprForBO = &ScalarEvolution::getMinusSCEV; + break; + + case Instruction::Mul: + GetExprForBO = &ScalarEvolution::getMulExpr; + break; } - bool Changed = false; - const SCEV *OtherOpSCEV = SE->getSCEV(OtherOperand); - if (OtherOpSCEV == SE->getCouldNotCompute()) - return false; + unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2); + const SCEV *LHS = SE->getSCEV(BO->getOperand(0)); + const SCEV *RHS = SE->getSCEV(BO->getOperand(1)); - const SCEV *IVOpSCEV = SE->getSCEV(IVOperand); - const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0); + bool Changed = false; - if (!BO->hasNoSignedWrap()) { - // Upgrade the add to an "add nsw" if we can prove that it will never - // sign-overflow or sign-underflow. - - const SCEV *SignedMax = - SE->getConstant(APInt::getSignedMaxValue(IT->getBitWidth())); - const SCEV *SignedMin = - SE->getConstant(APInt::getSignedMinValue(IT->getBitWidth())); - - // The addition "IVOperand + OtherOp" does not sign-overflow if the result - // is sign-representable in 2's complement in the given bit-width. - // - // If OtherOp is SLT 0, then for an IVOperand in [SignedMin - OtherOp, - // SignedMax], "IVOperand + OtherOp" is in [SignedMin, SignedMax + OtherOp]. - // Everything in [SignedMin, SignedMax + OtherOp] is representable since - // SignedMax + OtherOp is at least -1. - // - // If OtherOp is SGE 0, then for an IVOperand in [SignedMin, SignedMax - - // OtherOp], "IVOperand + OtherOp" is in [SignedMin + OtherOp, SignedMax]. - // Everything in [SignedMin + OtherOp, SignedMax] is representable since - // SignedMin + OtherOp is at most -1. - // - // It follows that for all values of IVOperand in [SignedMin - smin(0, - // OtherOp), SignedMax - smax(0, OtherOp)] the result of the add is - // representable (i.e. there is no sign-overflow). 
- - const SCEV *UpperDelta = SE->getSMaxExpr(ZeroSCEV, OtherOpSCEV); - const SCEV *UpperLimit = SE->getMinusSCEV(SignedMax, UpperDelta); - - bool NeverSignedOverflows = - SE->isKnownPredicate(ICmpInst::ICMP_SLE, IVOpSCEV, UpperLimit); - - if (NeverSignedOverflows) { - const SCEV *LowerDelta = SE->getSMinExpr(ZeroSCEV, OtherOpSCEV); - const SCEV *LowerLimit = SE->getMinusSCEV(SignedMin, LowerDelta); - - bool NeverSignedUnderflows = - SE->isKnownPredicate(ICmpInst::ICMP_SGE, IVOpSCEV, LowerLimit); - if (NeverSignedUnderflows) { - BO->setHasNoSignedWrap(true); - Changed = true; - } + if (!BO->hasNoUnsignedWrap()) { + const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy); + const SCEV *OpAfterExtend = (SE->*GetExprForBO)( + SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy), + SCEV::FlagAnyWrap); + if (ExtendAfterOp == OpAfterExtend) { + BO->setHasNoUnsignedWrap(); + SE->forgetValue(BO); + Changed = true; } } - if (!BO->hasNoUnsignedWrap()) { - // Upgrade the add computing "IVOperand + OtherOp" to an "add nuw" if we can - // prove that it will never unsigned-overflow (i.e. the result will always - // be representable in the given bit-width). - // - // "IVOperand + OtherOp" is unsigned-representable in 2's complement iff it - // does not produce a carry. "IVOperand + OtherOp" produces no carry iff - // IVOperand ULE (UnsignedMax - OtherOp). - - const SCEV *UnsignedMax = - SE->getConstant(APInt::getMaxValue(IT->getBitWidth())); - const SCEV *UpperLimit = SE->getMinusSCEV(UnsignedMax, OtherOpSCEV); - - bool NeverUnsignedOverflows = - SE->isKnownPredicate(ICmpInst::ICMP_ULE, IVOpSCEV, UpperLimit); - - if (NeverUnsignedOverflows) { - BO->setHasNoUnsignedWrap(true); + if (!BO->hasNoSignedWrap()) { + const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy); + const SCEV *OpAfterExtend = (SE->*GetExprForBO)( + SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy), + SCEV::FlagAnyWrap); + if (ExtendAfterOp == OpAfterExtend) { + BO->setHasNoSignedWrap(); + SE->forgetValue(BO); Changed = true; } } @@ -562,8 +516,8 @@ void IVVisitor::anchor() { } bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, SmallVectorImpl<WeakVH> &Dead, IVVisitor *V) { - LoopInfo *LI = &LPM->getAnalysis<LoopInfo>(); - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead); + LoopInfo *LI = &LPM->getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp index cc97098..c499c87 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -25,7 +25,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -43,7 +43,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } /// runOnFunction - Remove instructions that simplify. 
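The strengthenOverflowingOperation hunk above proves nuw/nsw by comparing extend-then-operate against operate-then-extend at twice the bit width; SCEV performs that comparison symbolically, for every value the expressions can take. The same identity at a fixed width, as an assumed concrete illustration:

#include <cstdint>

// An 8-bit add is free of unsigned wrap exactly when zero-extending its
// result matches doing the add in 16 bits on zero-extended operands.
bool addWouldBeNUW(uint8_t A, uint8_t B) {
  uint16_t ExtendAfterOp = (uint16_t)(uint8_t)(A + B); // zext(a +_i8 b)
  uint16_t OpAfterExtend = (uint16_t)A + (uint16_t)B;  // zext(a) + zext(b)
  return ExtendAfterOp == OpAfterExtend;
}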
@@ -51,9 +51,9 @@ namespace { const DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; - const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + const DataLayout &DL = F.getParent()->getDataLayout(); + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; @@ -106,7 +106,7 @@ char InstSimplifier::ID = 0; INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", "Remove redundant instructions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(InstSimplifier, "instsimplify", "Remove redundant instructions", false, false) char &llvm::InstructionSimplifierID = InstSimplifier::ID; diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 5a0d52e..6bbf828 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -30,7 +30,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; @@ -120,12 +120,12 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, /// string/memory copying library function \p Func. /// Acceptable functions are st[rp][n]?cpy, memove, memcpy, and memset. /// Their fortified (_chk) counterparts are also accepted. -static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func, - const DataLayout *DL) { +static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func) { + const DataLayout &DL = F->getParent()->getDataLayout(); FunctionType *FT = F->getFunctionType(); LLVMContext &Context = F->getContext(); Type *PCharTy = Type::getInt8PtrTy(Context); - Type *SizeTTy = DL ? DL->getIntPtrType(Context) : nullptr; + Type *SizeTTy = DL.getIntPtrType(Context); unsigned NumParams = FT->getNumParams(); // All string libfuncs return the same type as the first parameter. @@ -208,10 +208,6 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) { if (Len == 0) return Dst; - // These optimizations require DataLayout. - if (!DL) - return nullptr; - return emitStrLenMemCpy(Src, Dst, Len, B); } @@ -226,13 +222,13 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of // the string .. we're concatenating). - Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); + Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr"); // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
- B.CreateMemCpy( - CpyDst, Src, - ConstantInt::get(DL->getIntPtrType(Src->getContext()), Len + 1), 1); + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1), + 1); return Dst; } @@ -269,10 +265,6 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { if (SrcLen == 0 || Len == 0) return Dst; - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // We don't optimize this case if (Len < SrcLen) return nullptr; @@ -297,25 +289,21 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); if (!CharC) { - // These optimizations require DataLayout. - if (!DL) - return nullptr; - uint64_t Len = GetStringLength(SrcStr); if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. return nullptr; - return EmitMemChr( - SrcStr, CI->getArgOperand(1), // include nul. - ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), B, DL, TLI); + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), + B, DL, TLI); } // Otherwise, the character is a constant, see if the first argument is // a string literal. If so, we can constant fold. StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { - if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p) - return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); + if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) + return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); return nullptr; } @@ -328,7 +316,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { return Constant::getNullValue(CI->getType()); // strchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr"); } Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { @@ -350,8 +338,8 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) - if (DL && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, DL, TLI); + if (CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TLI); return nullptr; } @@ -363,7 +351,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { return Constant::getNullValue(CI->getType()); // strrchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr"); } Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { @@ -398,12 +386,8 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { - // These optimizations require DataLayout. 
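// emitStrLenMemCpy above lowers strcat(Dst, Src), when the source length is
// a known constant, to a strlen of Dst followed by a single memcpy that also
// moves the nul terminator. A standalone C-library model of the rewrite,
// assuming nothing beyond <cstring>; plain C++, not the IRBuilder calls, and
// the helper name is illustrative only.
#include <cassert>
#include <cstring>

static char *strcatViaMemcpy(char *Dst, const char *Src) {
  size_t DstLen = strlen(Dst);          // EmitStrLen in the pass
  char *CpyDst = Dst + DstLen;          // the "endptr" GEP
  memcpy(CpyDst, Src, strlen(Src) + 1); // "Len + 1" copies the nul byte
  return Dst;                           // strcat returns its first argument
}

int main() {
  char Buf[16] = "foo";
  assert(strcmp(strcatViaMemcpy(Buf, "bar"), "foobar") == 0);
}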
- if (!DL) - return nullptr; - return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(DL->getIntPtrType(CI->getContext()), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), std::min(Len1, Len2)), B, DL, TLI); } @@ -435,7 +419,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); - if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); StringRef Str1, Str2; @@ -462,17 +446,13 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); if (Len == 0) @@ -481,7 +461,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(Dst, Src, - ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), 1); + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1); return Dst; } @@ -490,17 +470,13 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // Verify the "stpcpy" function prototype. FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy, DL)) - return nullptr; - - // These optimizations require DataLayout. - if (!DL) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) Value *StrLen = EmitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; + return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; } // See if we can get the length of the input string. @@ -509,9 +485,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { return nullptr; Type *PT = FT->getParamType(0); - Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); + Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); Value *DstEnd = - B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); + B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. @@ -523,7 +499,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy)) return nullptr; Value *Dst = CI->getArgOperand(0); @@ -551,17 +527,13 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { if (Len == 0) return Dst; // strncpy(x, y, 0) -> x - // These optimizations require DataLayout. 
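// optimizeStrCpy/optimizeStpCpy above rewrite the copy as a single memcpy
// once the source length is a known constant (the Len used in the memcpy
// evidently counts the nul, since stpcpy's end pointer is built with a
// "Len - 1" GEP). A standalone model under that reading; plain C++, helper
// names illustrative only.
#include <cassert>
#include <cstring>

static char *strcpyViaMemcpy(char *Dst, const char *Src) {
  size_t Len = strlen(Src) + 1;   // length including the nul terminator
  memcpy(Dst, Src, Len);
  return Dst;
}

static char *stpcpyViaMemcpy(char *Dst, const char *Src) {
  size_t Len = strlen(Src) + 1;
  char *DstEnd = Dst + (Len - 1); // points at the copied nul terminator
  memcpy(Dst, Src, Len);
  return DstEnd;                  // stpcpy returns the end pointer
}

int main() {
  char A[8], B[8];
  assert(strcmp(strcpyViaMemcpy(A, "hi"), "hi") == 0);
  assert(*stpcpyViaMemcpy(B, "hi") == '\0' && strcmp(B, "hi") == 0);
}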
- if (!DL) - return nullptr; - // Let strncpy handle the zero padding if (Len > SrcLen + 1) return nullptr; Type *PT = FT->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - B.CreateMemCpy(Dst, Src, ConstantInt::get(DL->getIntPtrType(PT), Len), 1); + B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); return Dst; } @@ -625,12 +597,12 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { if (I == StringRef::npos) // No match. return Constant::getNullValue(CI->getType()); - return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk"); } // strpbrk(s, "a") -> strchr(s, 'a') - if (DL && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI); + if (HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TLI); return nullptr; } @@ -706,7 +678,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) { } // strcspn(s, "") -> strlen(s) - if (DL && HasS2 && S2.empty()) + if (HasS2 && S2.empty()) return EmitStrLen(CI->getArgOperand(0), B, DL, TLI); return nullptr; @@ -725,7 +697,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI); if (!StrLen) return nullptr; @@ -767,12 +739,98 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI); + Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } return nullptr; } +Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy(32) || + !FT->getParamType(2)->isIntegerTy() || + !FT->getReturnType()->isPointerTy()) + return nullptr; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + + // memchr(x, y, 0) -> null + if (LenC && LenC->isNullValue()) + return Constant::getNullValue(CI->getType()); + + // From now on we need at least constant length and string. + StringRef Str; + if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) + return nullptr; + + // Truncate the string to LenC. If Str is smaller than LenC we will still only + // scan the string, as reading past the end of it is undefined and we can just + // return null if we don't find the char. + Str = Str.substr(0, LenC->getZExtValue()); + + // If the char is variable but the input str and length are not we can turn + // this memchr call into a simple bit field test. Of course this only works + // when the return value is only checked against null. + // + // It would be really nice to reuse switch lowering here but we can't change + // the CFG at this point. 
+ // + // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0 + // after bounds check. + if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { + unsigned char Max = + *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()), + reinterpret_cast<const unsigned char *>(Str.end())); + + // Make sure the bit field we're about to create fits in a register on the + // target. + // FIXME: On a 64 bit architecture this prevents us from using the + // interesting range of alpha ascii chars. We could do better by emitting + // two bitfields or shifting the range by 64 if no lower chars are used. + if (!DL.fitsInLegalInteger(Max + 1)) + return nullptr; + + // For the bit field use a power-of-2 type with at least 8 bits to avoid + // creating unnecessary illegal types. + unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); + + // Now build the bit field. + APInt Bitfield(Width, 0); + for (char C : Str) + Bitfield.setBit((unsigned char)C); + Value *BitfieldC = B.getInt(Bitfield); + + // First check that the bit field access is within bounds. + Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType()); + Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), + "memchr.bounds"); + + // Create code that checks if the given bit is set in the field. + Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); + Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); + + // Finally merge both checks and cast to pointer type. The inttoptr + // implicitly zexts the i1 to intptr type. + return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType()); + } + + // Check if all arguments are constants. If so, we can constant fold. + if (!CharC) + return nullptr; + + // Compute the offset. + size_t I = Str.find(CharC->getSExtValue() & 0xFF); + if (I == StringRef::npos) // Didn't find the char. memchr returns null. + return Constant::getNullValue(CI->getType()); + + // memchr(s+n,c,l) -> gep(s+n+i,c) + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr"); +} + Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); @@ -827,11 +885,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. - if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy)) return nullptr; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) @@ -842,11 +897,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. - if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove)) return nullptr; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) @@ -857,11 +909,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. 
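// A standalone model of the new optimizeMemChr rewrite above: for a
// constant string S and a variable needle C, the predicate
// "memchr(S, C, N) != nullptr" becomes a bounds check plus one bit test.
// Plain C++ rather than emitted IR; a fixed 64-bit field stands in for the
// pass's fitsInLegalInteger/NextPowerOf2 width selection, and the function
// name is illustrative only.
#include <cassert>
#include <cstdint>
#include <cstring>

static bool memchrAsBitTest(const char *S, size_t N, unsigned C) {
  uint64_t Bitfield = 0;
  for (size_t I = 0; I != N; ++I) {
    unsigned char Ch = static_cast<unsigned char>(S[I]);
    assert(Ch < 64 && "haystack bytes must fit the field, as in the pass");
    Bitfield |= uint64_t(1) << Ch;       // one bit per distinct byte value
  }
  // "memchr.bounds" then "memchr.bits" from the emitted IR above.
  return C < 64 && ((uint64_t(1) << C) & Bitfield) != 0;
}

int main() {
  const char *S = "\r\n";                // the example from the comment
  for (unsigned C = 0; C != 256; ++C)
    assert(memchrAsBitTest(S, 2, C) == (memchr(S, C, 2) != nullptr));
}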
- if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset)) return nullptr; // memset(p, v, n) -> llvm.memset(p, v, n, 1) @@ -924,7 +973,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { Module *M = CI->getParent()->getParent()->getParent(); - Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID(); + Intrinsic::ID IID = Callee->getIntrinsicID(); Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); V = B.CreateCall(F, V); } else { @@ -1101,7 +1150,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *Callee = M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty(), nullptr); - CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + CallInst *CI = B.CreateCall(Callee, {One, LdExpArg}); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1387,7 +1436,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); - Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz"); + Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); @@ -1521,7 +1570,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI); + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -1534,7 +1583,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - Value *NewCI = EmitPutS(GV, B, DL, TLI); + Value *NewCI = EmitPutS(GV, B, TLI); return (CI->use_empty() || !NewCI) ? NewCI : ConstantInt::get(CI->getType(), FormatStr.size() + 1); @@ -1544,7 +1593,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TLI); if (CI->use_empty() || !Res) return Res; @@ -1554,7 +1603,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - return EmitPutS(CI->getArgOperand(1), B, DL, TLI); + return EmitPutS(CI->getArgOperand(1), B, TLI); } return nullptr; } @@ -1600,16 +1649,11 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { if (FormatStr[i] == '%') return nullptr; // we found a format specifier, bail out. - // These optimizations require DataLayout. 
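// optimizeUnaryDoubleFP above performs the float shrink shown in its
// comment: when a double-precision libcall's operand is a float that was
// widened, and the result is immediately truncated back to float, the
// single-precision variant computes the same value. A standalone check of
// that equivalence for floor; plain C++, and the pass additionally handles
// the intrinsic form via Intrinsic::getDeclaration.
#include <cassert>
#include <cmath>

static float floorThroughDouble(float F) {
  return static_cast<float>(floor(static_cast<double>(F))); // matched pattern
}

static float floorShrunk(float F) {
  return floorf(F); // the replacement call
}

int main() {
  for (float F : {-1.5f, -0.25f, 0.0f, 7.75f})
    assert(floorThroughDouble(F) == floorShrunk(F));
}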
- if (!DL) - return nullptr; - // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - B.CreateMemCpy( - CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(DL->getIntPtrType(CI->getContext()), - FormatStr.size() + 1), - 1); // Copy the null byte. + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1), + 1); // Copy the null byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1627,17 +1671,13 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); + Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); B.CreateStore(B.getInt8(0), Ptr); return ConstantInt::get(CI->getType(), 1); } if (FormatStr[1] == 's') { - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; @@ -1702,13 +1742,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) { if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return nullptr; // We found a format specifier. - // These optimizations require DataLayout. - if (!DL) - return nullptr; - return EmitFWrite( CI->getArgOperand(1), - ConstantInt::get(DL->getIntPtrType(CI->getContext()), FormatStr.size()), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()), CI->getArgOperand(0), B, DL, TLI); } @@ -1723,14 +1759,14 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; - return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); } return nullptr; } @@ -1790,7 +1826,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TLI); return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } @@ -1802,10 +1838,6 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // Require two pointers. Also, we can't optimize if return value is used. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || @@ -1820,7 +1852,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { // Known to have no uses (see above). 
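// A standalone model of the optimizeSPrintFString fold above: a format
// string containing no '%' makes sprintf a plain copy, so the call becomes
// one memcpy of strlen(fmt) + 1 bytes and its result constant-folds to
// strlen(fmt). Plain C++; the helper name is illustrative only.
#include <cassert>
#include <cstring>

static int sprintfNoFormat(char *Dst, const char *Fmt) {
  size_t N = strlen(Fmt);     // constant-folded in the pass
  memcpy(Dst, Fmt, N + 1);    // the "+ 1" copies the nul byte
  return static_cast<int>(N); // sprintf returns chars written, sans nul
}

int main() {
  char Buf[8];
  assert(sprintfNoFormat(Buf, "hi") == 2 && strcmp(Buf, "hi") == 0);
}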
return EmitFWrite( CI->getArgOperand(0), - ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len - 1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), CI->getArgOperand(1), B, DL, TLI); } @@ -1839,7 +1871,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI); + Value *Res = EmitPutChar(B.getInt32('\n'), B, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -1906,6 +1938,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrCSpn(CI, Builder); case LibFunc::strstr: return optimizeStrStr(CI, Builder); + case LibFunc::memchr: + return optimizeMemChr(CI, Builder); case LibFunc::memcmp: return optimizeMemCmp(CI, Builder); case LibFunc::memcpy: @@ -2088,15 +2122,19 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return nullptr; } -LibCallSimplifier::LibCallSimplifier(const DataLayout *DL, - const TargetLibraryInfo *TLI) : - FortifiedSimplifier(DL, TLI), - DL(DL), - TLI(TLI), - UnsafeFPShrink(false) { +LibCallSimplifier::LibCallSimplifier( + const DataLayout &DL, const TargetLibraryInfo *TLI, + function_ref<void(Instruction *, Value *)> Replacer) + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), + Replacer(Replacer) {} + +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { + // Indirect through the replacer used in this instance. + Replacer(I, With); } -void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { +/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I, + Value *With) { I->replaceAllUsesWith(With); I->eraseFromParent(); } @@ -2183,7 +2221,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2197,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> & Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2211,7 +2249,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2227,8 +2265,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, LibFunc::Func Func) { Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); + const DataLayout &DL = CI->getModule()->getDataLayout(); - if (!checkStringCopyLibFuncSignature(Callee, Func, DL)) + if (!checkStringCopyLibFuncSignature(Callee, Func)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), @@ 
-2237,7 +2276,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // __stpcpy_chk(x,x,...) -> x+strlen(x) if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { Value *StrLen = EmitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; + return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; } // If a) we don't have any length information, or b) we know this will @@ -2245,29 +2284,25 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. - if (isFortifiedCallFoldable(CI, 2, 1, true)) { - Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6)); - return Ret; - } else if (!OnlyLowerUnknownSize) { - // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. - uint64_t Len = GetStringLength(Src); - if (Len == 0) - return nullptr; + if (isFortifiedCallFoldable(CI, 2, 1, true)) + return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6)); - // This optimization requires DataLayout. - if (!DL) - return nullptr; + if (OnlyLowerUnknownSize) + return nullptr; - Type *SizeTTy = DL->getIntPtrType(CI->getContext()); - Value *LenV = ConstantInt::get(SizeTTy, Len); - Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); - // If the function was an __stpcpy_chk, and we were able to fold it into - // a __memcpy_chk, we still need to return the correct end pointer. - if (Ret && Func == LibFunc::stpcpy_chk) - return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1)); - return Ret; - } - return nullptr; + // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) + return nullptr; + + Type *SizeTTy = DL.getIntPtrType(CI->getContext()); + Value *LenV = ConstantInt::get(SizeTTy, Len); + Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); + // If the function was an __stpcpy_chk, and we were able to fold it into + // a __memcpy_chk, we still need to return the correct end pointer. + if (Ret && Func == LibFunc::stpcpy_chk) + return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1)); + return Ret; } Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, @@ -2276,20 +2311,29 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); - if (!checkStringCopyLibFuncSignature(Callee, Func, DL)) + if (!checkStringCopyLibFuncSignature(Callee, Func)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { - Value *Ret = - EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7)); + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI, Name.substr(2, 7)); return Ret; } return nullptr; } Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->isNoBuiltin()) - return nullptr; + // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here. + // Some clang users checked for _chk libcall availability using: + // __has_builtin(__builtin___memcpy_chk) + // When compiling with -fno-builtin, this is always true. 
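// A standalone model of the __st[rp]cpy_chk fold above: with a known source
// length the checked string copy becomes a (checked) memcpy, and a
// __stpcpy_chk caller still needs the end pointer, hence the extra "Len - 1"
// GEP. Plain C++ with an assert standing in for the runtime object-size
// check that __memcpy_chk would perform; names illustrative only.
#include <cassert>
#include <cstring>

static char *stpcpyChkViaMemcpy(char *Dst, const char *Src, size_t ObjSize) {
  size_t Len = strlen(Src) + 1;  // known source length, including the nul
  assert(Len <= ObjSize && "__memcpy_chk would trap here");
  memcpy(Dst, Src, Len);
  return Dst + (Len - 1);        // __stpcpy_chk returns the end pointer
}

int main() {
  char Buf[8];
  assert(*stpcpyChkViaMemcpy(Buf, "hi", sizeof(Buf)) == '\0');
  assert(strcmp(Buf, "hi") == 0);
}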
+ // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we + // end up with fortified libcalls, which isn't acceptable in a freestanding + // environment which only provides their non-fortified counterparts. + // + // Until we change clang and/or teach external users to check for availability + // differently, disregard the "nobuiltin" attribute and TLI::has. + // + // PR23093. LibFunc::Func Func; Function *Callee = CI->getCalledFunction(); @@ -2298,7 +2342,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C; // First, check that this is a known library functions. - if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func)) + if (!TLI->getLibFunc(FuncName, Func)) return nullptr; // We never change the calling convention. @@ -2324,8 +2368,6 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { return nullptr; } -FortifiedLibCallSimplifier:: -FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI, - bool OnlyLowerUnknownSize) - : DL(DL), TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) { -} +FortifiedLibCallSimplifier::FortifiedLibCallSimplifier( + const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize) + : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp index d36283e..a2a54da 100644 --- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp @@ -60,7 +60,8 @@ #define DEBUG_TYPE "symbol-rewriter" #include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" -#include "llvm/PassManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryBuffer.h" @@ -72,15 +73,15 @@ #include "llvm/Transforms/Utils/SymbolRewriter.h" using namespace llvm; +using namespace SymbolRewriter; static cl::list<std::string> RewriteMapFiles("rewrite-map-file", cl::desc("Symbol Rewrite Map"), cl::value_desc("filename")); -namespace llvm { -namespace SymbolRewriter { -void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source, - const std::string &Target) { +static void rewriteComdat(Module &M, GlobalObject *GO, + const std::string &Source, + const std::string &Target) { if (Comdat *CD = GO->getComdat()) { auto &Comdats = M.getComdatSymbolTable(); @@ -92,6 +93,7 @@ void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source, } } +namespace { template <RewriteDescriptor::Type DT, typename ValueType, ValueType *(llvm::Module::*Get)(StringRef) const> class ExplicitRewriteDescriptor : public RewriteDescriptor { @@ -226,6 +228,7 @@ typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, &llvm::Module::getNamedAlias, &llvm::Module::aliases> PatternRewriteNamedAliasDescriptor; +} // namespace bool RewriteMapParser::parse(const std::string &MapFile, RewriteDescriptorList *DL) { @@ -489,8 +492,6 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, return true; } -} -} namespace { class RewriteSymbols : public ModulePass { diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 65f2ae2..cac80ac 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -13,6 +13,7 @@ 
//===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" @@ -155,13 +156,12 @@ static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) { } static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl<UniquableMDNode *> &Cycles, + SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer); -static Metadata *mapMetadataOp(Metadata *Op, - SmallVectorImpl<UniquableMDNode *> &Cycles, +static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { @@ -182,157 +182,85 @@ static Metadata *mapMetadataOp(Metadata *Op, return nullptr; } -static Metadata *cloneMDTuple(const MDTuple *Node, - SmallVectorImpl<UniquableMDNode *> &Cycles, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer, - bool IsDistinct) { - // Distinct MDTuples have their own code path. - assert(!IsDistinct && "Unexpected distinct tuple"); - (void)IsDistinct; - - SmallVector<Metadata *, 4> Elts; - Elts.reserve(Node->getNumOperands()); - for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) - Elts.push_back(mapMetadataOp(Node->getOperand(I), Cycles, VM, Flags, - TypeMapper, Materializer)); - - return MDTuple::get(Node->getContext(), Elts); -} - -static Metadata *cloneMDLocation(const MDLocation *Node, - SmallVectorImpl<UniquableMDNode *> &Cycles, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer, - bool IsDistinct) { - return (IsDistinct ? MDLocation::getDistinct : MDLocation::get)( - Node->getContext(), Node->getLine(), Node->getColumn(), - mapMetadataOp(Node->getScope(), Cycles, VM, Flags, TypeMapper, - Materializer), - mapMetadataOp(Node->getInlinedAt(), Cycles, VM, Flags, TypeMapper, - Materializer)); -} - -static Metadata *cloneMDNode(const UniquableMDNode *Node, - SmallVectorImpl<UniquableMDNode *> &Cycles, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer, bool IsDistinct) { - switch (Node->getMetadataID()) { - default: - llvm_unreachable("Invalid UniquableMDNode subclass"); -#define HANDLE_UNIQUABLE_LEAF(CLASS) \ - case Metadata::CLASS##Kind: \ - return clone##CLASS(cast<CLASS>(Node), Cycles, VM, Flags, TypeMapper, \ - Materializer, IsDistinct); -#include "llvm/IR/Metadata.def" +/// \brief Remap nodes. +/// +/// Insert \c NewNode in the value map, and then remap \c OldNode's operands. +/// Assumes that \c NewNode is already a clone of \c OldNode. +/// +/// \pre \c NewNode is a clone of \c OldNode. +static bool remap(const MDNode *OldNode, MDNode *NewNode, + SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM, + RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + assert(OldNode->getNumOperands() == NewNode->getNumOperands() && + "Expected nodes to match"); + assert(OldNode->isResolved() && "Expected resolved node"); + assert(!NewNode->isUniqued() && "Expected non-uniqued node"); + + // Map the node upfront so it's available for cyclic references. 
+ mapToMetadata(VM, OldNode, NewNode); + bool AnyChanged = false; + for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) { + Metadata *Old = OldNode->getOperand(I); + assert(NewNode->getOperand(I) == Old && + "Expected old operands to already be in place"); + + Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags, + TypeMapper, Materializer); + if (Old != New) { + AnyChanged = true; + NewNode->replaceOperandWith(I, New); + } } -} -static void -trackCyclesUnderDistinct(const UniquableMDNode *Node, - SmallVectorImpl<UniquableMDNode *> &Cycles) { - // Track any cycles beneath this node. - for (Metadata *Op : Node->operands()) - if (auto *N = dyn_cast_or_null<UniquableMDNode>(Op)) - if (!N->isResolved()) - Cycles.push_back(N); + return AnyChanged; } /// \brief Map a distinct MDNode. /// /// Distinct nodes are not uniqued, so they must always recreated. -static Metadata *mapDistinctNode(const UniquableMDNode *Node, - SmallVectorImpl<UniquableMDNode *> &Cycles, +static Metadata *mapDistinctNode(const MDNode *Node, + SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { assert(Node->isDistinct() && "Expected distinct node"); - // Optimization for MDTuples. - if (isa<MDTuple>(Node)) { - // Create the node first so it's available for cyclical references. - SmallVector<Metadata *, 4> EmptyOps(Node->getNumOperands()); - MDTuple *NewMD = MDTuple::getDistinct(Node->getContext(), EmptyOps); - mapToMetadata(VM, Node, NewMD); - - // Fix the operands. - for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) - NewMD->replaceOperandWith(I, - mapMetadataOp(Node->getOperand(I), Cycles, VM, - Flags, TypeMapper, Materializer)); - - trackCyclesUnderDistinct(NewMD, Cycles); - return NewMD; - } + MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone()); + remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer); - // In general we need a dummy node, since whether the operands are null can - // affect the size of the node. - std::unique_ptr<MDNodeFwdDecl> Dummy( - MDNode::getTemporary(Node->getContext(), None)); - mapToMetadata(VM, Node, Dummy.get()); - auto *NewMD = cast<UniquableMDNode>(cloneMDNode(Node, Cycles, VM, Flags, - TypeMapper, Materializer, - /* IsDistinct */ true)); - Dummy->replaceAllUsesWith(NewMD); - trackCyclesUnderDistinct(NewMD, Cycles); - return mapToMetadata(VM, Node, NewMD); -} + // Track any cycles beneath this node. + for (Metadata *Op : NewMD->operands()) + if (auto *Node = dyn_cast_or_null<MDNode>(Op)) + if (!Node->isResolved()) + Cycles.push_back(Node); -/// \brief Check whether a uniqued node needs to be remapped. -/// -/// Check whether a uniqued node needs to be remapped (due to any operands -/// changing). -static bool shouldRemapUniquedNode(const UniquableMDNode *Node, - SmallVectorImpl<UniquableMDNode *> &Cycles, - ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - // Check all operands to see if any need to be remapped. - for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { - Metadata *Op = Node->getOperand(I); - if (Op != mapMetadataOp(Op, Cycles, VM, Flags, TypeMapper, Materializer)) - return true; - } - return false; + return NewMD; } /// \brief Map a uniqued MDNode. /// /// Uniqued nodes may not need to be recreated (they may map to themselves). 
-static Metadata *mapUniquedNode(const UniquableMDNode *Node, - SmallVectorImpl<UniquableMDNode *> &Cycles, +static Metadata *mapUniquedNode(const MDNode *Node, + SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { - assert(!Node->isDistinct() && "Expected uniqued node"); - - // Create a dummy node in case we have a metadata cycle. - MDNodeFwdDecl *Dummy = MDNode::getTemporary(Node->getContext(), None); - mapToMetadata(VM, Node, Dummy); - - // Check all operands to see if any need to be remapped. - if (!shouldRemapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, - Materializer)) { - // Use an identity mapping. - mapToSelf(VM, Node); - MDNode::deleteTemporary(Dummy); - return const_cast<Metadata *>(static_cast<const Metadata *>(Node)); - } + assert(Node->isUniqued() && "Expected uniqued node"); - // At least one operand needs remapping. - Metadata *NewMD = - cloneMDNode(Node, Cycles, VM, Flags, TypeMapper, Materializer, - /* IsDistinct */ false); - Dummy->replaceAllUsesWith(NewMD); - MDNode::deleteTemporary(Dummy); - return mapToMetadata(VM, Node, NewMD); + // Create a temporary node upfront in case we have a metadata cycle. + auto ClonedMD = Node->clone(); + if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer)) + // No operands changed, so use the identity mapping. + return mapToSelf(VM, Node); + + // At least one operand has changed, so uniquify the cloned node. + return mapToMetadata(VM, Node, + MDNode::replaceWithUniqued(std::move(ClonedMD))); } static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl<UniquableMDNode *> &Cycles, + SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { @@ -364,14 +292,18 @@ static Metadata *MapMetadataImpl(const Metadata *MD, return nullptr; } - const UniquableMDNode *Node = cast<UniquableMDNode>(MD); - assert(Node->isResolved() && "Unexpected unresolved node"); + // Note: this cast precedes the Flags check so we always get its associated + // assertion. + const MDNode *Node = cast<MDNode>(MD); // If this is a module-level metadata and we know that nothing at the // module level is changing, then use an identity mapping. if (Flags & RF_NoModuleLevelChanges) return mapToSelf(VM, MD); + // Require resolved nodes whenever metadata might be remapped. + assert(Node->isResolved() && "Unexpected unresolved node"); + if (Node->isDistinct()) return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); @@ -381,17 +313,19 @@ static Metadata *MapMetadataImpl(const Metadata *MD, Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { - SmallVector<UniquableMDNode *, 8> Cycles; + SmallVector<MDNode *, 8> Cycles; Metadata *NewMD = MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer); // Resolve cycles underneath MD. if (NewMD && NewMD != MD) { - if (auto *N = dyn_cast<UniquableMDNode>(NewMD)) - N->resolveCycles(); + if (auto *N = dyn_cast<MDNode>(NewMD)) + if (!N->isResolved()) + N->resolveCycles(); - for (UniquableMDNode *N : Cycles) - N->resolveCycles(); + for (MDNode *N : Cycles) + if (!N->isResolved()) + N->resolveCycles(); } else { // Shouldn't get unresolved cycles if nothing was remapped. 
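// A minimal standalone model of the remap() helper introduced above: the
// old node is mapped to its clone *before* the operands are rewritten, so
// cyclic references back to OldNode resolve to the clone, and the return
// value reports whether any operand changed (which is what lets
// mapUniquedNode fall back to an identity mapping). Plain C++ pointer graph,
// not the Metadata API; all names illustrative only.
#include <cassert>
#include <map>
#include <vector>

struct Node { std::vector<Node *> Ops; };
using ValueMap = std::map<Node *, Node *>;

static Node *mapOp(Node *Op, ValueMap &VM) {
  auto It = VM.find(Op);
  return It == VM.end() ? Op : It->second; // unmapped operands map to self
}

static bool remap(Node *OldNode, Node *NewNode, ValueMap &VM) {
  VM[OldNode] = NewNode; // map upfront, so cycles see the clone
  bool AnyChanged = false;
  for (size_t I = 0, E = OldNode->Ops.size(); I != E; ++I) {
    Node *New = mapOp(OldNode->Ops[I], VM);
    if (New != OldNode->Ops[I]) {
      AnyChanged = true;
      NewNode->Ops[I] = New; // replaceOperandWith in the pass
    }
  }
  return AnyChanged;
}

int main() {
  Node A, NewA, B;
  B.Ops = {&A, &B};            // B references A and itself (a cycle)
  Node CloneB = B;             // Node::clone() in the pass
  ValueMap VM = {{&A, &NewA}};
  assert(remap(&B, &CloneB, VM));
  assert(CloneB.Ops[0] == &NewA && CloneB.Ops[1] == &CloneB);
}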
assert(Cycles.empty() && "Expected no unresolved cycles"); @@ -450,7 +384,24 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, I->setMetadata(MI->first, New); } + if (!TypeMapper) + return; + // If the instruction's type is being remapped, do so now. - if (TypeMapper) - I->mutateType(TypeMapper->remapType(I->getType())); + if (auto CS = CallSite(I)) { + SmallVector<Type *, 3> Tys; + FunctionType *FTy = CS.getFunctionType(); + Tys.reserve(FTy->getNumParams()); + for (Type *Ty : FTy->params()) + Tys.push_back(TypeMapper->remapType(Ty)); + CS.mutateFunctionType(FunctionType::get( + TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg())); + return; + } + if (auto *AI = dyn_cast<AllocaInst>(I)) + AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType())); + if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) + GEP->setSourceElementType( + TypeMapper->remapType(GEP->getSourceElementType())); + I->mutateType(TypeMapper->remapType(I->getType())); }
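// The RemapInstruction change that closes this diff now remaps the element
// types that calls, allocas, and GEPs carry explicitly, rather than only
// mutating the instruction's own result type. A standalone sketch of the
// call-site case, where the return type and every parameter type go through
// the type mapper; plain C++ with an int TypeId standing in for llvm::Type*,
// and all names illustrative only.
#include <cassert>
#include <functional>
#include <vector>

using TypeId = int;
struct FnTy { TypeId Ret; std::vector<TypeId> Params; bool IsVarArg; };

static FnTy remapFunctionType(const FnTy &FTy,
                              const std::function<TypeId(TypeId)> &RemapType) {
  std::vector<TypeId> Tys;
  Tys.reserve(FTy.Params.size());
  for (TypeId Ty : FTy.Params)
    Tys.push_back(RemapType(Ty)); // TypeMapper->remapType on each parameter
  return {RemapType(FTy.Ret), Tys, FTy.IsVarArg};
}

int main() {
  auto Remap = [](TypeId Ty) { return Ty == 1 ? 2 : Ty; }; // map type 1 -> 2
  FnTy F = remapFunctionType({1, {1, 3}, false}, Remap);
  assert(F.Ret == 2 && F.Params[0] == 2 && F.Params[1] == 3 && !F.IsVarArg);
}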