diff options
author | dim <dim@FreeBSD.org> | 2014-03-21 17:53:59 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2014-03-21 17:53:59 +0000 |
commit | 9cedb8bb69b89b0f0c529937247a6a80cabdbaec (patch) | |
tree | c978f0e9ec1ab92dc8123783f30b08a7fd1e2a39 /contrib/llvm/lib/Transforms/Utils | |
parent | 03fdc2934eb61c44c049a02b02aa974cfdd8a0eb (diff) | |
download | FreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.zip FreeBSD-src-9cedb8bb69b89b0f0c529937247a6a80cabdbaec.tar.gz |
MFC 261991:
Upgrade our copy of llvm/clang to 3.4 release. This version supports
all of the features in the current working draft of the upcoming C++
standard, provisionally named C++1y.
The code generator's performance is greatly increased, and the loop
auto-vectorizer is now enabled at -Os and -O2 in addition to -O3. The
PowerPC backend has made several major improvements to code generation
quality and compile time, and the X86, SPARC, ARM32, Aarch64 and SystemZ
backends have all seen major feature work.
Release notes for llvm and clang can be found here:
<http://llvm.org/releases/3.4/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.4/tools/clang/docs/ReleaseNotes.html>
MFC 262121 (by emaste):
Update lldb for clang/llvm 3.4 import
This commit largely restores the lldb source to the upstream r196259
snapshot with the addition of threaded inferior support and a few bug
fixes.
Specific upstream lldb revisions restored include:
SVN git
181387 779e6ac
181703 7bef4e2
182099 b31044e
182650 f2dcf35
182683 0d91b80
183862 15c1774
183929 99447a6
184177 0b2934b
184948 4dc3761
184954 007e7bc
186990 eebd175
Sponsored by: DARPA, AFRL
MFC 262186 (by emaste):
Fix mismerge in r262121
A break statement was lost in the merge. The error had no functional
impact, but restore it to reduce the diff against upstream.
MFC 262303:
Pull in r197521 from upstream clang trunk (by rdivacky):
Use the integrated assembler by default on FreeBSD/ppc and ppc64.
Requested by: jhibbits
MFC 262611:
Pull in r196874 from upstream llvm trunk:
Fix a crash that occurs when PWD is invalid.
MCJIT needs to be able to run in hostile environments, even when PWD
is invalid. There's no need to crash MCJIT in this case.
The obvious fix is to simply leave MCContext's CompilationDir empty
when PWD can't be determined. This way, MCJIT clients,
and other clients that link with LLVM don't need a valid working directory.
If we do want to guarantee valid CompilationDir, that should be done
only for clients of getCompilationDir(). This is as simple as checking
for an empty string.
The only current use of getCompilationDir is EmitGenDwarfInfo, which
won't conceivably run with an invalid working dir. However, in the
purely hypothetically and untestable case that this happens, the
AT_comp_dir will be omitted from the compilation_unit DIE.
This should help fix assertions occurring with ports-mgmt/tinderbox,
when it is using jails, and sometimes invalidates clang's current
working directory.
Reported by: decke
MFC 262809:
Pull in r203007 from upstream clang trunk:
Don't produce an alias between destructors with different calling conventions.
Fixes pr19007.
(Please note that is an LLVM PR identifier, not a FreeBSD one.)
This should fix Firefox and/or libxul crashes (due to problems with
regparm/stdcall calling conventions) on i386.
Reported by: multiple users on freebsd-current
PR: bin/187103
MFC 263048:
Repair recognition of "CC" as an alias for the C++ compiler, since it
was silently broken by upstream for a Windows-specific use-case.
Apparently some versions of CMake still rely on this archaic feature...
Reported by: rakuco
MFC 263049:
Garbage collect the old way of adding the libstdc++ include directories
in clang's InitHeaderSearch.cpp. This has been superseded by David
Chisnall's commit in r255321.
Moreover, if libc++ is used, the libstdc++ include directories should
not be in the search path at all. These directories are now only used
if you pass -stdlib=libstdc++.
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils')
24 files changed, 2454 insertions, 1092 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index ba99d2e..12de9ee 100644 --- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" @@ -170,7 +171,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { if (DomTreeNode *DTN = DT->getNode(BB)) { DomTreeNode *PredDTN = DT->getNode(PredBB); SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end()); - for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(), + for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(), DE = Children.end(); DI != DE; ++DI) DT->changeImmediateDominator(*DI, PredDTN); @@ -235,22 +236,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); } -/// GetSuccessorNumber - Search for the specified successor of basic block BB -/// and return its position in the terminator instruction's list of -/// successors. It is an error to call this with a block that is not a -/// successor. -unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { - TerminatorInst *Term = BB->getTerminator(); -#ifndef NDEBUG - unsigned e = Term->getNumSuccessors(); -#endif - for (unsigned i = 0; ; ++i) { - assert(i != e && "Didn't find edge?"); - if (Term->getSuccessor(i) == Succ) - return i; - } -} - /// SplitEdge - Split the edge connecting specified block. Pass P must /// not be NULL. BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { @@ -263,7 +248,6 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // If the edge isn't critical, then BB has a single successor or Succ has a // single pred. Split the block. - BasicBlock::iterator SplitPoint; if (BasicBlock *SP = Succ->getSinglePredecessor()) { // If the successor only has a single pred, split the top of the successor // block. @@ -416,8 +400,12 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) - PN->removeIncomingValue(Preds[i], false); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // Explicitly check the BB index here to handle duplicates in Preds. + int Idx = PN->getBasicBlockIndex(Preds[i]); + if (Idx >= 0) + PN->removeIncomingValue(Idx, false); + } } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block @@ -598,52 +586,6 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, } } -/// FindFunctionBackedges - Analyze the specified function to find all of the -/// loop backedges in the function and return them. This is a relatively cheap -/// (compared to computing dominators and loop info) analysis. -/// -/// The output is added to Result, as pairs of <from,to> edge info. -void llvm::FindFunctionBackedges(const Function &F, - SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) { - const BasicBlock *BB = &F.getEntryBlock(); - if (succ_begin(BB) == succ_end(BB)) - return; - - SmallPtrSet<const BasicBlock*, 8> Visited; - SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack; - SmallPtrSet<const BasicBlock*, 8> InStack; - - Visited.insert(BB); - VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); - InStack.insert(BB); - do { - std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back(); - const BasicBlock *ParentBB = Top.first; - succ_const_iterator &I = Top.second; - - bool FoundNew = false; - while (I != succ_end(ParentBB)) { - BB = *I++; - if (Visited.insert(BB)) { - FoundNew = true; - break; - } - // Successor is in VisitStack, it's a back edge. - if (InStack.count(BB)) - Result.push_back(std::make_pair(ParentBB, BB)); - } - - if (FoundNew) { - // Go down one level if there is a unvisited successor. - InStack.insert(BB); - VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); - } else { - // Go up one level. - InStack.erase(VisitStack.pop_back_val().first); - } - } while (!VisitStack.empty()); -} - /// FoldReturnIntoUncondBranch - This method duplicates the specified return /// instruction into a predecessor which ends in an unconditional branch. If /// the return instruction returns a value defined by a PHI, propagate the @@ -726,3 +668,104 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); return CheckTerm; } + +/// GetIfCondition - Given a basic block (BB) with two predecessors, +/// check to see if the merge at this block is due +/// to an "if condition". If so, return the boolean condition that determines +/// which entry into BB will be taken. Also, return by references the block +/// that will be entered from if the condition is true, and the block that will +/// be entered if the condition is false. +/// +/// This does no checking to see if the true/false blocks have large or unsavory +/// instructions in them. +Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); + BasicBlock *Pred1 = NULL; + BasicBlock *Pred2 = NULL; + + if (SomePHI) { + if (SomePHI->getNumIncomingValues() != 2) + return NULL; + Pred1 = SomePHI->getIncomingBlock(0); + Pred2 = SomePHI->getIncomingBlock(1); + } else { + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (PI == PE) // No predecessor + return NULL; + Pred1 = *PI++; + if (PI == PE) // Only one predecessor + return NULL; + Pred2 = *PI++; + if (PI != PE) // More than two predecessors + return NULL; + } + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. + BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (Pred1Br == 0 || Pred2Br == 0) + return 0; + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. + if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. + if (Pred1Br->isConditional()) + return 0; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (Pred2->getSinglePredecessor() == 0) + return 0; + + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return 0; + } + + return Pred1Br->getCondition(); + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) + return 0; + + // Otherwise, if this is a conditional branch, then we can use it! + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (BI == 0) return 0; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI->getCondition(); +} diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index 8513772..0e7f7f7 100644 --- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -19,9 +19,9 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" @@ -44,7 +44,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); AU.addPreserved<LoopInfo>(); - AU.addPreserved<ProfileInfo>(); // No loop canonicalization guarantees are broken by this pass. AU.addPreservedID(LoopSimplifyID); @@ -84,39 +83,6 @@ bool BreakCriticalEdges::runOnFunction(Function &F) { // Implementation of the external critical edge manipulation functions //===----------------------------------------------------------------------===// -// isCriticalEdge - Return true if the specified edge is a critical edge. -// Critical edges are edges from a block with multiple successors to a block -// with multiple predecessors. -// -bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, - bool AllowIdenticalEdges) { - assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); - if (TI->getNumSuccessors() == 1) return false; - - const BasicBlock *Dest = TI->getSuccessor(SuccNum); - const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); - - // If there is more than one predecessor, this is a critical edge... - assert(I != E && "No preds, but we have an edge to the block?"); - const BasicBlock *FirstPred = *I; - ++I; // Skip one edge due to the incoming arc from TI. - if (!AllowIdenticalEdges) - return I != E; - - // If AllowIdenticalEdges is true, then we allow this edge to be considered - // non-critical iff all preds come from TI's block. - while (I != E) { - const BasicBlock *P = *I; - if (P != FirstPred) - return true; - // Note: leave this as is until no one ever compiles with either gcc 4.0.1 - // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 - E = pred_end(P); - ++I; - } - return false; -} - /// createPHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form /// may require new PHIs in the new exit block. This function inserts the /// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB @@ -245,10 +211,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); // If we have nothing to update, just return. - if (DT == 0 && LI == 0 && PI == 0) + if (DT == 0 && LI == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is @@ -401,9 +366,5 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } } - // Update ProfileInfo if it is around. - if (PI) - PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); - return NewBB; } diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index be8d39e..d105f5e 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -78,7 +78,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - ValueMapTypeRemapper *TypeMapper) { + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG @@ -147,7 +148,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) RemapInstruction(II, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper); + TypeMapper, Materializer); } /// CloneFunction - Return a copy of the specified function, but without diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp index f7c659f..6f008644 100644 --- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -277,8 +277,8 @@ void CodeExtractor::splitReturnBlocks() { DomTreeNode *NewNode = DT->addNewBlock(New, *I); - for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(), - E = Children.end(); I != E; ++I) + for (SmallVectorImpl<DomTreeNode *>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) DT->changeImmediateDominator(*I, NewNode); } } @@ -665,8 +665,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); // Remove redundant case - SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1); - TheSwitch->removeCase(ToBeRemoved); + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); break; } } diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index db525cd..0723b35 100644 --- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -10,6 +10,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp new file mode 100644 index 0000000..1da226b --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -0,0 +1,486 @@ +//===- FlatternCFG.cpp - Code to perform CFG flattening ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Reduce conditional branches in CFG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "flattencfg" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +namespace { +class FlattenCFGOpt { + AliasAnalysis *AA; + /// \brief Use parallel-and or parallel-or to generate conditions for + /// conditional branches. + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + /// \brief If \param BB is the merge block of an if-region, attempt to merge + /// the if-region with an adjacent if-region upstream if two if-regions + /// contain identical instructions. + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which + /// are from two if-regions whose entry blocks are \p Head1 and \p + /// Head2. \returns true if \p Block1 and \p Block2 contain identical + /// instructions, and have no memory reference alias with \p Head2. + /// This is used as a legality check for merging if-regions. + bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, BasicBlock *Block2); + +public: + FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {} + bool run(BasicBlock *BB); +}; +} + +/// If \param [in] BB has more than one predecessor that is a conditional +/// branch, attempt to use parallel and/or for the branch condition. \returns +/// true on success. +/// +/// Before: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// br i1 %cmp1, label %if.then, label %lor.rhs +/// +/// lor.rhs: +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// br i1 %cmp11, label %if.then, label %ifend +/// +/// if.end: // the merge block +/// ...... +/// +/// if.then: // has two predecessors, both of them contains conditional branch. +/// ...... +/// br label %if.end; +/// +/// After: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode. +/// br i1 %cmp12, label %if.then, label %ifend +/// +/// if.end: +/// ...... +/// +/// if.then: +/// ...... +/// br label %if.end; +/// +/// Current implementation handles two cases. +/// Case 1: \param BB is on the else-path. +/// +/// BB1 +/// / | +/// BB2 | +/// / \ | +/// BB3 \ | where, BB1, BB2 contain conditional branches. +/// \ | / BB3 contains unconditional branch. +/// \ | / BB4 corresponds to \param BB which is also the merge. +/// BB => BB4 +/// +/// +/// Corresponding source code: +/// +/// if (a == b && c == d) +/// statement; // BB3 +/// +/// Case 2: \param BB BB is on the then-path. +/// +/// BB1 +/// / | +/// | BB2 +/// \ / | where BB1, BB2 contain conditional branches. +/// BB => BB3 | BB3 contains unconditiona branch and corresponds +/// \ / to \param BB. BB4 is the merge. +/// BB4 +/// +/// Corresponding source code: +/// +/// if (a == b || c == d) +/// statement; // BB3 +/// +/// In both cases, \param BB is the common successor of conditional branches. +/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as +/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches +/// as its predecessors. +/// +bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P) { + PHINode *PHI = dyn_cast<PHINode>(BB->begin()); + if (PHI) + return false; // For simplicity, avoid cases containing PHI nodes. + + BasicBlock *LastCondBlock = NULL; + BasicBlock *FirstCondBlock = NULL; + BasicBlock *UnCondBlock = NULL; + int Idx = -1; + + // Check predecessors of \param BB. + SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + BasicBlock *Pred = *PI; + BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()); + + // All predecessors should terminate with a branch. + if (!PBI) + return false; + + BasicBlock *PP = Pred->getSinglePredecessor(); + + if (PBI->isUnconditional()) { + // Case 1: Pred (BB3) is an unconditional block, it should + // have a single predecessor (BB2) that is also a predecessor + // of \param BB (BB4) and should not have address-taken. + // There should exist only one such unconditional + // branch among the predecessors. + if (UnCondBlock || !PP || (Preds.count(PP) == 0) || + Pred->hasAddressTaken()) + return false; + + UnCondBlock = Pred; + continue; + } + + // Only conditional branches are allowed beyond this point. + assert(PBI->isConditional()); + + // Condition's unique use should be the branch instruction. + Value *PC = PBI->getCondition(); + if (!PC || !PC->hasOneUse()) + return false; + + if (PP && Preds.count(PP)) { + // These are internal condition blocks to be merged from, e.g., + // BB2 in both cases. + // Should not be address-taken. + if (Pred->hasAddressTaken()) + return false; + + // Instructions in the internal condition blocks should be safe + // to hoist up. + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { + Instruction *CI = BI++; + if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) + return false; + } + } else { + // This is the condition block to be merged into, e.g. BB1 in + // both cases. + if (FirstCondBlock) + return false; + FirstCondBlock = Pred; + } + + // Find whether BB is uniformly on the true (or false) path + // for all of its predecessors. + BasicBlock *PS1 = PBI->getSuccessor(0); + BasicBlock *PS2 = PBI->getSuccessor(1); + BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; + int CIdx = (PS1 == BB) ? 0 : 1; + + if (Idx == -1) + Idx = CIdx; + else if (CIdx != Idx) + return false; + + // PS is the successor which is not BB. Check successors to identify + // the last conditional branch. + if (Preds.count(PS) == 0) { + // Case 2. + LastCondBlock = Pred; + } else { + // Case 1 + BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); + if (BPS && BPS->isUnconditional()) { + // Case 1: PS(BB3) should be an unconditional branch. + LastCondBlock = Pred; + } + } + } + + if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) + return false; + + TerminatorInst *TBB = LastCondBlock->getTerminator(); + BasicBlock *PS1 = TBB->getSuccessor(0); + BasicBlock *PS2 = TBB->getSuccessor(1); + BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator()); + BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator()); + + // If PS1 does not jump into PS2, but PS2 jumps into PS1, + // attempt branch inversion. + if (!PBI1 || !PBI1->isUnconditional() || + (PS1->getTerminator()->getSuccessor(0) != PS2)) { + // Check whether PS2 jumps into PS1. + if (!PBI2 || !PBI2->isUnconditional() || + (PS2->getTerminator()->getSuccessor(0) != PS1)) + return false; + + // Do branch inversion. + BasicBlock *CurrBlock = LastCondBlock; + bool EverChanged = false; + while (1) { + BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + CmpInst::Predicate Predicate = CI->getPredicate(); + // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq + if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { + CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); + BI->swapSuccessors(); + EverChanged = true; + } + if (CurrBlock == FirstCondBlock) + break; + CurrBlock = CurrBlock->getSinglePredecessor(); + } + return EverChanged; + } + + // PS1 must have a conditional branch. + if (!PBI1 || !PBI1->isUnconditional()) + return false; + + // PS2 should not contain PHI node. + PHI = dyn_cast<PHINode>(PS2->begin()); + if (PHI) + return false; + + // Do the transformation. + BasicBlock *CB; + BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); + bool Iteration = true; + IRBuilder<>::InsertPointGuard Guard(Builder); + Value *PC = PBI->getCondition(); + + do { + CB = PBI->getSuccessor(1 - Idx); + // Delete the conditional branch. + FirstCondBlock->getInstList().pop_back(); + FirstCondBlock->getInstList() + .splice(FirstCondBlock->end(), CB->getInstList()); + PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); + Value *CC = PBI->getCondition(); + // Merge conditions. + Builder.SetInsertPoint(PBI); + Value *NC; + if (Idx == 0) + // Case 2, use parallel or. + NC = Builder.CreateOr(PC, CC); + else + // Case 1, use parallel and. + NC = Builder.CreateAnd(PC, CC); + + PBI->replaceUsesOfWith(CC, NC); + PC = NC; + if (CB == LastCondBlock) + Iteration = false; + // Remove internal conditional branches. + CB->dropAllReferences(); + // make CB unreachable and let downstream to delete the block. + new UnreachableInst(CB->getContext(), CB); + } while (Iteration); + + DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); + return true; +} + +/// Compare blocks from two if-regions, where \param Head1 is the entry of the +/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param +/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block +// in the 2nd if-region to compare. \returns true if \param Block1 and \param +/// Block2 have identical instructions and do not have memory reference alias +/// with \param Head2. +/// +bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, + BasicBlock *Block2) { + TerminatorInst *PTI2 = Head2->getTerminator(); + Instruction *PBI2 = Head2->begin(); + + bool eq1 = (Block1 == Head1); + bool eq2 = (Block2 == Head2); + if (eq1 || eq2) { + // An empty then-path or else-path. + return (eq1 == eq2); + } + + // Check whether instructions in Block1 and Block2 are identical + // and do not alias with instructions in Head2. + BasicBlock::iterator iter1 = Block1->begin(); + BasicBlock::iterator end1 = Block1->getTerminator(); + BasicBlock::iterator iter2 = Block2->begin(); + BasicBlock::iterator end2 = Block2->getTerminator(); + + while (1) { + if (iter1 == end1) { + if (iter2 != end2) + return false; + break; + } + + if (!iter1->isIdenticalTo(iter2)) + return false; + + // Illegal to remove instructions with side effects except + // non-volatile stores. + if (iter1->mayHaveSideEffects()) { + Instruction *CurI = &*iter1; + StoreInst *SI = dyn_cast<StoreInst>(CurI); + if (!SI || SI->isVolatile()) + return false; + } + + // For simplicity and speed, data dependency check can be + // avoided if read from memory doesn't exist. + if (iter1->mayReadFromMemory()) + return false; + + if (iter1->mayWriteToMemory()) { + for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { + // Check alias with Head2. + if (!AA || AA->alias(iter1, BI)) + return false; + } + } + } + ++iter1; + ++iter2; + } + + return true; +} + +/// Check whether \param BB is the merge block of a if-region. If yes, check +/// whether there exists an adjacent if-region upstream, the two if-regions +/// contain identical instructions and can be legally merged. \returns true if +/// the two if-regions are merged. +/// +/// From: +/// if (a) +/// statement; +/// if (b) +/// statement; +/// +/// To: +/// if (a || b) +/// statement; +/// +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P) { + BasicBlock *IfTrue2, *IfFalse2; + Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); + Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); + if (!CInst2) + return false; + + BasicBlock *SecondEntryBlock = CInst2->getParent(); + if (SecondEntryBlock->hasAddressTaken()) + return false; + + BasicBlock *IfTrue1, *IfFalse1; + Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); + Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1); + if (!CInst1) + return false; + + BasicBlock *FirstEntryBlock = CInst1->getParent(); + + // Either then-path or else-path should be empty. + if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock)) + return false; + if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock)) + return false; + + TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); + Instruction *PBI2 = SecondEntryBlock->begin(); + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, + IfTrue2)) + return false; + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, + IfFalse2)) + return false; + + // Check whether \param SecondEntryBlock has side-effect and is safe to + // speculate. + for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + Instruction *CI = BI; + if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || + !isSafeToSpeculativelyExecute(CI)) + return false; + } + + // Merge \param SecondEntryBlock into \param FirstEntryBlock. + FirstEntryBlock->getInstList().pop_back(); + FirstEntryBlock->getInstList() + .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); + BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator()); + Value *CC = PBI->getCondition(); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(PBI); + Value *NC = Builder.CreateOr(CInst1, CC); + PBI->replaceUsesOfWith(CC, NC); + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + + // Remove IfTrue1 + if (IfTrue1 != FirstEntryBlock) { + IfTrue1->dropAllReferences(); + IfTrue1->eraseFromParent(); + } + + // Remove IfFalse1 + if (IfFalse1 != FirstEntryBlock) { + IfFalse1->dropAllReferences(); + IfFalse1->eraseFromParent(); + } + + // Remove \param SecondEntryBlock + SecondEntryBlock->dropAllReferences(); + SecondEntryBlock->eraseFromParent(); + DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); + return true; +} + +bool FlattenCFGOpt::run(BasicBlock *BB) { + bool Changed = false; + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + + IRBuilder<> Builder(BB); + + if (FlattenParallelAndOr(BB, Builder)) + return true; + + if (MergeIfRegion(BB, Builder)) + return true; + + return Changed; +} + +/// FlattenCFG - This function is used to flatten a CFG. For +/// example, it uses parallel-and and parallel-or mode to collapse +// if-conditions and merge if-regions with identical statements. +/// +bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) { + return FlattenCFGOpt(AA).run(BB); +} diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp new file mode 100644 index 0000000..5f0a563 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -0,0 +1,183 @@ +//===-- GlobalStatus.cpp - Compute status info for globals -----------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" + +using namespace llvm; + +/// Return the stronger of the two ordering. If the two orderings are acquire +/// and release, then return AcquireRelease. +/// +static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) { + if (X == Acquire && Y == Release) + return AcquireRelease; + if (Y == Acquire && X == Release) + return AcquireRelease; + return (AtomicOrdering)std::max(X, Y); +} + +/// It is safe to destroy a constant iff it is only used by constants itself. +/// Note that constants cannot be cyclic, so this test is pretty easy to +/// implement recursively. +/// +bool llvm::isSafeToDestroyConstant(const Constant *C) { + if (isa<GlobalValue>(C)) + return false; + + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) + if (const Constant *CU = dyn_cast<Constant>(*UI)) { + if (!isSafeToDestroyConstant(CU)) + return false; + } else + return false; + return true; +} + +static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, + SmallPtrSet<const PHINode *, 16> &PhiUsers) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + GS.HasNonInstructionUser = true; + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. + if (!isa<PointerType>(CE->getType())) + return true; + + if (analyzeGlobalAux(CE, GS, PhiUsers)) + return true; + } else if (const Instruction *I = dyn_cast<Instruction>(U)) { + if (!GS.HasMultipleAccessingFunctions) { + const Function *F = I->getParent()->getParent(); + if (GS.AccessingFunction == 0) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + GS.IsLoaded = true; + // Don't hack on volatile loads. + if (LI->isVolatile()) + return true; + GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering()); + } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) + return true; + + // Don't hack on volatile stores. + if (SI->isVolatile()) + return true; + + GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering()); + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. + if (GS.StoredType != GlobalStatus::Stored) { + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(SI->getOperand(1))) { + Value *StoredVal = SI->getOperand(0); + + if (Constant *C = dyn_cast<Constant>(StoredVal)) { + if (C->isThreadDependent()) { + // The stored value changes between threads; don't track it. + return true; + } + } + + if (StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (isa<LoadInst>(StoredVal) && + cast<LoadInst>(StoredVal)->getOperand(0) == GV) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (GS.StoredType < GlobalStatus::StoredOnce) { + GS.StoredType = GlobalStatus::StoredOnce; + GS.StoredOnceValue = StoredVal; + } else if (GS.StoredType == GlobalStatus::StoredOnce && + GS.StoredOnceValue == StoredVal) { + // noop. + } else { + GS.StoredType = GlobalStatus::Stored; + } + } else { + GS.StoredType = GlobalStatus::Stored; + } + } + } else if (isa<BitCastInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<GetElementPtrInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<SelectInst>(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { + // PHI nodes we can check just like select or GEP instructions, but we + // have to be careful about infinite recursion. + if (PhiUsers.insert(PN)) // Not already visited. + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa<CmpInst>(I)) { + GS.IsCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + if (MTI->isVolatile()) + return true; + if (MTI->getArgOperand(0) == V) + GS.StoredType = GlobalStatus::Stored; + if (MTI->getArgOperand(1) == V) + GS.IsLoaded = true; + } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) + return true; + GS.StoredType = GlobalStatus::Stored; + } else if (ImmutableCallSite C = I) { + if (!C.isCallee(UI)) + return true; + GS.IsLoaded = true; + } else { + return true; // Any other non-load instruction might take address! + } + } else if (const Constant *C = dyn_cast<Constant>(U)) { + GS.HasNonInstructionUser = true; + // We might have a dead and dangling constant hanging off of here. + if (!isSafeToDestroyConstant(C)) + return true; + } else { + GS.HasNonInstructionUser = true; + // Otherwise must be some other user. + return true; + } + } + + return false; +} + +bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { + SmallPtrSet<const PHINode *, 16> PhiUsers; + return analyzeGlobalAux(V, GS, PhiUsers); +} + +GlobalStatus::GlobalStatus() + : IsCompared(false), IsLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + Ordering(NotAtomic) {} diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index dabb67b9..d021bce 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -193,7 +193,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, CallInst *CI = dyn_cast<CallInst>(I); // If this call cannot unwind, don't convert it to an invoke. - if (!CI || CI->doesNotThrow()) + // Inline asm calls cannot throw. + if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) continue; // Convert this function call into an invoke instruction. First, split the diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp index 2d1b166..f15e8d5 100644 --- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -55,7 +55,6 @@ namespace { DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; - std::vector<BasicBlock*> LoopBlocks; PredIteratorCache PredCache; Loop *L; @@ -82,11 +81,6 @@ namespace { // Check the special guarantees that LCSSA makes. assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!"); } - - /// inLoop - returns true if the given block is within the current loop - bool inLoop(BasicBlock *B) const { - return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B); - } }; } @@ -129,11 +123,6 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { if (ExitBlocks.empty()) return false; - // Speed up queries by creating a sorted vector of blocks. - LoopBlocks.clear(); - LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); - array_pod_sort(LoopBlocks.begin(), LoopBlocks.end()); - // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, rewrite those uses. bool MadeChange = false; @@ -198,7 +187,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (PHINode *PN = dyn_cast<PHINode>(U)) UserBB = PN->getIncomingBlock(UI); - if (InstBB != UserBB && !inLoop(UserBB)) + if (InstBB != UserBB && !L->contains(UserBB)) UsesToRewrite.push_back(&UI.getUse()); } @@ -244,7 +233,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. - if (!inLoop(*PI)) + if (!L->contains(*PI)) UsesToRewrite.push_back( &PN->getOperandUse( PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 12e5b3e..2768041 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -16,10 +16,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/DIBuilder.h" #include "llvm/DebugInfo.h" @@ -43,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); + //===----------------------------------------------------------------------===// // Local constant propagation. // @@ -84,7 +86,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BI->eraseFromParent(); return true; } - + if (Dest2 == Dest1) { // Conditional branch to same location? // This branch matches something like this: // br bool %cond, label %Dest, label %Dest @@ -104,7 +106,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, } return false; } - + if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) { // If we are switching on a constant, we can convert the switch into a // single branch instruction! @@ -188,38 +190,33 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } - + if (SI->getNumCases() == 1) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. SwitchInst::CaseIt FirstCase = SI->case_begin(); - IntegersSubset& Case = FirstCase.getCaseValueEx(); - if (Case.isSingleNumber()) { - // FIXME: Currently work with ConstantInt based numbers. - Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), - Case.getSingleNumber(0).toConstantInt(), - "cond"); - - // Insert the new branch. - BranchInst *NewBr = Builder.CreateCondBr(Cond, - FirstCase.getCaseSuccessor(), - SI->getDefaultDest()); - MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); - if (MD && MD->getNumOperands() == 3) { - ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2)); - ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1)); - assert(SICase && SIDef); - // The TrueWeight should be the weight for the single case of SI. - NewBr->setMetadata(LLVMContext::MD_prof, - MDBuilder(BB->getContext()). - createBranchWeights(SICase->getValue().getZExtValue(), - SIDef->getValue().getZExtValue())); - } + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + FirstCase.getCaseValue(), "cond"); - // Delete the old switch. - SI->eraseFromParent(); - return true; + // Insert the new branch. + BranchInst *NewBr = Builder.CreateCondBr(Cond, + FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + if (MD && MD->getNumOperands() == 3) { + ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2)); + ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1)); + assert(SICase && SIDef); + // The TrueWeight should be the weight for the single case of SI. + NewBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(SICase->getValue().getZExtValue(), + SIDef->getValue().getZExtValue())); } + + // Delete the old switch. + SI->eraseFromParent(); + return true; } return false; } @@ -231,7 +228,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BasicBlock *TheOnlyDest = BA->getBasicBlock(); // Insert the new branch. Builder.CreateBr(TheOnlyDest); - + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { if (IBI->getDestination(i) == TheOnlyDest) TheOnlyDest = 0; @@ -242,7 +239,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, IBI->eraseFromParent(); if (DeleteDeadConditions) RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); - + // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an // 'unreachable' instruction. @@ -250,11 +247,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BB->getTerminator()->eraseFromParent(); new UnreachableInst(BB->getContext(), BB); } - + return true; } } - + return false; } @@ -321,10 +318,10 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, Instruction *I = dyn_cast<Instruction>(V); if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) return false; - + SmallVector<Instruction*, 16> DeadInsts; DeadInsts.push_back(I); - + do { I = DeadInsts.pop_back_val(); @@ -333,9 +330,9 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Value *OpV = I->getOperand(i); I->setOperand(i, 0); - + if (!OpV->use_empty()) continue; - + // If the operand is an instruction that became dead as we nulled out the // operand, and if it is 'trivially' dead, delete it in a future loop // iteration. @@ -343,7 +340,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, if (isInstructionTriviallyDead(OpI, TLI)) DeadInsts.push_back(OpI); } - + I->eraseFromParent(); } while (!DeadInsts.empty()); @@ -415,7 +412,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, Instruction *Inst = BI++; WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TD)) { + if (recursivelySimplifyInstruction(Inst, TD, TLI)) { MadeChange = true; if (BIHandle != BI) BI = BB->begin(); @@ -450,12 +447,12 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, // This only adjusts blocks with PHI nodes. if (!isa<PHINode>(BB->begin())) return; - + // Remove the entries for Pred from the PHI nodes in BB, but do not simplify // them down. This will leave us with single entry phi nodes and other phis // that can be removed. BB->removePredecessor(Pred, true); - + WeakVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); @@ -486,10 +483,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { PN->replaceAllUsesWith(NewVal); PN->eraseFromParent(); } - + BasicBlock *PredBB = DestBB->getSinglePredecessor(); assert(PredBB && "Block doesn't have a single predecessor!"); - + // Zap anything that took the address of DestBB. Not doing this will give the // address an invalid value. if (DestBB->hasAddressTaken()) { @@ -500,10 +497,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { BA->getType())); BA->destroyConstant(); } - + // Anything that branched to PredBB now branches to DestBB. PredBB->replaceAllUsesWith(DestBB); - + // Splice all the instructions from PredBB to DestBB. PredBB->getTerminator()->eraseFromParent(); DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); @@ -515,25 +512,27 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { DT->changeImmediateDominator(DestBB, PredBBIDom); DT->eraseNode(PredBB); } - ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PI) { - PI->replaceAllUses(PredBB, DestBB); - PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB)); - } } // Nuke BB. PredBB->eraseFromParent(); } +/// CanMergeValues - Return true if we can choose one of these values to use +/// in place of the other. Note that we will always choose the non-undef +/// value to keep. +static bool CanMergeValues(Value *First, Value *Second) { + return First == Second || isa<UndefValue>(First) || isa<UndefValue>(Second); +} + /// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an -/// almost-empty BB ending in an unconditional branch to Succ, into succ. +/// almost-empty BB ending in an unconditional branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. /// static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); - DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " + DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " << Succ->getName() << "\n"); // Shortcut, if there is only a single predecessor it must be BB and merging // is always safe @@ -555,9 +554,10 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { BasicBlock *IBB = PN->getIncomingBlock(PI); if (BBPreds.count(IBB) && - BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) { - DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " - << Succ->getName() << " is conflicting with " + !CanMergeValues(BBPN->getIncomingValueForBlock(IBB), + PN->getIncomingValue(PI))) { + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " << BBPN->getName() << " with regard to common predecessor " << IBB->getName() << "\n"); return false; @@ -570,8 +570,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // one for BB, in which case this phi node will not prevent the merging // of the block. BasicBlock *IBB = PN->getIncomingBlock(PI); - if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) { - DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " + if (BBPreds.count(IBB) && + !CanMergeValues(Val, PN->getIncomingValue(PI))) { + DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with regard to common " << "predecessor " << IBB->getName() << "\n"); return false; @@ -583,6 +584,139 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { return true; } +typedef SmallVector<BasicBlock *, 16> PredBlockVector; +typedef DenseMap<BasicBlock *, Value *> IncomingValueMap; + +/// \brief Determines the value to use as the phi node input for a block. +/// +/// Select between \p OldVal any value that we know flows from \p BB +/// to a particular phi on the basis of which one (if either) is not +/// undef. Update IncomingValues based on the selected value. +/// +/// \param OldVal The value we are considering selecting. +/// \param BB The block that the value flows in from. +/// \param IncomingValues A map from block-to-value for other phi inputs +/// that we have examined. +/// +/// \returns the selected value. +static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB, + IncomingValueMap &IncomingValues) { + if (!isa<UndefValue>(OldVal)) { + assert((!IncomingValues.count(BB) || + IncomingValues.find(BB)->second == OldVal) && + "Expected OldVal to match incoming value from BB!"); + + IncomingValues.insert(std::make_pair(BB, OldVal)); + return OldVal; + } + + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It != IncomingValues.end()) return It->second; + + return OldVal; +} + +/// \brief Create a map from block to value for the operands of a +/// given phi. +/// +/// Create a map from block to value for each non-undef value flowing +/// into \p PN. +/// +/// \param PN The phi we are collecting the map for. +/// \param IncomingValues [out] The map from block to value for this phi. +static void gatherIncomingValuesToPhi(PHINode *PN, + IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *BB = PN->getIncomingBlock(i); + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) + IncomingValues.insert(std::make_pair(BB, V)); + } +} + +/// \brief Replace the incoming undef values to a phi with the values +/// from a block-to-value map. +/// +/// \param PN The phi we are replacing the undefs in. +/// \param IncomingValues A map from block to value. +static void replaceUndefValuesInPhi(PHINode *PN, + const IncomingValueMap &IncomingValues) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + + if (!isa<UndefValue>(V)) continue; + + BasicBlock *BB = PN->getIncomingBlock(i); + IncomingValueMap::const_iterator It = IncomingValues.find(BB); + if (It == IncomingValues.end()) continue; + + PN->setIncomingValue(i, It->second); + } +} + +/// \brief Replace a value flowing from a block to a phi with +/// potentially multiple instances of that value flowing from the +/// block's predecessors to the phi. +/// +/// \param BB The block with the value flowing into the phi. +/// \param BBPreds The predecessors of BB. +/// \param PN The phi that we are updating. +static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, + const PredBlockVector &BBPreds, + PHINode *PN) { + Value *OldVal = PN->removeIncomingValue(BB, false); + assert(OldVal && "No entry in PHI for Pred BB!"); + + IncomingValueMap IncomingValues; + + // We are merging two blocks - BB, and the block containing PN - and + // as a result we need to redirect edges from the predecessors of BB + // to go to the block containing PN, and update PN + // accordingly. Since we allow merging blocks in the case where the + // predecessor and successor blocks both share some predecessors, + // and where some of those common predecessors might have undef + // values flowing into PN, we want to rewrite those values to be + // consistent with the non-undef values. + + gatherIncomingValuesToPhi(PN, IncomingValues); + + // If this incoming value is one of the PHI nodes in BB, the new entries + // in the PHI node are the entries from the old PHI. + if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { + PHINode *OldValPN = cast<PHINode>(OldVal); + for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) { + // Note that, since we are merging phi nodes and BB and Succ might + // have common predecessors, we could end up with a phi node with + // identical incoming branches. This will be cleaned up later (and + // will trigger asserts if we try to clean it up now, without also + // simplifying the corresponding conditional branch). + BasicBlock *PredBB = OldValPN->getIncomingBlock(i); + Value *PredVal = OldValPN->getIncomingValue(i); + Value *Selected = selectIncomingValueForBlock(PredVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } else { + for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) { + // Update existing incoming values in PN for this + // predecessor of BB. + BasicBlock *PredBB = BBPreds[i]; + Value *Selected = selectIncomingValueForBlock(OldVal, PredBB, + IncomingValues); + + // And add a new incoming value for this predecessor for the + // newly retargeted branch. + PN->addIncoming(Selected, PredBB); + } + } + + replaceUndefValuesInPhi(PN, IncomingValues); +} + /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an /// unconditional branch, and contains no instructions other than PHI nodes, /// potential side-effect free intrinsics and the branch. If possible, @@ -595,7 +729,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { // We can't eliminate infinite loops. BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); if (BB == Succ) return false; - + // Check to see if merging these blocks would cause conflicts for any of the // phi nodes in BB or Succ. If not, we can safely merge. if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; @@ -629,39 +763,21 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { } DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); - + if (isa<PHINode>(Succ->begin())) { // If there is more than one pred of succ, and there are PHI nodes in // the successor, then we need to add incoming edges for the PHI nodes // - const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); - + const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); + // Loop over all of the PHI nodes in the successor of BB. for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); - Value *OldVal = PN->removeIncomingValue(BB, false); - assert(OldVal && "No entry in PHI for Pred BB!"); - - // If this incoming value is one of the PHI nodes in BB, the new entries - // in the PHI node are the entries from the old PHI. - if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { - PHINode *OldValPN = cast<PHINode>(OldVal); - for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) - // Note that, since we are merging phi nodes and BB and Succ might - // have common predecessors, we could end up with a phi node with - // identical incoming branches. This will be cleaned up later (and - // will trigger asserts if we try to clean it up now, without also - // simplifying the corresponding conditional branch). - PN->addIncoming(OldValPN->getIncomingValue(i), - OldValPN->getIncomingBlock(i)); - } else { - // Add an incoming value for each of the new incoming values. - for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) - PN->addIncoming(OldVal, BBPreds[i]); - } + + redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); } } - + if (Succ->getSinglePredecessor()) { // BB is the only predecessor of Succ, so Succ will end up with exactly // the same predecessors BB had. @@ -676,7 +792,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { PN->eraseFromParent(); } } - + // Everything that jumped to BB now goes to Succ. BB->replaceAllUsesWith(Succ); if (!Succ->hasName()) Succ->takeName(BB); @@ -784,7 +900,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, // the final program then it is impossible for us to reliably enforce the // preferred alignment. if (GV->isWeakForLinker()) return Align; - + if (GV->getAlignment() >= PrefAlign) return GV->getAlignment(); // We can only increase the alignment of the global if it has no alignment @@ -804,26 +920,27 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, /// and it is more than the alignment of the ultimate object, see if we can /// increase the alignment of the ultimate object, making this check succeed. unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, - const DataLayout *TD) { + const DataLayout *DL) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; + unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64; + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD); + ComputeMaskedBits(V, KnownZero, KnownOne, DL); unsigned TrailZ = KnownZero.countTrailingOnes(); - - // Avoid trouble with rediculously large TrailZ values, such as + + // Avoid trouble with ridiculously large TrailZ values, such as // those computed from a null pointer. TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); - + // LLVM doesn't support alignments larger than this currently. Align = std::min(Align, +Value::MaximumAlignment); - + if (PrefAlign > Align) - Align = enforceKnownAlignment(V, Align, PrefAlign, TD); - + Align = enforceKnownAlignment(V, Align, PrefAlign, DL); + // We don't need to make any adjustment. return Align; } @@ -854,7 +971,9 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) return false; if (LdStHasDebugValue(DIVar, SI)) @@ -888,16 +1007,18 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) return false; if (LdStHasDebugValue(DIVar, LI)) return true; - Instruction *DbgVal = + Instruction *DbgVal = Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI); - + // Propagate any debug metadata from the store onto the dbg.value. DebugLoc LIDL = LI->getDebugLoc(); if (!LIDL.isUnknown()) @@ -921,10 +1042,14 @@ bool llvm::LowerDbgDeclare(Function &F) { if (Dbgs.empty()) return false; - for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(), + for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(), E = Dbgs.end(); I != E; ++I) { DbgDeclareInst *DDI = *I; - if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); + // If this is an alloca for a scalar variable, insert a dbg.value + // at each load and store to the alloca and erase the dbg.declare. + if (AI && !AI->isArrayAllocation()) { + // We only remove the dbg.declare intrinsic if all uses are // converted to dbg.value intrinsics. bool RemoveDDI = true; @@ -961,7 +1086,9 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, if (!DDI) return false; DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar) return false; // Create a copy of the original DIDescriptor for user variable, appending @@ -990,33 +1117,153 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, return true; } -bool llvm::removeUnreachableBlocks(Function &F) { - SmallPtrSet<BasicBlock*, 16> Reachable; +/// changeToUnreachable - Insert an unreachable instruction before the specified +/// instruction, making it and the rest of the code in the block dead. +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { + BasicBlock *BB = I->getParent(); + // Loop over all of the successors, removing BB's entry from any PHI + // nodes. + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + (*SI)->removePredecessor(BB); + + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); + } + new UnreachableInst(I->getContext(), I); + + // All instructions after this are dead. + BasicBlock::iterator BBI = I, BBE = BB->end(); + while (BBI != BBE) { + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BB->getInstList().erase(BBI++); + } +} + +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Update PHI nodes in the unwind destination + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); +} + +static bool markAliveBlocks(BasicBlock *BB, + SmallPtrSet<BasicBlock*, 128> &Reachable) { + SmallVector<BasicBlock*, 128> Worklist; - Worklist.push_back(&F.getEntryBlock()); - Reachable.insert(&F.getEntryBlock()); + Worklist.push_back(BB); + Reachable.insert(BB); + bool Changed = false; do { - BasicBlock *BB = Worklist.pop_back_val(); + BB = Worklist.pop_back_val(); + + // Do a quick scan of the basic block, turning any obviously unreachable + // instructions into LLVM unreachable insts. The instruction combining pass + // canonicalizes unreachable insts into stores to null or undef. + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ + if (CallInst *CI = dyn_cast<CallInst>(BBI)) { + if (CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. + ++BBI; + if (!isa<UnreachableInst>(BBI)) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(BBI, false); + Changed = true; + } + break; + } + } + + // Store to undef and store to null are undefined and used to signal that + // they should be changed to unreachable by passes that can't modify the + // CFG. + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + + Value *Ptr = SI->getOperand(1); + + if (isa<UndefValue>(Ptr) || + (isa<ConstantPointerNull>(Ptr) && + SI->getPointerAddressSpace() == 0)) { + changeToUnreachable(SI, true); + Changed = true; + break; + } + } + } + + // Turn invokes that call 'nounwind' functions into ordinary calls. + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Value *Callee = II->getCalledValue(); + if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { + changeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow()) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. + BranchInst::Create(II->getNormalDest(), II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); + } else + changeToCall(II); + Changed = true; + } + } + + Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.insert(*SI)) Worklist.push_back(*SI); } while (!Worklist.empty()); + return Changed; +} + +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F) { + SmallPtrSet<BasicBlock*, 128> Reachable; + bool Changed = markAliveBlocks(F.begin(), Reachable); + // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) - return false; + return Changed; assert(Reachable.size() < F.size()); - for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) { - if (Reachable.count(I)) + NumRemoved += F.size()-Reachable.size(); + + // Loop over all of the basic blocks that are not reachable, dropping all of + // their internal references... + for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { + if (Reachable.count(BB)) continue; - for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.count(*SI)) - (*SI)->removePredecessor(I); - I->dropAllReferences(); + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); } - for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) + for (Function::iterator I = ++F.begin(); I != F.end();) if (!Reachable.count(I)) I = F.getBasicBlockList().erase(I); else diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 37819cc..6d5f16c 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -59,6 +59,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); @@ -100,16 +101,16 @@ namespace { private: bool ProcessLoop(Loop *L, LPPassManager &LPM); BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); - BasicBlock *InsertPreheaderForLoop(Loop *L); Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, BasicBlock *Preheader); BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); - void PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl<BasicBlock*> &SplitPreds, - Loop *L); }; } +static void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock*> &SplitPreds, + Loop *L); + char LoopSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", "Canonicalize natural loops", true, false) @@ -208,7 +209,7 @@ ReprocessLoop: // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - Preheader = InsertPreheaderForLoop(L); + Preheader = InsertPreheaderForLoop(L, this); if (Preheader) { ++NumInserted; Changed = true; @@ -367,7 +368,7 @@ ReprocessLoop: /// preheader, this method is called to insert one. This method has two phases: /// preheader insertion and analysis updating. /// -BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { BasicBlock *Header = L->getHeader(); // Compute the set of predecessors of the loop that are not in the loop. @@ -390,11 +391,11 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { BasicBlock *PreheaderBB; if (!Header->isLandingPad()) { PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", - this); + PP); } else { SmallVector<BasicBlock*, 2> NewBBs; SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", - ".split-lp", this, NewBBs); + ".split-lp", PP, NewBBs); PreheaderBB = NewBBs[0]; } @@ -491,9 +492,9 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, // PlaceSplitBlockCarefully - If the block isn't already, move the new block to // right after some 'outside block' block. This prevents the preheader from // being placed inside the loop body, e.g. when the loop hasn't been rotated. -void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, - SmallVectorImpl<BasicBlock*> &SplitPreds, - Loop *L) { +void PlaceSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock*> &SplitPreds, + Loop *L) { // Check to see if NewBB is already well placed. Function::iterator BBI = NewBB; --BBI; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index cb581b3..162807d 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -90,7 +90,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // Move all definitions in the successor to the predecessor... OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); - std::string OldName = BB->getName(); + // OldName will be valid until erased. + StringRef OldName = BB->getName(); // Erase basic block from the function... @@ -102,12 +103,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, } } LI->removeBlock(BB); - BB->eraseFromParent(); // Inherit predecessor's name if it exists... if (!OldName.empty() && !OnlyPred->hasName()) OnlyPred->setName(OldName); + BB->eraseFromParent(); + return OnlyPred; } @@ -239,8 +241,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, DEBUG(dbgs() << "!\n"); } - std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); - bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index 4aee8ff..e017f50 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -29,7 +29,7 @@ using namespace llvm; -STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled"); +STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); static cl::opt<uint32_t> LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64), diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp index 9ec84d7..9799a30 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp @@ -61,6 +61,8 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support", namespace { class LowerInvoke : public FunctionPass { + const TargetMachine *TM; + // Used for both models. Constant *AbortFn; @@ -70,15 +72,12 @@ namespace { Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; bool useExpensiveEHSupport; - // We peek in TLI to grab the target's jmp_buf size and alignment - const TargetLowering *TLI; - public: static char ID; // Pass identification, replacement for typeid - explicit LowerInvoke(const TargetLowering *tli = NULL, + explicit LowerInvoke(const TargetMachine *TM = 0, bool useExpensiveEHSupport = ExpensiveEHSupport) - : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport), - TLI(tli) { + : FunctionPass(ID), TM(TM), + useExpensiveEHSupport(useExpensiveEHSupport) { initializeLowerInvokePass(*PassRegistry::getPassRegistry()); } bool doInitialization(Module &M); @@ -108,12 +107,9 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke", char &llvm::LowerInvokePassID = LowerInvoke::ID; // Public Interface To the LowerInvoke pass. -FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { - return new LowerInvoke(TLI, ExpensiveEHSupport); -} -FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, +FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM, bool useExpensiveEHSupport) { - return new LowerInvoke(TLI, useExpensiveEHSupport); + return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport); } // doInitialization - Make sure that there is a prototype for abort in the @@ -122,6 +118,7 @@ bool LowerInvoke::doInitialization(Module &M) { Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. + const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; JBSize = JBSize ? JBSize : 200; Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); @@ -349,7 +346,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { // Scan all of the uses and see if the live range is live across an unwind // edge. If we find a use live across an invoke edge, create an alloca // and spill the value. - std::set<InvokeInst*> InvokesWithStoreInserted; // Find all of the blocks that this value is live in. std::set<BasicBlock*> LiveBBs; @@ -430,6 +426,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an // alloca because the value needs to be live across invokes. + const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0; unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; AllocaInst *JmpBuf = new AllocaInst(JBLinkTy, 0, Align, diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 955b853..2d2a8a5 100644 --- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -66,6 +66,18 @@ namespace { BasicBlock* OrigBlock, BasicBlock* Default); unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); }; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { + + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; } char LowerSwitch::ID = 0; @@ -147,7 +159,7 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); - ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_ULT, + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); NewNode->getInstList().push_back(Comp); BranchInst::Create(LBranch, RBranch, Comp, NewNode); @@ -222,34 +234,40 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, // Clusterify - Transform simple list of Cases into list of CaseRange's unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { - - IntegersSubsetToBB TheClusterifier; + unsigned numCmps = 0; // Start with "simple" cases - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { - BasicBlock *SuccBB = i.getCaseSuccessor(); - IntegersSubset CaseRanges = i.getCaseValueEx(); - TheClusterifier.add(CaseRanges, SuccBB); - } - - TheClusterifier.optimize(); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), + i.getCaseSuccessor())); - size_t numCmps = 0; - for (IntegersSubsetToBB::RangeIterator i = TheClusterifier.begin(), - e = TheClusterifier.end(); i != e; ++i, ++numCmps) { - IntegersSubsetToBB::Cluster &C = *i; - - // FIXME: Currently work with ConstantInt based numbers. - // Changing it to APInt based is a pretty heavy for this commit. - Cases.push_back(CaseRange(C.first.getLow().toConstantInt(), - C.first.getHigh().toConstantInt(), C.second)); - if (C.first.isSingleNumber()) + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size()>=2) + for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) { + int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); + int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); + BasicBlock* nextBB = J->BB; + BasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue-currentValue==1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) // A range counts double, since it requires two compares. ++numCmps; } - return numCmps; + return numCmps; } // processSwitchInst - Replace the specified switch instruction with a sequence diff --git a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp index 3716f58..c3704531 100644 --- a/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp +++ b/contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp @@ -53,7 +53,7 @@ namespace { } bool runOnModule(Module &M) { - static const char *metaNames[] = { + static const char *const metaNames[] = { // See http://en.wikipedia.org/wiki/Metasyntactic_variable "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp index d090b48..ff6e6f9 100644 --- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -62,3 +63,20 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) { void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) { appendToGlobalArray("llvm.global_dtors", M, F, Priority); } + +GlobalVariable * +llvm::collectUsedGlobalVariables(Module &M, SmallPtrSet<GlobalValue *, 8> &Set, + bool CompilerUsed) { + const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used"; + GlobalVariable *GV = M.getGlobalVariable(Name); + if (!GV || !GV->hasInitializer()) + return GV; + + const ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); + for (unsigned I = 0, E = Init->getNumOperands(); I != E; ++I) { + Value *Op = Init->getOperand(I); + GlobalValue *G = cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases()); + Set.insert(G); + } + return GV; +} diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index de335ec..8f6eee3 100644 --- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -27,8 +27,8 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -56,36 +56,13 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -namespace llvm { -template<> -struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { - typedef std::pair<BasicBlock*, unsigned> EltTy; - static inline EltTy getEmptyKey() { - return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U); - } - static inline EltTy getTombstoneKey() { - return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); - } - static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { - using llvm::hash_value; - return static_cast<unsigned>(hash_value(Val)); - } - static bool isEqual(const EltTy &LHS, const EltTy &RHS) { - return LHS == RHS; - } -}; -} - -/// isAllocaPromotable - Return true if this alloca is legal for promotion. -/// This is true if there are only loads and stores to the alloca. -/// bool llvm::isAllocaPromotable(const AllocaInst *AI) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. // Only allow direct and non-volatile loads and stores... for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { // Loop over all of the uses of the alloca + UI != UE; ++UI) { // Loop over all of the uses of the alloca const User *U = *UI; if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { // Note that atomic loads can be transformed; atomic semantics do @@ -94,7 +71,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { return false; } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == AI) - return false; // Don't allow a store OF the AI, only INTO the AI. + return false; // Don't allow a store OF the AI, only INTO the AI. // Note that atomic stores can be transformed; atomic semantics do // not have any meaning for a local alloca. if (SI->isVolatile()) @@ -124,243 +101,217 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { } namespace { - struct AllocaInfo; - - // Data package used by RenamePass() - class RenamePassData { - public: - typedef std::vector<Value *> ValVector; - - RenamePassData() : BB(NULL), Pred(NULL), Values() {} - RenamePassData(BasicBlock *B, BasicBlock *P, - const ValVector &V) : BB(B), Pred(P), Values(V) {} - BasicBlock *BB; - BasicBlock *Pred; - ValVector Values; - - void swap(RenamePassData &RHS) { - std::swap(BB, RHS.BB); - std::swap(Pred, RHS.Pred); - Values.swap(RHS.Values); + +struct AllocaInfo { + SmallVector<BasicBlock *, 32> DefiningBlocks; + SmallVector<BasicBlock *, 32> UsingBlocks; + + StoreInst *OnlyStore; + BasicBlock *OnlyBlock; + bool OnlyUsedInOneBlock; + + Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; + + void clear() { + DefiningBlocks.clear(); + UsingBlocks.clear(); + OnlyStore = 0; + OnlyBlock = 0; + OnlyUsedInOneBlock = true; + AllocaPointerVal = 0; + DbgDeclare = 0; + } + + /// Scan the uses of the specified alloca, filling in the AllocaInfo used + /// by the rest of the pass to reason about the uses of this alloca. + void AnalyzeAlloca(AllocaInst *AI) { + clear(); + + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E;) { + Instruction *User = cast<Instruction>(*UI++); + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + AllocaPointerVal = SI->getOperand(0); + OnlyStore = SI; + } else { + LoadInst *LI = cast<LoadInst>(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + AllocaPointerVal = LI; + } + + if (OnlyUsedInOneBlock) { + if (OnlyBlock == 0) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) + OnlyUsedInOneBlock = false; + } } - }; - - /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of - /// load/store instructions in the block that directly load or store an alloca. + + DbgDeclare = FindAllocaDbgDeclare(AI); + } +}; + +// Data package used by RenamePass() +class RenamePassData { +public: + typedef std::vector<Value *> ValVector; + + RenamePassData() : BB(NULL), Pred(NULL), Values() {} + RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) + : BB(B), Pred(P), Values(V) {} + BasicBlock *BB; + BasicBlock *Pred; + ValVector Values; + + void swap(RenamePassData &RHS) { + std::swap(BB, RHS.BB); + std::swap(Pred, RHS.Pred); + Values.swap(RHS.Values); + } +}; + +/// \brief This assigns and keeps a per-bb relative ordering of load/store +/// instructions in the block that directly load or store an alloca. +/// +/// This functionality is important because it avoids scanning large basic +/// blocks multiple times when promoting many allocas in the same block. +class LargeBlockInfo { + /// \brief For each instruction that we track, keep the index of the + /// instruction. /// - /// This functionality is important because it avoids scanning large basic - /// blocks multiple times when promoting many allocas in the same block. - class LargeBlockInfo { - /// InstNumbers - For each instruction that we track, keep the index of the - /// instruction. The index starts out as the number of the instruction from - /// the start of the block. - DenseMap<const Instruction *, unsigned> InstNumbers; - public: - - /// isInterestingInstruction - This code only looks at accesses to allocas. - static bool isInterestingInstruction(const Instruction *I) { - return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || - (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); - } - - /// getInstructionIndex - Get or calculate the index of the specified - /// instruction. - unsigned getInstructionIndex(const Instruction *I) { - assert(isInterestingInstruction(I) && - "Not a load/store to/from an alloca?"); - - // If we already have this instruction number, return it. - DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); - if (It != InstNumbers.end()) return It->second; - - // Scan the whole block to get the instruction. This accumulates - // information for every interesting instruction in the block, in order to - // avoid gratuitus rescans. - const BasicBlock *BB = I->getParent(); - unsigned InstNo = 0; - for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); - BBI != E; ++BBI) - if (isInterestingInstruction(BBI)) - InstNumbers[BBI] = InstNo++; - It = InstNumbers.find(I); - - assert(It != InstNumbers.end() && "Didn't insert instruction?"); + /// The index starts out as the number of the instruction from the start of + /// the block. + DenseMap<const Instruction *, unsigned> InstNumbers; + +public: + + /// This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { + return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || + (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); + } + + /// Get or calculate the index of the specified instruction. + unsigned getInstructionIndex(const Instruction *I) { + assert(isInterestingInstruction(I) && + "Not a load/store to/from an alloca?"); + + // If we already have this instruction number, return it. + DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); + if (It != InstNumbers.end()) return It->second; - } - - void deleteValue(const Instruction *I) { - InstNumbers.erase(I); - } - - void clear() { - InstNumbers.clear(); - } - }; - - struct PromoteMem2Reg { - /// Allocas - The alloca instructions being promoted. - /// - std::vector<AllocaInst*> Allocas; - DominatorTree &DT; - DIBuilder *DIB; - - /// AST - An AliasSetTracker object to update. If null, don't update it. - /// - AliasSetTracker *AST; - - /// AllocaLookup - Reverse mapping of Allocas. - /// - DenseMap<AllocaInst*, unsigned> AllocaLookup; - - /// NewPhiNodes - The PhiNodes we're adding. That map is used to simplify - /// some Phi nodes as we iterate over it, so it should have deterministic - /// iterators. We could use a MapVector, but since we already maintain a - /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is - /// more efficient (also supports removal). - /// - DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes; - - /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas - /// it corresponds to. - DenseMap<PHINode*, unsigned> PhiToAllocaMap; - - /// PointerAllocaValues - If we are updating an AliasSetTracker, then for - /// each alloca that is of pointer type, we keep track of what to copyValue - /// to the inserted PHI nodes here. - /// - std::vector<Value*> PointerAllocaValues; - - /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare - /// intrinsic that describes it, if any, so that we can convert it to a - /// dbg.value intrinsic if the alloca gets promoted. - SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; - - /// Visited - The set of basic blocks the renamer has already visited. - /// - SmallPtrSet<BasicBlock*, 16> Visited; - - /// BBNumbers - Contains a stable numbering of basic blocks to avoid - /// non-determinstic behavior. - DenseMap<BasicBlock*, unsigned> BBNumbers; - - /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. - DenseMap<DomTreeNode*, unsigned> DomLevels; - - /// BBNumPreds - Lazily compute the number of predecessors a block has. - DenseMap<const BasicBlock*, unsigned> BBNumPreds; - public: - PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, - AliasSetTracker *ast) - : Allocas(A), DT(dt), DIB(0), AST(ast) {} - ~PromoteMem2Reg() { - delete DIB; - } - void run(); + // Scan the whole block to get the instruction. This accumulates + // information for every interesting instruction in the block, in order to + // avoid gratuitus rescans. + const BasicBlock *BB = I->getParent(); + unsigned InstNo = 0; + for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E; + ++BBI) + if (isInterestingInstruction(BBI)) + InstNumbers[BBI] = InstNo++; + It = InstNumbers.find(I); + + assert(It != InstNumbers.end() && "Didn't insert instruction?"); + return It->second; + } - /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. - /// - bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { - return DT.dominates(BB1, BB2); - } + void deleteValue(const Instruction *I) { InstNumbers.erase(I); } - private: - void RemoveFromAllocasList(unsigned &AllocaIdx) { - Allocas[AllocaIdx] = Allocas.back(); - Allocas.pop_back(); - --AllocaIdx; - } + void clear() { InstNumbers.clear(); } +}; - unsigned getNumPreds(const BasicBlock *BB) { - unsigned &NP = BBNumPreds[BB]; - if (NP == 0) - NP = std::distance(pred_begin(BB), pred_end(BB))+1; - return NP-1; - } +struct PromoteMem2Reg { + /// The alloca instructions being promoted. + std::vector<AllocaInst *> Allocas; + DominatorTree &DT; + DIBuilder DIB; - void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, - AllocaInfo &Info); - void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, - const SmallPtrSet<BasicBlock*, 32> &DefBlocks, - SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); - - void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI); - void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI); - - void RenamePass(BasicBlock *BB, BasicBlock *Pred, - RenamePassData::ValVector &IncVals, - std::vector<RenamePassData> &Worklist); - bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); - }; - - struct AllocaInfo { - SmallVector<BasicBlock*, 32> DefiningBlocks; - SmallVector<BasicBlock*, 32> UsingBlocks; - - StoreInst *OnlyStore; - BasicBlock *OnlyBlock; - bool OnlyUsedInOneBlock; - - Value *AllocaPointerVal; - DbgDeclareInst *DbgDeclare; - - void clear() { - DefiningBlocks.clear(); - UsingBlocks.clear(); - OnlyStore = 0; - OnlyBlock = 0; - OnlyUsedInOneBlock = true; - AllocaPointerVal = 0; - DbgDeclare = 0; - } - - /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our - /// ivars. - void AnalyzeAlloca(AllocaInst *AI) { - clear(); - - // As we scan the uses of the alloca instruction, keep track of stores, - // and decide whether all of the loads and stores to the alloca are within - // the same basic block. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E;) { - Instruction *User = cast<Instruction>(*UI++); - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - // Remember the basic blocks which define new values for the alloca - DefiningBlocks.push_back(SI->getParent()); - AllocaPointerVal = SI->getOperand(0); - OnlyStore = SI; - } else { - LoadInst *LI = cast<LoadInst>(User); - // Otherwise it must be a load instruction, keep track of variable - // reads. - UsingBlocks.push_back(LI->getParent()); - AllocaPointerVal = LI; - } - - if (OnlyUsedInOneBlock) { - if (OnlyBlock == 0) - OnlyBlock = User->getParent(); - else if (OnlyBlock != User->getParent()) - OnlyUsedInOneBlock = false; - } - } - - DbgDeclare = FindAllocaDbgDeclare(AI); - } - }; + /// An AliasSetTracker object to update. If null, don't update it. + AliasSetTracker *AST; - typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair; + /// Reverse mapping of Allocas. + DenseMap<AllocaInst *, unsigned> AllocaLookup; - struct DomTreeNodeCompare { - bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { - return LHS.second < RHS.second; - } - }; -} // end of anonymous namespace + /// \brief The PhiNodes we're adding. + /// + /// That map is used to simplify some Phi nodes as we iterate over it, so + /// it should have deterministic iterators. We could use a MapVector, but + /// since we already maintain a map from BasicBlock* to a stable numbering + /// (BBNumbers), the DenseMap is more efficient (also supports removal). + DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes; + + /// For each PHI node, keep track of which entry in Allocas it corresponds + /// to. + DenseMap<PHINode *, unsigned> PhiToAllocaMap; + + /// If we are updating an AliasSetTracker, then for each alloca that is of + /// pointer type, we keep track of what to copyValue to the inserted PHI + /// nodes here. + std::vector<Value *> PointerAllocaValues; + + /// For each alloca, we keep track of the dbg.declare intrinsic that + /// describes it, if any, so that we can convert it to a dbg.value + /// intrinsic if the alloca gets promoted. + SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares; + + /// The set of basic blocks the renamer has already visited. + /// + SmallPtrSet<BasicBlock *, 16> Visited; + + /// Contains a stable numbering of basic blocks to avoid non-determinstic + /// behavior. + DenseMap<BasicBlock *, unsigned> BBNumbers; + + /// Maps DomTreeNodes to their level in the dominator tree. + DenseMap<DomTreeNode *, unsigned> DomLevels; + + /// Lazily compute the number of predecessors a block has. + DenseMap<const BasicBlock *, unsigned> BBNumPreds; + +public: + PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + AliasSetTracker *AST) + : Allocas(Allocas.begin(), Allocas.end()), DT(DT), + DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {} + + void run(); + +private: + void RemoveFromAllocasList(unsigned &AllocaIdx) { + Allocas[AllocaIdx] = Allocas.back(); + Allocas.pop_back(); + --AllocaIdx; + } + + unsigned getNumPreds(const BasicBlock *BB) { + unsigned &NP = BBNumPreds[BB]; + if (NP == 0) + NP = std::distance(pred_begin(BB), pred_end(BB)) + 1; + return NP - 1; + } + + void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, + AllocaInfo &Info); + void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet<BasicBlock *, 32> &DefBlocks, + SmallPtrSet<BasicBlock *, 32> &LiveInBlocks); + void RenamePass(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector &IncVals, + std::vector<RenamePassData> &Worklist); + bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); +}; + +} // end of anonymous namespace static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { // Knowing that this alloca is promotable, we know that it's safe to kill all @@ -388,10 +339,191 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { } } +/// \brief Rewrite as many loads as possible given a single store. +/// +/// When there is only a single store, we can use the domtree to trivially +/// replace all of the dominated loads with the stored value. Do so, and return +/// true if this has successfully promoted the alloca entirely. If this returns +/// false there were some loads which were not dominated by the single store +/// and thus must be phi-ed with undef. We fall back to the standard alloca +/// promotion algorithm in that case. +static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, + LargeBlockInfo &LBI, + DominatorTree &DT, + AliasSetTracker *AST) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); + int StoreIndex = -1; + + // Clear out UsingBlocks. We will reconstruct it here if needed. + Info.UsingBlocks.clear(); + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + Instruction *UserInst = cast<Instruction>(*UI++); + if (!isa<LoadInst>(UserInst)) { + assert(UserInst == OnlyStore && "Should only have load/stores"); + continue; + } + LoadInst *LI = cast<LoadInst>(UserInst); + + // Okay, if we have a load from the alloca, we want to replace it with the + // only value stored to the alloca. We can do this if the value is + // dominated by the store. If not, we use the rest of the mem2reg machinery + // to insert the phi nodes as needed. + if (!StoringGlobalVal) { // Non-instructions are always dominated. + if (LI->getParent() == StoreBB) { + // If we have a use that is in the same block as the store, compare the + // indices of the two instructions to see which one came first. If the + // load came before the store, we can't handle it. + if (StoreIndex == -1) + StoreIndex = LBI.getInstructionIndex(OnlyStore); + + if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { + // Can't handle this load, bail out. + Info.UsingBlocks.push_back(StoreBB); + continue; + } + + } else if (LI->getParent() != StoreBB && + !DT.dominates(StoreBB, LI->getParent())) { + // If the load and store are in different blocks, use BB dominance to + // check their relationships. If the store doesn't dom the use, bail + // out. + Info.UsingBlocks.push_back(LI->getParent()); + continue; + } + } + + // Otherwise, we *can* safely rewrite this load. + Value *ReplVal = OnlyStore->getOperand(0); + // If the replacement value is the load, this must occur in unreachable + // code. + if (ReplVal == LI) + ReplVal = UndefValue::get(LI->getType()); + LI->replaceAllUsesWith(ReplVal); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Finally, after the scan, check to see if the store is all that is left. + if (!Info.UsingBlocks.empty()) + return false; // If not, we'll have to fall back for the remainder. + + // Record debuginfo for the store and remove the declaration's + // debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DIBuilder DIB(*AI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); + DDI->eraseFromParent(); + LBI.deleteValue(DDI); + } + // Remove the (now dead) store and alloca. + Info.OnlyStore->eraseFromParent(); + LBI.deleteValue(Info.OnlyStore); + + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + return true; +} + +/// Many allocas are only used within a single basic block. If this is the +/// case, avoid traversing the CFG and inserting a lot of potentially useless +/// PHI nodes by just performing a single linear pass over the basic block +/// using the Alloca. +/// +/// If we cannot promote this alloca (because it is read before it is written), +/// return true. This is necessary in cases where, due to control flow, the +/// alloca is potentially undefined on some control flow paths. e.g. code like +/// this is potentially correct: +/// +/// for (...) { if (c) { A = undef; undef = B; } } +/// +/// ... so long as A is not used before undef is set. +static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, + LargeBlockInfo &LBI, + AliasSetTracker *AST) { + // The trickiest case to handle is when we have large blocks. Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to + // make it efficient to get the index of various operations in the block. + + // Walk the use-def list of the alloca, getting the locations of all stores. + typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; + StoresByIndexTy StoresByIndex; + + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; + ++UI) + if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); + + // Sort the stores by their index, making it efficient to do a lookup with a + // binary search. + std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first()); + + // Walk all of the loads from this alloca, replacing them with the nearest + // store above them, if any. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { + LoadInst *LI = dyn_cast<LoadInst>(*UI++); + if (!LI) + continue; + + unsigned LoadIdx = LBI.getInstructionIndex(LI); + + // Find the nearest store that has a lower index than this load. + StoresByIndexTy::iterator I = + std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), + std::make_pair(LoadIdx, static_cast<StoreInst *>(0)), + less_first()); + + if (I == StoresByIndex.begin()) + // If there is no store before this load, the load takes the undef value. + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + else + // Otherwise, there was a store before this load, the load takes its value. + LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0)); + + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); + LI->eraseFromParent(); + LBI.deleteValue(LI); + } + + // Remove the (now dead) stores and alloca. + while (!AI->use_empty()) { + StoreInst *SI = cast<StoreInst>(AI->use_back()); + // Record debuginfo for the store before removing it. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DIBuilder DIB(*AI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + } + SI->eraseFromParent(); + LBI.deleteValue(SI); + } + + if (AST) + AST->deleteValue(AI); + AI->eraseFromParent(); + LBI.deleteValue(AI); + + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + DDI->eraseFromParent(); + LBI.deleteValue(DDI); + } + + ++NumLocalPromoted; +} + void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); - if (AST) PointerAllocaValues.resize(Allocas.size()); + if (AST) + PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; @@ -400,8 +532,7 @@ void PromoteMem2Reg::run() { for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; - assert(isAllocaPromotable(AI) && - "Cannot promote non-promotable alloca!"); + assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); @@ -409,7 +540,8 @@ void PromoteMem2Reg::run() { if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. - if (AST) AST->deleteValue(AI); + if (AST) + AST->deleteValue(AI); AI->eraseFromParent(); // Remove the alloca from the Allocas list, since it has been processed @@ -417,7 +549,7 @@ void PromoteMem2Reg::run() { ++NumDeadAlloca; continue; } - + // Calculate the set of read and write-locations for each alloca. This is // analogous to finding the 'uses' and 'definitions' of each variable. Info.AnalyzeAlloca(AI); @@ -425,75 +557,27 @@ void PromoteMem2Reg::run() { // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - RewriteSingleStoreAlloca(AI, Info, LBI); - - // Finally, after the scan, check to see if the store is all that is left. - if (Info.UsingBlocks.empty()) { - // Record debuginfo for the store and remove the declaration's - // debuginfo. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { - if (!DIB) - DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); - ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); - DDI->eraseFromParent(); - } - // Remove the (now dead) store and alloca. - Info.OnlyStore->eraseFromParent(); - LBI.deleteValue(Info.OnlyStore); - - if (AST) AST->deleteValue(AI); - AI->eraseFromParent(); - LBI.deleteValue(AI); - + if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); - ++NumSingleStore; continue; } } - + // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock) { - PromoteSingleBlockAlloca(AI, Info, LBI); - - // Finally, after the scan, check to see if the stores are all that is - // left. - if (Info.UsingBlocks.empty()) { - - // Remove the (now dead) stores and alloca. - while (!AI->use_empty()) { - StoreInst *SI = cast<StoreInst>(AI->use_back()); - // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { - if (!DIB) - DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); - ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); - } - SI->eraseFromParent(); - LBI.deleteValue(SI); - } - - if (AST) AST->deleteValue(AI); - AI->eraseFromParent(); - LBI.deleteValue(AI); - - // The alloca has been processed, move on. - RemoveFromAllocasList(AllocaNum); - - // The alloca's debuginfo can be removed as well. - if (DbgDeclareInst *DDI = Info.DbgDeclare) - DDI->eraseFromParent(); + promoteSingleBlockAlloca(AI, Info, LBI, AST); - ++NumLocalPromoted; - continue; - } + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + continue; } // If we haven't computed dominator tree levels, do so now. if (DomLevels.empty()) { - SmallVector<DomTreeNode*, 32> Worklist; + SmallVector<DomTreeNode *, 32> Worklist; DomTreeNode *Root = DT.getRootNode(); DomLevels[Root] = 0; @@ -522,10 +606,11 @@ void PromoteMem2Reg::run() { // stored into the alloca. if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; - + // Remember the dbg.declare intrinsic describing this alloca, if any. - if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; - + if (Info.DbgDeclare) + AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; + // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -540,8 +625,7 @@ void PromoteMem2Reg::run() { return; // All of the allocas must have been trivial! LBI.clear(); - - + // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. @@ -562,7 +646,7 @@ void PromoteMem2Reg::run() { // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); } while (!RenamePassWorkList.empty()); - + // The renamer uses the Visited set to avoid infinite loops. Clear it now. Visited.clear(); @@ -575,7 +659,8 @@ void PromoteMem2Reg::run() { // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); - if (AST) AST->deleteValue(A); + if (AST) + AST->deleteValue(A); A->eraseFromParent(); } @@ -591,13 +676,15 @@ void PromoteMem2Reg::run() { bool EliminatedAPHI = true; while (EliminatedAPHI) { EliminatedAPHI = false; - + // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. If it was not, we could add uses to // the values we replace with in a non deterministic order, thus creating // non deterministic def->use chains. - for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = - NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. @@ -613,15 +700,17 @@ void PromoteMem2Reg::run() { ++I; } } - + // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // - for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = - NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { + for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. PHINode *SomePHI = I->second; @@ -636,21 +725,20 @@ void PromoteMem2Reg::run() { continue; // Get the preds for BB. - SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); - + SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + // Ok, now we know that all of the PHI nodes are missing entries for some // basic blocks. Start by sorting the incoming predecessors for efficient // access. std::sort(Preds.begin(), Preds.end()); - + // Now we loop through all BB's which have entries in SomePHI and remove // them from the Preds list. for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { // Do a log(n) search of the Preds list for the entry we want. - SmallVector<BasicBlock*, 16>::iterator EntIt = - std::lower_bound(Preds.begin(), Preds.end(), - SomePHI->getIncomingBlock(i)); - assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& + SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound( + Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i)); + assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) && "PHI node has entry for a block which is not a predecessor!"); // Remove the entry @@ -670,39 +758,41 @@ void PromoteMem2Reg::run() { SomePHI->addIncoming(UndefVal, Preds[pred]); } } - + NewPhiNodes.clear(); } +/// \brief Determine which blocks the value is live in. +/// +/// These are blocks which lead to uses. Knowing this allows us to avoid +/// inserting PHI nodes into blocks which don't lead to uses (thus, the +/// inserted phi nodes would be dead). +void PromoteMem2Reg::ComputeLiveInBlocks( + AllocaInst *AI, AllocaInfo &Info, + const SmallPtrSet<BasicBlock *, 32> &DefBlocks, + SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) { -/// ComputeLiveInBlocks - Determine which blocks the value is live in. These -/// are blocks which lead to uses. Knowing this allows us to avoid inserting -/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes -/// would be dead). -void PromoteMem2Reg:: -ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, - const SmallPtrSet<BasicBlock*, 32> &DefBlocks, - SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) { - // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. - SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), - Info.UsingBlocks.end()); - + SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), + Info.UsingBlocks.end()); + // If any of the using blocks is also a definition block, check to see if the // definition occurs before or after the use. If it happens before the use, // the value isn't really live-in. for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { BasicBlock *BB = LiveInBlockWorklist[i]; - if (!DefBlocks.count(BB)) continue; - + if (!DefBlocks.count(BB)) + continue; + // Okay, this is a block that both uses and defines the value. If the first // reference to the alloca is a def (store), then we know it isn't live-in. - for (BasicBlock::iterator I = BB->begin(); ; ++I) { + for (BasicBlock::iterator I = BB->begin();; ++I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (SI->getOperand(1) != AI) continue; - + if (SI->getOperand(1) != AI) + continue; + // We found a store to the alloca before a load. The alloca is not // actually live-in here. LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); @@ -710,73 +800,76 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, --i, --e; break; } - + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (LI->getOperand(0) != AI) continue; - + if (LI->getOperand(0) != AI) + continue; + // Okay, we found a load before a store to the alloca. It is actually // live into this block. break; } } } - + // Now that we have a set of blocks where the phi is live-in, recursively add // their predecessors until we find the full region the value is live. while (!LiveInBlockWorklist.empty()) { BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); - + // The block really is live in here, insert it into the set. If already in // the set, then it has already been processed. if (!LiveInBlocks.insert(BB)) continue; - + // Since the value is live into BB, it is either defined in a predecessor or // live into it to. Add the preds to the worklist unless they are a // defining block. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; - + // The value is not live into a predecessor if it defines the value. if (DefBlocks.count(P)) continue; - + // Otherwise it is, add to the worklist. LiveInBlockWorklist.push_back(P); } } } -/// DetermineInsertionPoint - At this point, we're committed to promoting the -/// alloca using IDF's, and the standard SSA construction algorithm. Determine -/// which blocks need phi nodes and see if we can optimize out some work by -/// avoiding insertion of dead phi nodes. +/// At this point, we're committed to promoting the alloca using IDF's, and the +/// standard SSA construction algorithm. Determine which blocks need phi nodes +/// and see if we can optimize out some work by avoiding insertion of dead phi +/// nodes. void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, AllocaInfo &Info) { // Unique the set of defining blocks for efficient lookup. - SmallPtrSet<BasicBlock*, 32> DefBlocks; + SmallPtrSet<BasicBlock *, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); // Determine which blocks the value is live in. These are blocks which lead // to uses. - SmallPtrSet<BasicBlock*, 32> LiveInBlocks; + SmallPtrSet<BasicBlock *, 32> LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); // Use a priority queue keyed on dominator tree level so that inserted nodes // are handled from the bottom of the dominator tree upwards. + typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, - DomTreeNodeCompare> IDFPriorityQueue; + less_second> IDFPriorityQueue; IDFPriorityQueue PQ; - for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(), - E = DefBlocks.end(); I != E; ++I) { + for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(), + E = DefBlocks.end(); + I != E; ++I) { if (DomTreeNode *Node = DT.getNode(*I)) PQ.push(std::make_pair(Node, DomLevels[Node])); } - SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks; - SmallPtrSet<DomTreeNode*, 32> Visited; - SmallVector<DomTreeNode*, 32> Worklist; + SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks; + SmallPtrSet<DomTreeNode *, 32> Visited; + SmallVector<DomTreeNode *, 32> Worklist; while (!PQ.empty()) { DomTreeNodePair RootPair = PQ.top(); PQ.pop(); @@ -836,179 +929,22 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); } -/// RewriteSingleStoreAlloca - If there is only a single store to this value, -/// replace any loads of it that are directly dominated by the definition with -/// the value stored. -void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, - AllocaInfo &Info, - LargeBlockInfo &LBI) { - StoreInst *OnlyStore = Info.OnlyStore; - bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); - BasicBlock *StoreBB = OnlyStore->getParent(); - int StoreIndex = -1; - - // Clear out UsingBlocks. We will reconstruct it here if needed. - Info.UsingBlocks.clear(); - - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { - Instruction *UserInst = cast<Instruction>(*UI++); - if (!isa<LoadInst>(UserInst)) { - assert(UserInst == OnlyStore && "Should only have load/stores"); - continue; - } - LoadInst *LI = cast<LoadInst>(UserInst); - - // Okay, if we have a load from the alloca, we want to replace it with the - // only value stored to the alloca. We can do this if the value is - // dominated by the store. If not, we use the rest of the mem2reg machinery - // to insert the phi nodes as needed. - if (!StoringGlobalVal) { // Non-instructions are always dominated. - if (LI->getParent() == StoreBB) { - // If we have a use that is in the same block as the store, compare the - // indices of the two instructions to see which one came first. If the - // load came before the store, we can't handle it. - if (StoreIndex == -1) - StoreIndex = LBI.getInstructionIndex(OnlyStore); - - if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { - // Can't handle this load, bail out. - Info.UsingBlocks.push_back(StoreBB); - continue; - } - - } else if (LI->getParent() != StoreBB && - !dominates(StoreBB, LI->getParent())) { - // If the load and store are in different blocks, use BB dominance to - // check their relationships. If the store doesn't dom the use, bail - // out. - Info.UsingBlocks.push_back(LI->getParent()); - continue; - } - } - - // Otherwise, we *can* safely rewrite this load. - Value *ReplVal = OnlyStore->getOperand(0); - // If the replacement value is the load, this must occur in unreachable - // code. - if (ReplVal == LI) - ReplVal = UndefValue::get(LI->getType()); - LI->replaceAllUsesWith(ReplVal); - if (AST && LI->getType()->isPointerTy()) - AST->deleteValue(LI); - LI->eraseFromParent(); - LBI.deleteValue(LI); - } -} - -namespace { - -/// StoreIndexSearchPredicate - This is a helper predicate used to search by the -/// first element of a pair. -struct StoreIndexSearchPredicate { - bool operator()(const std::pair<unsigned, StoreInst*> &LHS, - const std::pair<unsigned, StoreInst*> &RHS) { - return LHS.first < RHS.first; - } -}; - -} - -/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic -/// block. If this is the case, avoid traversing the CFG and inserting a lot of -/// potentially useless PHI nodes by just performing a single linear pass over -/// the basic block using the Alloca. -/// -/// If we cannot promote this alloca (because it is read before it is written), -/// return true. This is necessary in cases where, due to control flow, the -/// alloca is potentially undefined on some control flow paths. e.g. code like -/// this is potentially correct: -/// -/// for (...) { if (c) { A = undef; undef = B; } } -/// -/// ... so long as A is not used before undef is set. +/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca. /// -void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI) { - // The trickiest case to handle is when we have large blocks. Because of this, - // this code is optimized assuming that large blocks happen. This does not - // significantly pessimize the small block case. This uses LargeBlockInfo to - // make it efficient to get the index of various operations in the block. - - // Clear out UsingBlocks. We will reconstruct it here if needed. - Info.UsingBlocks.clear(); - - // Walk the use-def list of the alloca, getting the locations of all stores. - typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; - StoresByIndexTy StoresByIndex; - - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E; ++UI) - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) - StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); - - // If there are no stores to the alloca, just replace any loads with undef. - if (StoresByIndex.empty()) { - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { - LI->replaceAllUsesWith(UndefValue::get(LI->getType())); - if (AST && LI->getType()->isPointerTy()) - AST->deleteValue(LI); - LBI.deleteValue(LI); - LI->eraseFromParent(); - } - return; - } - - // Sort the stores by their index, making it efficient to do a lookup with a - // binary search. - std::sort(StoresByIndex.begin(), StoresByIndex.end()); - - // Walk all of the loads from this alloca, replacing them with the nearest - // store above them, if any. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { - LoadInst *LI = dyn_cast<LoadInst>(*UI++); - if (!LI) continue; - - unsigned LoadIdx = LBI.getInstructionIndex(LI); - - // Find the nearest store that has a lower than this load. - StoresByIndexTy::iterator I = - std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), - StoreIndexSearchPredicate()); - - // If there is no store before this load, then we can't promote this load. - if (I == StoresByIndex.begin()) { - // Can't handle this load, bail out. - Info.UsingBlocks.push_back(LI->getParent()); - continue; - } - - // Otherwise, there was a store before this load, the load takes its value. - --I; - LI->replaceAllUsesWith(I->second->getOperand(0)); - if (AST && LI->getType()->isPointerTy()) - AST->deleteValue(LI); - LI->eraseFromParent(); - LBI.deleteValue(LI); - } -} - -// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific -// Alloca returns true if there wasn't already a phi-node for that variable -// +/// Returns true if there wasn't already a phi-node for that variable bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, unsigned &Version) { // Look up the basic-block in question. PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; // If the BB already has a phi node added for the i'th alloca then we're done! - if (PN) return false; + if (PN) + return false; // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), - Allocas[AllocaNo]->getName() + "." + Twine(Version++), + Allocas[AllocaNo]->getName() + "." + Twine(Version++), BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; @@ -1019,10 +955,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, return true; } -// RenamePass - Recursively traverse the CFG of the function, renaming loads and -// stores to the allocas which we are promoting. IncomingVals indicates what -// value each Alloca contains on exit from the predecessor block Pred. -// +/// \brief Recursively traverse the CFG of the function, renaming loads and +/// stores to the allocas which we are promoting. +/// +/// IncomingVals indicates what value each Alloca contains on exit from the +/// predecessor block Pred. void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncomingVals, std::vector<RenamePassData> &Worklist) { @@ -1040,48 +977,49 @@ NextIteration: // inserted by this pass of mem2reg will have the same number of incoming // operands so far. Remember this count. unsigned NewPHINumOperands = APN->getNumOperands(); - - unsigned NumEdges = 0; - for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) - if (*I == BB) - ++NumEdges; + + unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); assert(NumEdges && "Must be at least one edge from Pred to BB!"); - + // Add entries for all the phis. BasicBlock::iterator PNI = BB->begin(); do { unsigned AllocaNo = PhiToAllocaMap[APN]; - + // Add N incoming values to the PHI node. for (unsigned i = 0; i != NumEdges; ++i) APN->addIncoming(IncomingVals[AllocaNo], Pred); - + // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; - + // Get the next phi node. ++PNI; APN = dyn_cast<PHINode>(PNI); - if (APN == 0) break; - + if (APN == 0) + break; + // Verify that it is missing entries. If not, it is not being inserted // by this mem2reg invocation so we want to ignore it. } while (APN->getNumOperands() == NewPHINumOperands); } } - + // Don't revisit blocks. - if (!Visited.insert(BB)) return; + if (!Visited.insert(BB)) + return; - for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { + for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) { Instruction *I = II++; // get the instruction, increment iterator if (LoadInst *LI = dyn_cast<LoadInst>(I)) { AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); - if (!Src) continue; - - DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); - if (AI == AllocaLookup.end()) continue; + if (!Src) + continue; + + DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); + if (AI == AllocaLookup.end()) + continue; Value *V = IncomingVals[AI->second]; @@ -1094,30 +1032,29 @@ NextIteration: // Delete this instruction and mark the name as the current holder of the // value AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); - if (!Dest) continue; - + if (!Dest) + continue; + DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); if (ai == AllocaLookup.end()) continue; - + // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { - if (!DIB) - DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); - ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); - } + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); BB->getInstList().erase(SI); } } // 'Recurse' to our successors. succ_iterator I = succ_begin(BB), E = succ_end(BB); - if (I == E) return; + if (I == E) + return; // Keep track of the successors so we don't visit the same successor twice - SmallPtrSet<BasicBlock*, 8> VisitedSuccs; + SmallPtrSet<BasicBlock *, 8> VisitedSuccs; // Handle the first successor without using the worklist. VisitedSuccs.insert(*I); @@ -1132,18 +1069,11 @@ NextIteration: goto NextIteration; } -/// PromoteMemToReg - Promote the specified list of alloca instructions into -/// scalar registers, inserting PHI nodes as appropriate. This function does -/// not modify the CFG of the function at all. All allocas must be from the -/// same function. -/// -/// If AST is specified, the specified tracker is updated to reflect changes -/// made to the IR. -/// -void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, - DominatorTree &DT, AliasSetTracker *AST) { +void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT, + AliasSetTracker *AST) { // If there is nothing to do, bail out... - if (Allocas.empty()) return; + if (Allocas.empty()) + return; PromoteMem2Reg(Allocas, DT, AST).run(); } diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 9d90fbe..30adbfa 100644 --- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -42,8 +42,6 @@ SSAUpdater::~SSAUpdater() { delete static_cast<AvailableValsTy*>(AV); } -/// Initialize - Reset this object to get ready for a new set of SSA -/// updates with type 'Ty'. PHI nodes get a name based on 'Name'. void SSAUpdater::Initialize(Type *Ty, StringRef Name) { if (AV == 0) AV = new AvailableValsTy(); @@ -53,14 +51,10 @@ void SSAUpdater::Initialize(Type *Ty, StringRef Name) { ProtoName = Name; } -/// HasValueForBlock - Return true if the SSAUpdater already has a value for -/// the specified block. bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { return getAvailableVals(AV).count(BB); } -/// AddAvailableValue - Indicate that a rewritten value is available in the -/// specified block with the specified value. void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { assert(ProtoType != 0 && "Need to initialize SSAUpdater"); assert(ProtoType == V->getType() && @@ -68,10 +62,8 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { getAvailableVals(AV)[BB] = V; } -/// IsEquivalentPHI - Check if PHI has the same incoming value as specified -/// in ValueMapping for each predecessor block. static bool IsEquivalentPHI(PHINode *PHI, - DenseMap<BasicBlock*, Value*> &ValueMapping) { + SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) return false; @@ -86,32 +78,11 @@ static bool IsEquivalentPHI(PHINode *PHI, return true; } -/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is -/// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { Value *Res = GetValueAtEndOfBlockInternal(BB); return Res; } -/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that -/// is live in the middle of the specified block. -/// -/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one -/// important case: if there is a definition of the rewritten value after the -/// 'use' in BB. Consider code like this: -/// -/// X1 = ... -/// SomeBB: -/// use(X) -/// X2 = ... -/// br Cond, SomeBB, OutBB -/// -/// In this case, there are two values (X1 and X2) added to the AvailableVals -/// set by the client of the rewriter, and those values are both live out of -/// their respective blocks. However, the use of X happens in the *middle* of -/// a block. Because of this, we need to insert a new PHI node in SomeBB to -/// merge the appropriate values, and this value isn't live out of the block. -/// Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. @@ -165,8 +136,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we do need a PHI: check to see if we already have one available // in this block that produces the right value. if (isa<PHINode>(BB->begin())) { - DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), - PredValues.end()); + SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(), + PredValues.end()); PHINode *SomePHI; for (BasicBlock::iterator It = BB->begin(); (SomePHI = dyn_cast<PHINode>(It)); ++It) { @@ -203,8 +174,6 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return InsertedPHI; } -/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, -/// which use their value in the corresponding predecessor. void SSAUpdater::RewriteUse(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); @@ -222,10 +191,6 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } -/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However, -/// this version of the method can rewrite uses in the same block as a -/// definition, because it assumes that all uses of a value are below any -/// inserted values. void SSAUpdater::RewriteUseAfterInsertions(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); @@ -238,8 +203,6 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) { U.set(V); } -/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template, -/// specialized for SSAUpdater. namespace llvm { template<> class SSAUpdaterTraits<SSAUpdater> { @@ -342,10 +305,9 @@ public: } // End llvm namespace -/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry -/// for the specified BB and if so, return it. If not, construct SSA form by -/// first calculating the required placement of PHIs and then inserting new -/// PHIs where needed. +/// Check to see if AvailableVals has an entry for the specified BB and if so, +/// return it. If not, construct SSA form by first calculating the required +/// placement of PHIs and then inserting new PHIs where needed. Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); if (Value *V = AvailableVals[BB]) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 052ad85..ff50b12 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -40,12 +41,14 @@ #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/NoFolder.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <map> #include <set> using namespace llvm; +using namespace PatternMatch; static cl::opt<unsigned> PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), @@ -88,7 +91,6 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout *const TD; - Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -194,94 +196,7 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); } - -/// GetIfCondition - Given a basic block (BB) with two predecessors (and at -/// least one PHI node in it), check to see if the merge at this block is due -/// to an "if condition". If so, return the boolean condition that determines -/// which entry into BB will be taken. Also, return by references the block -/// that will be entered from if the condition is true, and the block that will -/// be entered if the condition is false. -/// -/// This does no checking to see if the true/false blocks have large or unsavory -/// instructions in them. -static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, - BasicBlock *&IfFalse) { - PHINode *SomePHI = cast<PHINode>(BB->begin()); - assert(SomePHI->getNumIncomingValues() == 2 && - "Function can only handle blocks with 2 predecessors!"); - BasicBlock *Pred1 = SomePHI->getIncomingBlock(0); - BasicBlock *Pred2 = SomePHI->getIncomingBlock(1); - - // We can only handle branches. Other control flow will be lowered to - // branches if possible anyway. - BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); - BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); - if (Pred1Br == 0 || Pred2Br == 0) - return 0; - - // Eliminate code duplication by ensuring that Pred1Br is conditional if - // either are. - if (Pred2Br->isConditional()) { - // If both branches are conditional, we don't have an "if statement". In - // reality, we could transform this case, but since the condition will be - // required anyway, we stand no chance of eliminating it, so the xform is - // probably not profitable. - if (Pred1Br->isConditional()) - return 0; - - std::swap(Pred1, Pred2); - std::swap(Pred1Br, Pred2Br); - } - - if (Pred1Br->isConditional()) { - // The only thing we have to watch out for here is to make sure that Pred2 - // doesn't have incoming edges from other blocks. If it does, the condition - // doesn't dominate BB. - if (Pred2->getSinglePredecessor() == 0) - return 0; - - // If we found a conditional branch predecessor, make sure that it branches - // to BB and Pred2Br. If it doesn't, this isn't an "if statement". - if (Pred1Br->getSuccessor(0) == BB && - Pred1Br->getSuccessor(1) == Pred2) { - IfTrue = Pred1; - IfFalse = Pred2; - } else if (Pred1Br->getSuccessor(0) == Pred2 && - Pred1Br->getSuccessor(1) == BB) { - IfTrue = Pred2; - IfFalse = Pred1; - } else { - // We know that one arm of the conditional goes to BB, so the other must - // go somewhere unrelated, and this must not be an "if statement". - return 0; - } - - return Pred1Br->getCondition(); - } - - // Ok, if we got here, both predecessors end with an unconditional branch to - // BB. Don't panic! If both blocks only have a single (identical) - // predecessor, and THAT is a conditional branch, then we're all ok! - BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) - return 0; - - // Otherwise, if this is a conditional branch, then we can use it! - BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); - if (BI == 0) return 0; - - assert(BI->isConditional() && "Two successors but not conditional?"); - if (BI->getSuccessor(0) == Pred1) { - IfTrue = Pred1; - IfFalse = Pred2; - } else { - IfTrue = Pred2; - IfFalse = Pred1; - } - return BI->getCondition(); -} - -/// ComputeSpeculuationCost - Compute an abstract "cost" of speculating the +/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the /// given instruction, which is assumed to be safe to speculate. 1 means /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. static unsigned ComputeSpeculationCost(const User *I) { @@ -432,7 +347,24 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { + Value *RHSVal; + ConstantInt *RHSC; + if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) { + // (x & ~2^x) == y --> x == y || x == y|2^x + // This undoes a transformation done by instcombine to fuse 2 compares. + if (match(ICI->getOperand(0), + m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) { + APInt Not = ~RHSC->getValue(); + if (Not.isPowerOf2()) { + Vals.push_back(C); + Vals.push_back( + ConstantInt::get(C->getContext(), C->getValue() | Not)); + UsedICmps++; + return RHSVal; + } + } + UsedICmps++; Vals.push_back(C); return I->getOperand(0); @@ -443,6 +375,13 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); + // Shift the range if the compare is fed by an add. This is the range + // compare idiom as emitted by instcombine. + bool hasAdd = + match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC))); + if (hasAdd) + Span = Span.subtract(RHSC->getValue()); + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) @@ -455,7 +394,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; - return I->getOperand(0); + return hasAdd ? RHSVal : I->getOperand(0); } return 0; } @@ -533,15 +472,17 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) - if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || - ICI->getPredicate() == ICmpInst::ICMP_NE) && - GetConstantInt(ICI->getOperand(1), TD)) + if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD)) CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. - if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) - if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) - CV = PTII->getOperand(0); + if (TD && CV) { + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { + Value *Ptr = PTII->getPointerOperand(); + if (PTII->getType() == TD->getIntPtrType(Ptr->getType())) + CV = Ptr; + } + } return CV; } @@ -763,9 +704,10 @@ namespace { }; } -static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt*const*)P1; - const ConstantInt *RHS = *(const ConstantInt*const*)P2; +static int ConstantIntSortPredicate(ConstantInt *const *P1, + ConstantInt *const *P2) { + const ConstantInt *LHS = *P1; + const ConstantInt *RHS = *P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -988,7 +930,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), "magicptr"); } @@ -1083,9 +1025,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; - // If we get here, we can hoist at least one instruction. BasicBlock *BIParent = BI->getParent(); + bool Changed = false; do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -1100,6 +1042,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); I2->eraseFromParent(); + Changed = true; I1 = BB1_Itr++; I2 = BB2_Itr++; @@ -1119,7 +1062,23 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { HoistTerminator: // It may not be possible to hoist an invoke. if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) - return true; + return Changed; + + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V == BB2V) + continue; + + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + return Changed; + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + return Changed; + } + } // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); @@ -1362,8 +1321,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { /// /// \return The pointer to the value of the previous store if the store can be /// hoisted into the predecessor block. 0 otherwise. -Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, - BasicBlock *StoreBB, BasicBlock *EndBB) { +static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, + BasicBlock *StoreBB, BasicBlock *EndBB) { StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); if (!StoreToHoist) return 0; @@ -1522,18 +1481,23 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { Value *OrigV = PN->getIncomingValueForBlock(BB); Value *ThenV = PN->getIncomingValueForBlock(ThenBB); + // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. // Skip PHIs which are trivial. if (ThenV == OrigV) continue; HaveRewritablePHIs = true; - ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV); - if (!CE) + ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV); + ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV); + if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if (!isSafeToSpeculativelyExecute(CE)) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; - if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0; + if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) return false; // Account for the cost of an unfolded ConstantExpr which could end up @@ -1598,6 +1562,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return true; } +/// \returns True if this block contains a CallInst with the NoDuplicate +/// attribute. +static bool HasNoDuplicateCall(const BasicBlock *BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + const CallInst *CI = dyn_cast<CallInst>(I); + if (!CI) + continue; + if (CI->cannotDuplicate()) + return true; + } + return false; +} + /// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch /// across this block. static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { @@ -1645,6 +1622,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + if (HasNoDuplicateCall(BB)) return false; + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -2111,14 +2090,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values // must already have been resolved, so we won't be inhibiting the - // out-of-order core by speculating them earlier. - if (BonusInst) { + // out-of-order core by speculating them earlier. We also allow + // instructions that are used by the terminator's condition because it + // exposes more merging opportunities. + bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() && + *BonusInst->use_begin() == Cond); + + if (BonusInst && !UsedByBranch) { // Collect the values used by the bonus inst SmallPtrSet<Value*, 4> UsedValues; for (Instruction::op_iterator OI = BonusInst->op_begin(), OE = BonusInst->op_end(); OI != OE; ++OI) { Value *V = *OI; - if (!isa<Constant>(V)) + if (!isa<Constant>(V) && !isa<Argument>(V)) UsedValues.insert(V); } @@ -2829,7 +2813,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, if (CompVal->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, - TD->getIntPtrType(CompVal->getContext()), + TD->getIntPtrType(CompVal->getType()), "magicptr"); } @@ -3202,7 +3186,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// and use it to remove dead cases. static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); - unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth(); + unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); ComputeMaskedBits(Cond, KnownZero, KnownOne); @@ -3307,7 +3291,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), E = ForwardingNodes.end(); I != E; ++I) { PHINode *Phi = I->first; - SmallVector<int,4> &Indexes = I->second; + SmallVectorImpl<int> &Indexes = I->second; if (Indexes.size() < 2) continue; @@ -3345,28 +3329,10 @@ static Constant *LookupConstant(Value *V, /// simple instructions such as binary operations where both operands are /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. -static Constant *ConstantFold(Instruction *I, - const SmallDenseMap<Value*, Constant*>& ConstantPool) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - Constant *A = LookupConstant(BO->getOperand(0), ConstantPool); - if (!A) - return 0; - Constant *B = LookupConstant(BO->getOperand(1), ConstantPool); - if (!B) - return 0; - return ConstantExpr::get(BO->getOpcode(), A, B); - } - - if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { - Constant *A = LookupConstant(I->getOperand(0), ConstantPool); - if (!A) - return 0; - Constant *B = LookupConstant(I->getOperand(1), ConstantPool); - if (!B) - return 0; - return ConstantExpr::getCompare(Cmp->getPredicate(), A, B); - } - +static Constant * +ConstantFold(Instruction *I, + const SmallDenseMap<Value *, Constant *> &ConstantPool, + const DataLayout *DL) { if (SelectInst *Select = dyn_cast<SelectInst>(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) @@ -3378,25 +3344,32 @@ static Constant *ConstantFold(Instruction *I, return 0; } - if (CastInst *Cast = dyn_cast<CastInst>(I)) { - Constant *A = LookupConstant(I->getOperand(0), ConstantPool); - if (!A) + SmallVector<Constant *, 4> COps; + for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { + if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) + COps.push_back(A); + else return 0; - return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy()); } - return 0; + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], + COps[1], DL); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); } /// GetCaseResults - Try to determine the resulting constant values in phi nodes /// at the common destination basic block, *CommonDest, for one of the case /// destionations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI. -static bool GetCaseResults(SwitchInst *SI, - ConstantInt *CaseVal, - BasicBlock *CaseDest, - BasicBlock **CommonDest, - SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) { +static bool +GetCaseResults(SwitchInst *SI, + ConstantInt *CaseVal, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res, + const DataLayout *DL) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); @@ -3415,7 +3388,7 @@ static bool GetCaseResults(SwitchInst *SI, } else if (isa<DbgInfoIntrinsic>(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, ConstantPool)) { + } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { // Instruction is side-effect free and constant. ConstantPool.insert(std::make_pair(I, C)); } else { @@ -3469,7 +3442,7 @@ namespace { SwitchLookupTable(Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, const DataLayout *TD); @@ -3516,7 +3489,7 @@ namespace { SwitchLookupTable::SwitchLookupTable(Module &M, uint64_t TableSize, ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, const DataLayout *TD) : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { @@ -3643,7 +3616,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, } /// ShouldBuildLookupTable - Determine whether a lookup table should be built -/// for this switch, based on the number of caes, size of the table and the +/// for this switch, based on the number of cases, size of the table and the /// types of the results. static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, @@ -3739,7 +3712,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, - Results)) + Results, TD)) return false; // Append the result from this case to the list for each phi. @@ -3753,7 +3726,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Get the resulting values for the default case. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList)) + DefaultResultsList, TD)) return false; for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; @@ -3774,14 +3747,32 @@ static bool SwitchToLookupTable(SwitchInst *SI, CommonDest->getParent(), CommonDest); - // Check whether the condition value is within the case range, and branch to - // the new BB. + // Compute the table index value. Builder.SetInsertPoint(SI); Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); - Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( - MinCaseVal->getType(), TableSize)); - Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Compute the maximum table size representable by the integer type we are + // switching upon. + unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize; + assert(MaxTableSize >= TableSize && + "It is impossible for a switch to have more entries than the max " + "representable value of its input integer type's size."); + + // If we have a fully covered lookup table, unconditionally branch to the + // lookup table BB. Otherwise, check if the condition value is within the case + // range. If it is so, branch to the new BB. Otherwise branch to SI's default + // destination. + const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; + if (GeneratingCoveredLookupTable) { + Builder.CreateBr(LookupBB); + SI->getDefaultDest()->removePredecessor(SI->getParent()); + } else { + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + } // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); @@ -3810,9 +3801,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, Builder.CreateBr(CommonDest); // Remove the switch. - for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); - if (Succ == SI->getDefaultDest()) continue; + + if (Succ == SI->getDefaultDest()) + continue; Succ->removePredecessor(SI->getParent()); } SI->eraseFromParent(); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 41c207c..bf3442a 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -119,7 +119,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) return 0; D = ConstantInt::get(UseInst->getContext(), - APInt(BitWidth, 1).shl(D->getZExtValue())); + APInt::getOneBitSet(BitWidth, D->getZExtValue())); } FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D)); } diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6bea2dd..15b3e66 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -26,11 +27,16 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; +static cl::opt<bool> +ColdErrorCalls("error-reporting-is-cold", cl::init(true), + cl::Hidden, cl::desc("Treat error-reporting calls as cold")); + /// This class is the abstract base class for the set of optimizations that /// corresponds to one library call. namespace { @@ -118,6 +124,21 @@ static bool callHasFloatingPointArgument(const CallInst *CI) { return false; } +/// \brief Check whether the overloaded unary floating point function +/// corresponing to \a Ty is available. +static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc::Func DoubleFn, LibFunc::Func FloatFn, + LibFunc::Func LongDoubleFn) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + return TLI->has(FloatFn); + case Type::DoubleTyID: + return TLI->has(DoubleFn); + default: + return TLI->has(LongDoubleFn); + } +} + //===----------------------------------------------------------------------===// // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// @@ -477,7 +498,7 @@ struct StrChrOpt : public LibCallOptimization { // Compute the offset, make sure to handle the case when we're searching for // zero (a weird way to spell strlen). - size_t I = CharC->getSExtValue() == 0 ? + size_t I = (0xFF & CharC->getSExtValue()) == 0 ? Str.size() : Str.find(CharC->getSExtValue()); if (I == StringRef::npos) // Didn't find the char. strchr returns null. return Constant::getNullValue(CI->getType()); @@ -513,7 +534,7 @@ struct StrRChrOpt : public LibCallOptimization { } // Compute the offset. - size_t I = CharC->getSExtValue() == 0 ? + size_t I = (0xFF & CharC->getSExtValue()) == 0 ? Str.size() : Str.rfind(CharC->getSExtValue()); if (I == StringRef::npos) // Didn't find the char. Return null. return Constant::getNullValue(CI->getType()); @@ -774,7 +795,7 @@ struct StrPBrkOpt : public LibCallOptimization { // Constant folding. if (HasS1 && HasS2) { size_t I = S1.find_first_of(S2); - if (I == std::string::npos) // No match. + if (I == StringRef::npos) // No match. return Constant::getNullValue(CI->getType()); return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); @@ -912,7 +933,7 @@ struct StrStrOpt : public LibCallOptimization { // If both strings are known, constant fold it. if (HasStr1 && HasStr2) { - std::string::size_type Offset = SearchStr.find(ToFindStr); + size_t Offset = SearchStr.find(ToFindStr); if (Offset == StringRef::npos) // strstr("foo", "bar") -> null return Constant::getNullValue(CI->getType()); @@ -1031,7 +1052,7 @@ struct MemSetOpt : public LibCallOptimization { if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) @@ -1133,9 +1154,13 @@ struct PowOpt : public UnsafeFPLibCallOptimization { Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { - if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 + // pow(1.0, x) -> 1.0 + if (Op1C->isExactlyValue(1.0)) return Op1C; - if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) + // pow(2.0, x) -> exp2(x) + if (Op1C->isExactlyValue(2.0) && + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, + LibFunc::exp2l)) return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); } @@ -1145,7 +1170,11 @@ struct PowOpt : public UnsafeFPLibCallOptimization { if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); - if (Op2C->isExactlyValue(0.5)) { + if (Op2C->isExactlyValue(0.5) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, + LibFunc::sqrtl) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, + LibFunc::fabsl)) { // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. @@ -1178,7 +1207,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { Value *Ret = NULL; if (UnsafeFPShrink && Callee->getName() == "exp2" && - TLI->has(LibFunc::exp2)) { + TLI->has(LibFunc::exp2f)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); } @@ -1229,6 +1258,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { } }; +struct SinCosPiOpt : public LibCallOptimization { + SinCosPiOpt() {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Make sure the prototype is as expected, otherwise the rest of the + // function is probably invalid and likely to abort. + if (!isTrigLibCall(CI)) + return 0; + + Value *Arg = CI->getArgOperand(0); + SmallVector<CallInst *, 1> SinCalls; + SmallVector<CallInst *, 1> CosCalls; + SmallVector<CallInst *, 1> SinCosCalls; + + bool IsFloat = Arg->getType()->isFloatTy(); + + // Look for all compatible sinpi, cospi and sincospi calls with the same + // argument. If there are enough (in some sense) we can make the + // substitution. + for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) + classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls, + SinCosCalls); + + // It's only worthwhile if both sinpi and cospi are actually used. + if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) + return 0; + + Value *Sin, *Cos, *SinCos; + insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, + SinCos); + + replaceTrigInsts(SinCalls, Sin); + replaceTrigInsts(CosCalls, Cos); + replaceTrigInsts(SinCosCalls, SinCos); + + return 0; + } + + bool isTrigLibCall(CallInst *CI) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. + bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) && + CI->hasFnAttr(Attribute::ReadNone); + + // Other than that we need float(float) or double(double) + return AttributesSafe && FT->getNumParams() == 1 && + FT->getReturnType() == FT->getParamType(0) && + (FT->getParamType(0)->isFloatTy() || + FT->getParamType(0)->isDoubleTy()); + } + + void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, + SmallVectorImpl<CallInst *> &SinCalls, + SmallVectorImpl<CallInst *> &CosCalls, + SmallVectorImpl<CallInst *> &SinCosCalls) { + CallInst *CI = dyn_cast<CallInst>(Val); + + if (!CI) + return; + + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + LibFunc::Func Func; + if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) + return; + + if (IsFloat) { + if (Func == LibFunc::sinpif) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospif) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stretf) + SinCosCalls.push_back(CI); + } else { + if (Func == LibFunc::sinpi) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospi) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stret) + SinCosCalls.push_back(CI); + } + } + + void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) { + for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(), + E = Calls.end(); + I != E; ++I) { + LCS->replaceAllUsesWith(*I, Res); + } + } + + void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, + bool UseFloat, Value *&Sin, Value *&Cos, + Value *&SinCos) { + Type *ArgTy = Arg->getType(); + Type *ResTy; + StringRef Name; + + Triple T(OrigCallee->getParent()->getTargetTriple()); + if (UseFloat) { + Name = "__sincospi_stretf"; + + assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); + // x86_64 can't use {float, float} since that would be returned in both + // xmm0 and xmm1, which isn't what a real struct would do. + ResTy = T.getArch() == Triple::x86_64 + ? static_cast<Type *>(VectorType::get(ArgTy, 2)) + : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL)); + } else { + Name = "__sincospi_stret"; + ResTy = StructType::get(ArgTy, ArgTy, NULL); + } + + Module *M = OrigCallee->getParent(); + Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), + ResTy, ArgTy, NULL); + + if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. + BasicBlock::iterator Loc = ArgInst; + B.SetInsertPoint(ArgInst->getParent(), ++Loc); + } else { + // Otherwise (e.g. for a constant) the beginning of the function is as + // good a place as any. + BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); + B.SetInsertPoint(&EntryBB, EntryBB.begin()); + } + + SinCos = B.CreateCall(Callee, Arg, "sincospi"); + + if (SinCos->getType()->isStructTy()) { + Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); + Cos = B.CreateExtractValue(SinCos, 1, "cospi"); + } else { + Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), + "sinpi"); + Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), + "cospi"); + } + } + +}; + //===----------------------------------------------------------------------===// // Integer Library Call Optimizations //===----------------------------------------------------------------------===// @@ -1333,6 +1511,54 @@ struct ToAsciiOpt : public LibCallOptimization { // Formatting and IO Library Call Optimizations //===----------------------------------------------------------------------===// +struct ErrorReportingOpt : public LibCallOptimization { + ErrorReportingOpt(int S = -1) : StreamArg(S) {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) { + // Error reporting calls should be cold, mark them as such. + // This applies even to non-builtin calls: it is only a hint and applies to + // functions that the frontend might not understand as builtins. + + // This heuristic was suggested in: + // Improving Static Branch Prediction in a Compiler + // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu + // Proceedings of PACT'98, Oct. 1998, IEEE + + if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI)) { + CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); + } + + return 0; + } + +protected: + bool isReportingError(Function *Callee, CallInst *CI) { + if (!ColdErrorCalls) + return false; + + if (!Callee || !Callee->isDeclaration()) + return false; + + if (StreamArg < 0) + return true; + + // These functions might be considered cold, but only if their stream + // argument is stderr. + + if (StreamArg >= (int) CI->getNumArgOperands()) + return false; + LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg)); + if (!LI) + return false; + GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()); + if (!GV || !GV->isDeclaration()) + return false; + return GV->getName() == "stderr"; + } + + int StreamArg; +}; + struct PrintFOpt : public LibCallOptimization { Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, IRBuilder<> &B) { @@ -1361,7 +1587,7 @@ struct PrintFOpt : public LibCallOptimization { // printf("foo\n") --> puts("foo") if (FormatStr[FormatStr.size()-1] == '\n' && - FormatStr.find('%') == std::string::npos) { // no format characters. + FormatStr.find('%') == StringRef::npos) { // No format characters. // Create a string literal with no \n on it. We expect the constant merge // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); @@ -1513,6 +1739,9 @@ struct SPrintFOpt : public LibCallOptimization { struct FPrintFOpt : public LibCallOptimization { Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 0); + (void) ER.callOptimizer(Callee, CI, B); + // All the optimizations depend on the format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1590,6 +1819,9 @@ struct FPrintFOpt : public LibCallOptimization { struct FWriteOpt : public LibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 3); + (void) ER.callOptimizer(Callee, CI, B); + // Require a pointer, an integer, an integer, a pointer, returning integer. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || @@ -1623,6 +1855,9 @@ struct FWriteOpt : public LibCallOptimization { struct FPutsOpt : public LibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 1); + (void) ER.callOptimizer(Callee, CI, B); + // These optimizations require DataLayout. if (!TD) return 0; @@ -1741,6 +1976,7 @@ static MemSetOpt MemSet; // Math library call optimizations. static UnaryDoubleFPOpt UnaryDoubleFP(false); static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); +static SinCosPiOpt SinCosPi; // Integer library call optimizations. static FFSOpt FFS; @@ -1750,6 +1986,9 @@ static IsAsciiOpt IsAscii; static ToAsciiOpt ToAscii; // Formatting and IO library call optimizations. +static ErrorReportingOpt ErrorReporting; +static ErrorReportingOpt ErrorReporting0(0); +static ErrorReportingOpt ErrorReporting1(1); static PrintFOpt PrintF; static SPrintFOpt SPrintF; static FPrintFOpt FPrintF; @@ -1825,6 +2064,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::cos: case LibFunc::cosl: return &Cos; + case LibFunc::sinpif: + case LibFunc::sinpi: + case LibFunc::cospif: + case LibFunc::cospi: + return &SinCosPi; case LibFunc::powf: case LibFunc::pow: case LibFunc::powl: @@ -1859,6 +2103,13 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { return &FPuts; case LibFunc::puts: return &Puts; + case LibFunc::perror: + return &ErrorReporting; + case LibFunc::vfprintf: + case LibFunc::fiprintf: + return &ErrorReporting0; + case LibFunc::fputc: + return &ErrorReporting1; case LibFunc::ceil: case LibFunc::fabs: case LibFunc::floor: @@ -1940,7 +2191,7 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0; + if (CI->isNoBuiltin()) return 0; return Impl->optimizeCall(CI); } @@ -1950,3 +2201,53 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { } } + +// TODO: +// Additional cases that we need to add to this file: +// +// cbrt: +// * cbrt(expN(X)) -> expN(x/3) +// * cbrt(sqrt(x)) -> pow(x,1/6) +// * cbrt(sqrt(x)) -> pow(x,1/9) +// +// exp, expf, expl: +// * exp(log(x)) -> x +// +// log, logf, logl: +// * log(exp(x)) -> x +// * log(x**y) -> y*log(x) +// * log(exp(y)) -> y*log(e) +// * log(exp2(y)) -> y*log(2) +// * log(exp10(y)) -> y*log(10) +// * log(sqrt(x)) -> 0.5*log(x) +// * log(pow(x,y)) -> y*log(x) +// +// lround, lroundf, lroundl: +// * lround(cnst) -> cnst' +// +// pow, powf, powl: +// * pow(exp(x),y) -> exp(x*y) +// * pow(sqrt(x),y) -> pow(x,y*0.5) +// * pow(pow(x,y),z)-> pow(x,y*z) +// +// round, roundf, roundl: +// * round(cnst) -> cnst' +// +// signbit: +// * signbit(cnst) -> cnst' +// * signbit(nncst) -> 0 (if pstv is a non-negative constant) +// +// sqrt, sqrtf, sqrtl: +// * sqrt(expN(x)) -> expN(x*0.5) +// * sqrt(Nroot(x)) -> pow(x,1/(2*N)) +// * sqrt(pow(x,y)) -> pow(|x|,y*0.5) +// +// strchr: +// * strchr(p, 0) -> strlen(p) +// tan, tanf, tanl: +// * tan(atan(x)) -> x +// +// trunc, truncf, truncl: +// * trunc(cnst) -> cnst' +// +// diff --git a/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp new file mode 100644 index 0000000..2ef692c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp @@ -0,0 +1,222 @@ +//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables, or to instrument some functions or global variables in a specific +// way, based on a user-supplied list. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SpecialCaseList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include <string> +#include <utility> + +namespace llvm { + +/// Represents a set of regular expressions. Regular expressions which are +/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all +/// others are represented as a single pipe-separated regex in RegEx. The +/// reason for doing so is efficiency; StringSet is much faster at matching +/// literal strings than Regex. +struct SpecialCaseList::Entry { + StringSet<> Strings; + Regex *RegEx; + + Entry() : RegEx(0) {} + + bool match(StringRef Query) const { + return Strings.count(Query) || (RegEx && RegEx->match(Query)); + } +}; + +SpecialCaseList::SpecialCaseList() : Entries() {} + +SpecialCaseList *SpecialCaseList::create( + const StringRef Path, std::string &Error) { + if (Path.empty()) + return new SpecialCaseList(); + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path, File)) { + Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); + return 0; + } + return create(File.get(), Error); +} + +SpecialCaseList *SpecialCaseList::create( + const MemoryBuffer *MB, std::string &Error) { + OwningPtr<SpecialCaseList> SCL(new SpecialCaseList()); + if (!SCL->parse(MB, Error)) + return 0; + return SCL.take(); +} + +SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) { + std::string Error; + if (SpecialCaseList *SCL = create(Path, Error)) + return SCL; + report_fatal_error(Error); +} + +bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { + // Iterate through each line in the blacklist file. + SmallVector<StringRef, 16> Lines; + SplitString(MB->getBuffer(), Lines, "\n\r"); + StringMap<StringMap<std::string> > Regexps; + assert(Entries.empty() && + "parse() should be called on an empty SpecialCaseList"); + int LineNo = 1; + for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); + I != E; ++I, ++LineNo) { + // Ignore empty lines and lines starting with "#" + if (I->empty() || I->startswith("#")) + continue; + // Get our prefix and unparsed regexp. + std::pair<StringRef, StringRef> SplitLine = I->split(":"); + StringRef Prefix = SplitLine.first; + if (SplitLine.second.empty()) { + // Missing ':' in the line. + Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" + + SplitLine.first + "'").str(); + return false; + } + + std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); + std::string Regexp = SplitRegexp.first; + StringRef Category = SplitRegexp.second; + + // Backwards compatibility. + if (Prefix == "global-init") { + Prefix = "global"; + Category = "init"; + } else if (Prefix == "global-init-type") { + Prefix = "type"; + Category = "init"; + } else if (Prefix == "global-init-src") { + Prefix = "src"; + Category = "init"; + } + + // See if we can store Regexp in Strings. + if (Regex::isLiteralERE(Regexp)) { + Entries[Prefix][Category].Strings.insert(Regexp); + continue; + } + + // Replace * with .* + for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; + pos += strlen(".*")) { + Regexp.replace(pos, strlen("*"), ".*"); + } + + // Check that the regexp is valid. + Regex CheckRE(Regexp); + std::string REError; + if (!CheckRE.isValid(REError)) { + Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" + + SplitLine.second + "': " + REError).str(); + return false; + } + + // Add this regexp into the proper group by its prefix. + if (!Regexps[Prefix][Category].empty()) + Regexps[Prefix][Category] += "|"; + Regexps[Prefix][Category] += "^" + Regexp + "$"; + } + + // Iterate through each of the prefixes, and create Regexs for them. + for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(), + E = Regexps.end(); + I != E; ++I) { + for (StringMap<std::string>::const_iterator II = I->second.begin(), + IE = I->second.end(); + II != IE; ++II) { + Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); + } + } + return true; +} + +SpecialCaseList::~SpecialCaseList() { + for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), + E = Entries.end(); + I != E; ++I) { + for (StringMap<Entry>::const_iterator II = I->second.begin(), + IE = I->second.end(); + II != IE; ++II) { + delete II->second.RegEx; + } + } +} + +bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { + return isIn(*F.getParent(), Category) || + inSectionCategory("fun", F.getName(), Category); +} + +static StringRef GetGlobalTypeString(const GlobalValue &G) { + // Types of GlobalVariables are always pointer types. + Type *GType = G.getType()->getElementType(); + // For now we support blacklisting struct types only. + if (StructType *SGType = dyn_cast<StructType>(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return "<unknown type>"; +} + +bool SpecialCaseList::isIn(const GlobalVariable &G, + const StringRef Category) const { + return isIn(*G.getParent(), Category) || + inSectionCategory("global", G.getName(), Category) || + inSectionCategory("type", GetGlobalTypeString(G), Category); +} + +bool SpecialCaseList::isIn(const GlobalAlias &GA, + const StringRef Category) const { + if (isIn(*GA.getParent(), Category)) + return true; + + if (isa<FunctionType>(GA.getType()->getElementType())) + return inSectionCategory("fun", GA.getName(), Category); + + return inSectionCategory("global", GA.getName(), Category) || + inSectionCategory("type", GetGlobalTypeString(GA), Category); +} + +bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { + return inSectionCategory("src", M.getModuleIdentifier(), Category); +} + +bool SpecialCaseList::inSectionCategory(const StringRef Section, + const StringRef Query, + const StringRef Category) const { + StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); + if (I == Entries.end()) return false; + StringMap<Entry>::const_iterator II = I->second.find(Category); + if (II == I->second.end()) return false; + + return II->getValue().match(Query); +} + +} // namespace llvm diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index 544c5ee..457fc80 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -22,14 +22,22 @@ using namespace llvm; // Out of line method to get vtable etc for class. void ValueMapTypeRemapper::anchor() {} +void ValueMaterializer::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, - ValueMapTypeRemapper *TypeMapper) { + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { ValueToValueMapTy::iterator I = VM.find(V); // If the value already exists in the map, use it. if (I != VM.end() && I->second) return I->second; + // If we have a materializer and it can materialize a value, use that. + if (Materializer) { + if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V))) + return VM[V] = NewV; + } + // Global values do not need to be seeded into the VM if they // are using the identity mapping. if (isa<GlobalValue>(V) || isa<MDString>(V)) @@ -64,7 +72,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); if (OP == 0) continue; - Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper); + Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. if (Mapped_OP == OP || @@ -79,7 +87,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, if (Op == 0) Elts.push_back(0); else { - Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper); + Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) @@ -109,9 +117,9 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { Function *F = - cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper)); + cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper, Materializer)); BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM, - Flags, TypeMapper)); + Flags, TypeMapper, Materializer)); return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); } @@ -121,7 +129,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Value *Mapped = 0; for (; OpNo != NumOperands; ++OpNo) { Value *Op = C->getOperand(OpNo); - Mapped = MapValue(Op, VM, Flags, TypeMapper); + Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer); if (Mapped != C) break; } @@ -149,7 +157,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Map the rest of the operands that aren't processed yet. for (++OpNo; OpNo != NumOperands; ++OpNo) Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, - Flags, TypeMapper)); + Flags, TypeMapper, Materializer)); } if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) @@ -173,10 +181,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, /// current values into those specified by VMap. /// void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, - RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){ + RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer){ // Remap operands. for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, VMap, Flags, TypeMapper); + Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer); // If we aren't ignoring missing entries, assert that something happened. if (V != 0) *op = V; @@ -204,7 +213,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { MDNode *Old = MI->second; - MDNode *New = MapValue(Old, VMap, Flags, TypeMapper); + MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer); if (New != Old) I->setMetadata(MI->first, New); } |