Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar')
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp | 377
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 1
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DCE.cpp | 1
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 54
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp | 83
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 453
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 155
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 25
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 28
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 144
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 31
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 35
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 19
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 146
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 27
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 298
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 160
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp | 127
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 15
23 files changed, 1284 insertions(+), 918 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 9536939..0184390 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -47,21 +47,21 @@ using namespace llvm;
using namespace llvm::PatternMatch;
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
-STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
-STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
"sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
"of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
"computations were sunk");
-STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
-STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
-static cl::opt<bool>
-CriticalEdgeSplit("cgp-critical-edge-splitting",
- cl::desc("Split critical edges during codegen prepare"),
- cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableBranchOpts(
+ "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable branch optimizations in CodeGenPrepare"));
namespace {
class CodeGenPrepare : public FunctionPass {
@@ -76,15 +76,15 @@ namespace {
/// update it.
BasicBlock::iterator CurInstIterator;
- /// BackEdges - Keep a set of all the loop back edges.
- ///
- SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
-
- // Keeps track of non-local addresses that have been sunk into a block. This
- // allows us to avoid inserting duplicate code for blocks with multiple
- // load/stores of the same address.
+ /// Keeps track of non-local addresses that have been sunk into a block.
+ /// This allows us to avoid inserting duplicate code for blocks with
+ /// multiple load/stores of the same address.
DenseMap<Value*, Value*> SunkAddrs;
+  /// ModifiedDT - If CFG is modified in any way, dominator tree may need to
+ /// be updated.
+ bool ModifiedDT;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
@@ -98,10 +98,6 @@ namespace {
AU.addPreserved<ProfileInfo>();
}
- virtual void releaseMemory() {
- BackEdges.clear();
- }
-
private:
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -113,7 +109,7 @@ namespace {
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
- void findLoopBackEdges(const Function &F);
+ bool DupRetToEnableTailCallOpts(ReturnInst *RI);
};
}
@@ -125,40 +121,42 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
}
-/// findLoopBackEdges - Do a DFS walk to find loop back edges.
-///
-void CodeGenPrepare::findLoopBackEdges(const Function &F) {
- SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
- FindFunctionBackedges(F, Edges);
-
- BackEdges.insert(Edges.begin(), Edges.end());
-}
-
-
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
+ ModifiedDT = false;
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
+
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
- // Now find loop back edges, but only if they are being used to decide which
- // critical edges to split.
- if (CriticalEdgeSplit)
- findLoopBackEdges(F);
-
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
MadeChange |= OptimizeBlock(*BB);
+ }
EverMadeChange |= MadeChange;
}
SunkAddrs.clear();
+ if (!DisableBranchOpts) {
+ MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ MadeChange |= ConstantFoldTerminator(BB);
+
+ if (MadeChange)
+ ModifiedDT = true;
+ EverMadeChange |= MadeChange;
+ }
+
+ if (ModifiedDT && DT)
+ DT->DT->recalculate(F);
+
return EverMadeChange;
}
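
Aside on the new DT->DT->recalculate(F) call above: the double member access is intentional. In this era of LLVM the DominatorTree analysis pass is a thin wrapper whose public member, also named DT, holds the actual DominatorTreeBase<BasicBlock> data, and recalculating that object is how a pass refreshes dominator information after changing the CFG behind the analysis's back. A simplified sketch of the relationship (illustrative only, not the verbatim header):

    // Simplified shape of llvm/Analysis/Dominators.h from this period.
    class DominatorTree : public FunctionPass {
    public:
      DominatorTreeBase<BasicBlock> *DT;  // the underlying dominator tree data
      // ... pass boilerplate elided ...
    };

    // Hence the pattern used in runOnFunction() above, where the local 'DT'
    // is the DominatorTree *pass*:
    //   if (ModifiedDT && DT)
    //     DT->DT->recalculate(F);        // rebuild the wrapped tree
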
@@ -333,7 +331,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
- if (DT) {
+ if (DT && !ModifiedDT) {
BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
@@ -350,110 +348,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
-/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
-/// lives in to see if there is a block that we can reuse as a critical edge
-/// from TIBB.
-static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
- BasicBlock *Dest = DestPHI->getParent();
-
- /// TIPHIValues - This array is lazily computed to determine the values of
- /// PHIs in Dest that TI would provide.
- SmallVector<Value*, 32> TIPHIValues;
-
- /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
- unsigned TIBBEntryNo = 0;
-
- // Check to see if Dest has any blocks that can be used as a split edge for
- // this terminator.
- for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
- BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
- // To be usable, the pred has to end with an uncond branch to the dest.
- BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
- if (!PredBr || !PredBr->isUnconditional())
- continue;
- // Must be empty other than the branch and debug info.
- BasicBlock::iterator I = Pred->begin();
- while (isa<DbgInfoIntrinsic>(I))
- I++;
- if (&*I != PredBr)
- continue;
- // Cannot be the entry block; its label does not get emitted.
- if (Pred == &Dest->getParent()->getEntryBlock())
- continue;
-
- // Finally, since we know that Dest has phi nodes in it, we have to make
- // sure that jumping to Pred will have the same effect as going to Dest in
- // terms of PHI values.
- PHINode *PN;
- unsigned PHINo = 0;
- unsigned PredEntryNo = pi;
-
- bool FoundMatch = true;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
- if (PHINo == TIPHIValues.size()) {
- if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
- TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
- TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
- }
-
- // If the PHI entry doesn't work, we can't use this pred.
- if (PN->getIncomingBlock(PredEntryNo) != Pred)
- PredEntryNo = PN->getBasicBlockIndex(Pred);
-
- if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
- FoundMatch = false;
- break;
- }
- }
-
- // If we found a workable predecessor, change TI to branch to Succ.
- if (FoundMatch)
- return Pred;
- }
- return 0;
-}
-
-
-/// SplitEdgeNicely - Split the critical edge from TI to its specified
-/// successor if it will improve codegen. We only do this if the successor has
-/// phi nodes (otherwise critical edges are ok). If there is already another
-/// predecessor of the succ that is empty (and thus has no phi nodes), use it
-/// instead of introducing a new block.
-static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
- SmallSet<std::pair<const BasicBlock*,
- const BasicBlock*>, 8> &BackEdges,
- Pass *P) {
- BasicBlock *TIBB = TI->getParent();
- BasicBlock *Dest = TI->getSuccessor(SuccNum);
- assert(isa<PHINode>(Dest->begin()) &&
- "This should only be called if Dest has a PHI!");
- PHINode *DestPHI = cast<PHINode>(Dest->begin());
-
- // Do not split edges to EH landing pads.
- if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
- if (Invoke->getSuccessor(1) == Dest)
- return;
-
- // As a hack, never split backedges of loops. Even though the copy for any
- // PHIs inserted on the backedge would be dead for exits from the loop, we
- // assume that the cost of *splitting* the backedge would be too high.
- if (BackEdges.count(std::make_pair(TIBB, Dest)))
- return;
-
- if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
- ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
- if (PFI)
- PFI->splitEdge(TIBB, Dest, ReuseBB);
- Dest->removePredecessor(TIBB);
- TI->setSuccessor(SuccNum, ReuseBB);
- return;
- }
-
- SplitCriticalEdge(TI, SuccNum, P, true);
-}
-
-
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
@@ -640,7 +534,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// happens.
WeakVH IterHandle(CurInstIterator);
- ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
+ ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -666,6 +561,129 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
return Simplifier.fold(CI, TD);
}
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions to the predecessor to enable tail call optimizations. The
+/// case it is currently looking for is:
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// br label %return
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// br label %return
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// br label %return
+/// return:
+/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+/// ret i32 %retval
+///
+/// =>
+///
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// ret i32 %tmp0
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// ret i32 %tmp1
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// ret i32 %tmp2
+///
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+ if (!TLI)
+ return false;
+
+ Value *V = RI->getReturnValue();
+ PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
+ if (V && !PN)
+ return false;
+
+ BasicBlock *BB = RI->getParent();
+ if (PN && PN->getParent() != BB)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ // See llvm::isInTailCallPosition().
+ const Function *F = BB->getParent();
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Make sure there are no instructions between the PHI and return, or that the
+ // return is the first instruction in the block.
+ if (PN) {
+ BasicBlock::iterator BI = BB->begin();
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI != RI)
+ return false;
+ } else {
+ BasicBlock::iterator BI = BB->begin();
+ while (isa<DbgInfoIntrinsic>(BI)) ++BI;
+ if (&*BI != RI)
+ return false;
+ }
+
+ /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+ /// call.
+ SmallVector<CallInst*, 4> TailCalls;
+ if (PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Make sure the phi value is indeed produced by the tail call.
+ if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+ TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ } else {
+ SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (!VisitedBBs.insert(*PI))
+ continue;
+
+ BasicBlock::InstListType &InstList = (*PI)->getInstList();
+ BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
+ BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
+ do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
+ if (RI == RE)
+ continue;
+
+ CallInst *CI = dyn_cast<CallInst>(&*RI);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+ CallInst *CI = TailCalls[i];
+ CallSite CS(CI);
+
+ // Conservatively require the attributes of the call to match those of the
+ // return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ continue;
+
+ // Make sure the call instruction is followed by an unconditional branch to
+ // the return block.
+ BasicBlock *CallBB = CI->getParent();
+ BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+ continue;
+
+ // Duplicate the return into CallBB.
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ ModifiedDT = Changed = true;
+ ++NumRetsDup;
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (Changed && pred_begin(BB) == pred_end(BB))
+ BB->eraseFromParent();
+
+ return Changed;
+}
+
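+
+For intuition, here is a hypothetical source-level function (names invented, not from the patch) of the shape that produces the IR pattern described in the comment above: several predecessors each compute a call result that flows through one PHI into a shared return. Once the return is duplicated into each predecessor, every call is immediately followed by a ret and can be emitted as a true tail call.
+
+    // Hypothetical illustration only.
+    int f0();
+    int f1();
+    int f2();
+
+    int dispatch(int sel) {
+      if (sel == 0)
+        return f0();   // after duplication: tail call + ret in its own block
+      if (sel == 1)
+        return f1();
+      return f2();
+    }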
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
@@ -701,7 +719,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// the addressing mode obtained from the non-PHI roots of the graph
// are equivalent.
Value *Consensus = 0;
- unsigned NumUses = 0;
+ unsigned NumUsesConsensus = 0;
+ bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
while (!worklist.empty()) {
@@ -728,16 +747,31 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
ExtAddrMode NewAddrMode =
AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
NewAddrModeInsts, *TLI);
-
- // Ensure that the obtained addressing mode is equivalent to that obtained
- // for all other roots of the PHI traversal. Also, when choosing one
- // such root as representative, select the one with the most uses in order
- // to keep the cost modeling heuristics in AddressingModeMatcher applicable.
- if (!Consensus || NewAddrMode == AddrMode) {
- if (V->getNumUses() > NumUses) {
+
+ // This check is broken into two cases with very similar code to avoid using
+ // getNumUses() as much as possible. Some values have a lot of uses, so
+ // calling getNumUses() unconditionally caused a significant compile-time
+ // regression.
+ if (!Consensus) {
+ Consensus = V;
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ continue;
+ } else if (NewAddrMode == AddrMode) {
+ if (!IsNumUsesConsensusValid) {
+ NumUsesConsensus = Consensus->getNumUses();
+ IsNumUsesConsensusValid = true;
+ }
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher
+ // applicable.
+ unsigned NumUses = V->getNumUses();
+ if (NumUses > NumUsesConsensus) {
Consensus = V;
- NumUses = V->getNumUses();
- AddrMode = NewAddrMode;
+ NumUsesConsensus = NumUses;
AddrModeInsts = NewAddrModeInsts;
}
continue;
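
The compile-time problem the new comment above refers to comes from Value::getNumUses() not being a stored counter: it walks the value's entire use list on every call, so querying it unconditionally for every root of the PHI traversal is costly when some values have very many uses. Roughly the following (a sketch of the cost, not the verbatim implementation):

    // Approximate behaviour of Value::getNumUses():
    unsigned getNumUsesSlow(const llvm::Value *V) {
      unsigned N = 0;
      for (llvm::Value::const_use_iterator UI = V->use_begin(),
                                           UE = V->use_end(); UI != UE; ++UI)
        ++N;                              // O(#uses) work on every call
      return N;
    }

The rewrite therefore defers the count until a second root with an equivalent addressing mode actually forces a comparison, and caches it for the current consensus value.
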
@@ -855,11 +889,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
if (Repl->use_empty()) {
+ // This can cause recursive deletion, which can invalidate our iterator.
+ // Use a WeakVH to hold onto it in case this happens.
+ WeakVH IterHandle(CurInstIterator);
+ BasicBlock *BB = CurInstIterator->getParent();
+
RecursivelyDeleteTriviallyDeadInstructions(Repl);
- // This address is now available for reassignment, so erase the table entry;
- // we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
+
+ if (IterHandle != CurInstIterator) {
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ } else {
+ // This address is now available for reassignment, so erase the table
+ // entry; we don't want to match some completely different instruction.
+ SunkAddrs[Addr] = 0;
+ }
}
++NumMemoryInsts;
return true;
@@ -1073,6 +1122,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (CallInst *CI = dyn_cast<CallInst>(I))
return OptimizeCallInst(CI);
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+ return DupRetToEnableTailCallOpts(RI);
+
return false;
}
@@ -1080,21 +1132,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
- bool MadeChange = false;
-
- // Split all critical edges where the dest block has a PHI.
- if (CriticalEdgeSplit) {
- TerminatorInst *BBTI = BB.getTerminator();
- if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *SuccBB = BBTI->getSuccessor(i);
- if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, BackEdges, this);
- }
- }
- }
-
SunkAddrs.clear();
+ bool MadeChange = false;
CurInstIterator = BB.begin();
for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index be12973..e275268 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "correlated-value-propagation"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index dbb68f3..8dbcc23 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -23,7 +23,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
-#include <set>
using namespace llvm;
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 867a06a..53e4640 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -340,24 +340,35 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- int64_t Off1 = 0, Off2 = 0;
- const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD);
- const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD);
+ int64_t EarlierOff = 0, LaterOff = 0;
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
return false;
-
- // Otherwise, we might have a situation like:
- // store i16 -> P + 1 Byte
- // store i32 -> P
- // In this case, we see if the later store completely overlaps all bytes
- // stored by the previous store.
- if (Off1 < Off2 || // Earlier starts before Later.
- Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later.
- return false;
- // Otherwise, we have complete overlap.
- return true;
+
+ // The later store completely overlaps the earlier store if:
+ //
+ // 1. Both start at the same offset and the later one's size is greater than
+ // or equal to the earlier one's, or
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // 2. The earlier store has an offset greater than the later offset, but which
+ // still lies completely within the later store.
+ //
+ // |--earlier--|
+ // |----- later ------|
+ //
+ // We have to be careful here as *Off is signed while *.Size is unsigned.
+ if (EarlierOff >= LaterOff &&
+ uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+ return true;
+
+ // Otherwise, they don't completely overlap.
+ return false;
}
/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
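
A self-contained restatement of the overlap test added above, with the signed-offset/unsigned-size subtlety the comment warns about made explicit (standalone sketch, not the LLVM code itself):

    #include <cstdint>

    // True when the later store [LaterOff, LaterOff + LaterSize) completely
    // covers the earlier store [EarlierOff, EarlierOff + EarlierSize).
    // Offsets are signed byte offsets from the common base pointer; sizes
    // are unsigned byte counts.
    static bool laterCompletelyOverwritesEarlier(int64_t EarlierOff,
                                                 uint64_t EarlierSize,
                                                 int64_t LaterOff,
                                                 uint64_t LaterSize) {
      // Only form the unsigned difference after EarlierOff >= LaterOff is
      // known, so the conversion cannot wrap.
      return EarlierOff >= LaterOff &&
             uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
    }

    // Example: a store of i16 at P+2 followed by a store of i32 at P gives
    // laterCompletelyOverwritesEarlier(2, 2, 0, 4) == true, so the earlier
    // store is dead.
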
@@ -474,7 +485,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// away the store and we bail out. However, if we depend on on something
// that overwrites the memory location we *can* potentially optimize it.
//
- // Find out what memory location the dependant instruction stores.
+ // Find out what memory location the dependent instruction stores.
Instruction *DepWrite = InstDep.getInst();
AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
// If we didn't get a useful location, or if it isn't a size, bail out.
@@ -631,28 +642,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (AA->doesNotAccessMemory(CS))
continue;
- unsigned NumModRef = 0, NumOther = 0;
-
// If the call might load from any of our allocas, then any store above
// the call is live.
SmallVector<Value*, 8> LiveAllocas;
for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
E = DeadStackObjects.end(); I != E; ++I) {
- // If we detect that our AA is imprecise, it's not worth it to scan the
- // rest of the DeadPointers set. Just assume that the AA will return
- // ModRef for everything, and go ahead and bail out.
- if (NumModRef >= 16 && NumOther == 0)
- return MadeChange;
-
// See if the call site touches it.
AliasAnalysis::ModRefResult A =
AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
- if (A == AliasAnalysis::ModRef)
- ++NumModRef;
- else
- ++NumOther;
-
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
LiveAllocas.push_back(*I);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
deleted file mode 100644
index 4c3d188..0000000
--- a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This function breaks GEPs with more than 2 non-zero operands into smaller
-// GEPs each with no more than 2 non-zero operands. This exposes redundancy
-// between GEPs with common initial operand sequences.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "split-geps"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-namespace {
- class GEPSplitter : public FunctionPass {
- virtual bool runOnFunction(Function &F);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(ID) {
- initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char GEPSplitter::ID = 0;
-INITIALIZE_PASS(GEPSplitter, "split-geps",
- "split complex GEPs into simple GEPs", false, false)
-
-FunctionPass *llvm::createGEPSplitterPass() {
- return new GEPSplitter();
-}
-
-bool GEPSplitter::runOnFunction(Function &F) {
- bool Changed = false;
-
- // Visit each GEP instruction.
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; )
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) {
- unsigned NumOps = GEP->getNumOperands();
- // Ignore GEPs which are already simple.
- if (NumOps <= 2)
- continue;
- bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) &&
- cast<ConstantInt>(GEP->getOperand(1))->isZero();
- if (NumOps == 3 && FirstIndexIsZero)
- continue;
- // The first index is special and gets expanded with a 2-operand GEP
- // (unless it's zero, in which case we can skip this).
- Value *NewGEP = FirstIndexIsZero ?
- GEP->getOperand(0) :
- GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1),
- "tmp", GEP);
- // All remaining indices get expanded with a 3-operand GEP with zero
- // as the second operand.
- Value *Idxs[2];
- Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0);
- for (unsigned i = 2; i != NumOps; ++i) {
- Idxs[1] = GEP->getOperand(i);
- NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP);
- }
- GEP->replaceAllUsesWith(NewGEP);
- GEP->eraseFromParent();
- Changed = true;
- }
-
- return Changed;
-}
-
-void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index a0123f5..efecb97 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -63,50 +63,48 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
namespace {
struct Expression {
uint32_t opcode;
- const Type* type;
+ const Type *type;
SmallVector<uint32_t, 4> varargs;
- Expression() { }
- Expression(uint32_t o) : opcode(o) { }
+ Expression(uint32_t o = ~2U) : opcode(o) { }
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
return false;
- else if (opcode == ~0U || opcode == ~1U)
+ if (opcode == ~0U || opcode == ~1U)
return true;
- else if (type != other.type)
+ if (type != other.type)
return false;
- else if (varargs != other.varargs)
+ if (varargs != other.varargs)
return false;
return true;
}
};
class ValueTable {
- private:
- DenseMap<Value*, uint32_t> valueNumbering;
- DenseMap<Expression, uint32_t> expressionNumbering;
- AliasAnalysis* AA;
- MemoryDependenceAnalysis* MD;
- DominatorTree* DT;
-
- uint32_t nextValueNumber;
-
- Expression create_expression(Instruction* I);
- uint32_t lookup_or_add_call(CallInst* C);
- public:
- ValueTable() : nextValueNumber(1) { }
- uint32_t lookup_or_add(Value *V);
- uint32_t lookup(Value *V) const;
- void add(Value *V, uint32_t num);
- void clear();
- void erase(Value *v);
- void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
- AliasAnalysis *getAliasAnalysis() const { return AA; }
- void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
- void setDomTree(DominatorTree* D) { DT = D; }
- uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
- void verifyRemoved(const Value *) const;
+ DenseMap<Value*, uint32_t> valueNumbering;
+ DenseMap<Expression, uint32_t> expressionNumbering;
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
+
+ uint32_t nextValueNumber;
+
+ Expression create_expression(Instruction* I);
+ uint32_t lookup_or_add_call(CallInst* C);
+ public:
+ ValueTable() : nextValueNumber(1) { }
+ uint32_t lookup_or_add(Value *V);
+ uint32_t lookup(Value *V) const;
+ void add(Value *V, uint32_t num);
+ void clear();
+ void erase(Value *v);
+ void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+ AliasAnalysis *getAliasAnalysis() const { return AA; }
+ void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
+ void setDomTree(DominatorTree* D) { DT = D; }
+ uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+ void verifyRemoved(const Value *) const;
};
}
@@ -364,14 +362,14 @@ uint32_t ValueTable::lookup(Value *V) const {
return VI->second;
}
-/// clear - Remove all entries from the ValueTable
+/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
nextValueNumber = 1;
}
-/// erase - Remove a value from the value numbering
+/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
valueNumbering.erase(V);
}
@@ -392,20 +390,11 @@ void ValueTable::verifyRemoved(const Value *V) const {
namespace {
class GVN : public FunctionPass {
- bool runOnFunction(Function &F);
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(0) {
- initializeGVNPass(*PassRegistry::getPassRegistry());
- }
-
- private:
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
- const TargetData* TD;
-
+ const TargetData *TD;
+
ValueTable VN;
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -418,17 +407,39 @@ namespace {
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
BumpPtrAllocator TableAllocator;
+ SmallVector<Instruction*, 8> InstrsToErase;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit GVN(bool noloads = false)
+ : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ initializeGVNPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ /// markInstructionForDeletion - This removes the specified instruction from
+ /// our various maps and marks it for deletion.
+ void markInstructionForDeletion(Instruction *I) {
+ VN.erase(I);
+ InstrsToErase.push_back(I);
+ }
+
+ const TargetData *getTargetData() const { return TD; }
+ DominatorTree &getDominatorTree() const { return *DT; }
+ AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
+ MemoryDependenceAnalysis &getMemDep() const { return *MD; }
+ private:
/// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
/// its value number.
void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
- LeaderTableEntry& Curr = LeaderTable[N];
+ LeaderTableEntry &Curr = LeaderTable[N];
if (!Curr.Val) {
Curr.Val = V;
Curr.BB = BB;
return;
}
- LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>();
+ LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>();
Node->Val = V;
Node->BB = BB;
Node->Next = Curr.Next;
@@ -474,19 +485,17 @@ namespace {
AU.addPreserved<DominatorTree>();
AU.addPreserved<AliasAnalysis>();
}
+
// Helper fuctions
// FIXME: eliminate or document these better
- bool processLoad(LoadInst* L,
- SmallVectorImpl<Instruction*> &toErase);
- bool processInstruction(Instruction *I,
- SmallVectorImpl<Instruction*> &toErase);
- bool processNonLocalLoad(LoadInst* L,
- SmallVectorImpl<Instruction*> &toErase);
+ bool processLoad(LoadInst *L);
+ bool processInstruction(Instruction *I);
+ bool processNonLocalLoad(LoadInst *L);
bool processBlock(BasicBlock *BB);
- void dump(DenseMap<uint32_t, Value*>& d);
+ void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
- bool performPRE(Function& F);
+ bool performPRE(Function &F);
Value *findLeader(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
@@ -629,17 +638,17 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
+ // If this is already the right type, just return it.
const Type *StoredValTy = StoredVal->getType();
uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+ uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
- if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
- // Pointer to Pointer -> use bitcast.
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy())
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
- }
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->isPointerTy()) {
@@ -796,6 +805,36 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
StorePtr, StoreSize, TD);
}
+/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
+/// memdep query of a load that ends up being clobbered by another load. See if
+/// the other load can feed into the second load.
+static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr,
+ LoadInst *DepLI, const TargetData &TD){
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
+ int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ if (R != -1) return R;
+
+  // If we have a load/load clobber and DepLI can be widened to cover this load,
+ // then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+ if (Size == 0) return -1;
+
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
+}
+
+
+
static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
const TargetData &TD) {
@@ -843,9 +882,9 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
-/// that the store *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias. Check this case to see if there is
-/// anything more we can do before we give up.
+/// that the store provides bits used by the load but the pointers don't
+/// mustalias. Check this case to see if there is anything more we can do
+/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
const Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
@@ -881,6 +920,69 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
+/// GetStoreValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering load. This means
+/// that the load *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias. Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+ const Type *LoadTy, Instruction *InsertPt,
+ GVN &gvn) {
+ const TargetData &TD = *gvn.getTargetData();
+ // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
+ // widen SrcVal out to a larger load.
+ unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ if (Offset+LoadSize > SrcValSize) {
+ assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
+ assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+    // If we have a load/load clobber and DepLI can be widened to cover this
+ // load, then we should widen it to the next power of 2 size big enough!
+ unsigned NewLoadSize = Offset+LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ const Type *DestPTy =
+ IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
+ DestPTy = PointerType::get(DestPTy,
+ cast<PointerType>(PtrVal->getType())->getAddressSpace());
+
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (TD.isBigEndian())
+ RV = Builder.CreateLShr(RV,
+ NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ // We would like to use gvn.markInstructionForDeletion here, but we can't
+ // because the load is already memoized into the leader map table that GVN
+ // tracks. It is potentially possible to remove the load from the table,
+    // but then all of the operations based on it would need to be
+ // rehashed. Just leave the dead load around.
+ gvn.getMemDep().removeInstruction(SrcVal);
+ SrcVal = NewLoad;
+ }
+
+ return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+}
+
+
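+
+One detail of the widening above: the new load's size is Offset+LoadSize rounded up to a power of two, via the isPowerOf2_32/NextPowerOf2 pair. A minimal standalone equivalent of that size computation (sketch only; the real code uses the LLVM helpers):
+
+    #include <cstdint>
+
+    // Smallest power of two >= Size, assuming Size > 0: 3 -> 4, 4 -> 4, 5 -> 8.
+    // This matches the combined effect of the isPowerOf2_32 check plus
+    // NextPowerOf2 in GetLoadValueForLoad.
+    static uint64_t roundUpToWidenedLoadSize(uint64_t Size) {
+      if ((Size & (Size - 1)) == 0)
+        return Size;                      // already a power of two
+      uint64_t P = 1;
+      while (P < Size)
+        P <<= 1;
+      return P;
+    }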
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
@@ -943,11 +1045,12 @@ struct AvailableValueInBlock {
BasicBlock *BB;
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
MemIntrin // A memory intrinsic which is loaded from.
};
/// V - The value that is live out of the block.
- PointerIntPair<Value *, 1, ValType> Val;
+ PointerIntPair<Value *, 2, ValType> Val;
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
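
The PointerIntPair width change just above (1 bit to 2 bits) is forced by the new LoadVal enumerator: three ValType states no longer fit in a single spare low bit of the pointer. A minimal sketch of the idea (assumes, as PointerIntPair itself statically checks, that Value pointers leave at least two low bits free):

    #include "llvm/ADT/PointerIntPair.h"
    #include "llvm/Value.h"

    enum ValType { SimpleVal, LoadVal, MemIntrin };   // three states now

    // PointerIntPair packs a small integer tag into the unused low bits of a
    // sufficiently aligned pointer; three states need ceil(log2(3)) = 2 bits.
    typedef llvm::PointerIntPair<llvm::Value *, 2, ValType> TaggedAvailableValue;
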
@@ -972,37 +1075,69 @@ struct AvailableValueInBlock {
return Res;
}
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
return Val.getPointer();
}
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
MemIntrinsic *getMemIntrinValue() const {
- assert(!isSimpleValue() && "Wrong accessor");
+ assert(isMemIntrinValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(const Type *LoadTy,
- const TargetData *TD) const {
+ Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const {
Value *Res;
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
+ const TargetData *TD = gvn.getTargetData();
assert(TD && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
*TD);
- DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
} else {
+ const TargetData *TD = gvn.getTargetData();
+ assert(TD && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
LoadTy, BB->getTerminator(), *TD);
- DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
@@ -1010,21 +1145,20 @@ struct AvailableValueInBlock {
}
};
-}
+} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
- const TargetData *TD,
- const DominatorTree &DT,
- AliasAnalysis *AA) {
+ GVN &gvn) {
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
- DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
- return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
+ gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
+ LI->getParent()))
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
@@ -1040,14 +1174,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (SSAUpdate.HasValueForBlock(BB))
continue;
- SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
}
// Perform PHI construction.
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
// If new PHI nodes were created, notify alias analysis.
- if (V->getType()->isPointerTy())
+ if (V->getType()->isPointerTy()) {
+ AliasAnalysis *AA = gvn.getAliasAnalysis();
+
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
@@ -1059,6 +1195,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
AA->addEscapingUse(P->getOperandUse(2*ii));
}
+ }
return V;
}
@@ -1071,8 +1208,7 @@ static bool isLifetimeStart(const Instruction *Inst) {
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI,
- SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processNonLocalLoad(LoadInst *LI) {
// Find the non-local dependencies of the load.
SmallVector<NonLocalDepResult, 64> Deps;
AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
@@ -1088,7 +1224,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
+ if (Deps.size() == 1 && Deps[0].getResult().isClobber() &&
+ Deps[0].getResult().getInst()->getParent() == LI->getParent()) {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
@@ -1129,6 +1266,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
}
}
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+  // if we have this, replace the latter with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI != LI && Address && TD) {
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
+ LI->getPointerOperand(),
+ DepLI, *TD);
+
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
+ Offset));
+ continue;
+ }
+ }
+ }
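+
+Once the offset is known, "an extraction from the former" boils down to shifting and truncating the wider loaded integer (with a byte-order adjustment on big-endian targets). A tiny standalone model of the little-endian case, using plain C++ integers rather than LLVM types (hypothetical values):
+
+    #include <cstdint>
+
+    // Model: a 4-byte value was loaded from P and we want the i8 at P+1.
+    // On a little-endian target that is (Wide >> 8) truncated to 8 bits,
+    // mirroring the lshr+trunc sequence GVN materializes.
+    uint8_t extractByteAtOffset1LE(uint32_t Wide) {
+      return static_cast<uint8_t>(Wide >> 8);
+    }
+
+    // e.g. Wide == 0xDDCCBBAA (memory bytes AA BB CC DD) -> returns 0xBB.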
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
@@ -1187,7 +1344,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
continue;
}
}
- ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
continue;
}
@@ -1206,16 +1363,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
// Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
- VN.getAliasAnalysis());
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
- VN.erase(LI);
- toErase.push_back(LI);
+ markInstructionForDeletion(LI);
++NumGVNLoad;
return true;
}
@@ -1429,22 +1584,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
// Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
- VN.getAliasAnalysis());
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
- VN.erase(LI);
- toErase.push_back(LI);
+ markInstructionForDeletion(LI);
++NumPRELoad;
return true;
}
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
-bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processLoad(LoadInst *L) {
if (!MD)
return false;
@@ -1454,8 +1607,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
- // If the value isn't available, don't do anything!
- if (Dep.isClobber()) {
+ // If we have a clobber and target data is around, see if this is a clobber
+ // that we can fix up through code synthesis.
+ if (Dep.isClobber() && TD) {
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
@@ -1467,26 +1621,40 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// completely covers this load. This sort of thing can happen in bitfield
// access code.
Value *AvailVal = 0;
- if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
- if (TD) {
- int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
- L->getPointerOperand(),
- DepSI, *TD);
- if (Offset != -1)
- AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
- L->getType(), L, *TD);
- }
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
+ int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+ L->getPointerOperand(),
+ DepSI, *TD);
+ if (Offset != -1)
+ AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
+ L->getType(), L, *TD);
+ }
+
+ // Check to see if we have something like this:
+ // load i32* P
+ // load i8* (P+1)
+      // if we have this, replace the latter with an extraction from the former.
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) {
+ // If this is a clobber and L is the first instruction in its block, then
+ // we have the first instruction in the entry block.
+ if (DepLI == L)
+ return false;
+
+ int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
+ L->getPointerOperand(),
+ DepLI, *TD);
+ if (Offset != -1)
+ AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
+ }
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
- if (TD) {
- int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
- L->getPointerOperand(),
- DepMI, *TD);
- if (Offset != -1)
- AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
- }
+ int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+ L->getPointerOperand(),
+ DepMI, *TD);
+ if (Offset != -1)
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
}
if (AvailVal) {
@@ -1497,14 +1665,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(AvailVal);
if (AvailVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
-
+ }
+
+ // If the value isn't available, don't do anything!
+ if (Dep.isClobber()) {
DEBUG(
- // fast print dep, using operator<< on instruction would be too slow
+ // fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
WriteAsOperand(dbgs(), L);
Instruction *I = Dep.getInst();
@@ -1515,7 +1685,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// If it is defined in another block, try harder.
if (Dep.isNonLocal())
- return processNonLocalLoad(L, toErase);
+ return processNonLocalLoad(L);
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
@@ -1542,8 +1712,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(StoredVal);
if (StoredVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
@@ -1556,7 +1725,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// (depending on its type).
if (DepLI->getType() != L->getType()) {
if (TD) {
- AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
+ L, *TD);
if (AvailableVal == 0)
return false;
@@ -1571,8 +1741,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(AvailableVal);
if (DepLI->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
@@ -1582,19 +1751,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// intervening stores, for example.
if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
// If this load occurs either right after a lifetime begin,
// then the loaded value is undefined.
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
- VN.erase(L);
- toErase.push_back(L);
+ markInstructionForDeletion(L);
++NumGVNLoad;
return true;
}
@@ -1634,8 +1801,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
-bool GVN::processInstruction(Instruction *I,
- SmallVectorImpl<Instruction*> &toErase) {
+bool GVN::processInstruction(Instruction *I) {
// Ignore dbg info intrinsics.
if (isa<DbgInfoIntrinsic>(I))
return false;
@@ -1648,20 +1814,17 @@ bool GVN::processInstruction(Instruction *I,
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
- VN.erase(I);
- toErase.push_back(I);
+ markInstructionForDeletion(I);
return true;
}
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- bool Changed = processLoad(LI, toErase);
-
- if (!Changed) {
- unsigned Num = VN.lookup_or_add(LI);
- addToLeaderTable(Num, LI, LI->getParent());
- }
+ if (processLoad(LI))
+ return true;
- return Changed;
+ unsigned Num = VN.lookup_or_add(LI);
+ addToLeaderTable(Num, LI, LI->getParent());
+ return false;
}
// For conditions branches, we can perform simple conditional propagation on
@@ -1720,11 +1883,10 @@ bool GVN::processInstruction(Instruction *I,
}
// Remove it!
- VN.erase(I);
I->replaceAllUsesWith(repl);
if (MD && repl->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
- toErase.push_back(I);
+ markInstructionForDeletion(I);
return true;
}
@@ -1781,35 +1943,36 @@ bool GVN::runOnFunction(Function& F) {
bool GVN::processBlock(BasicBlock *BB) {
- // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
- // incrementing BI before processing an instruction).
- SmallVector<Instruction*, 8> toErase;
+ // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
+ // (and incrementing BI before processing an instruction).
+ assert(InstrsToErase.empty() &&
+ "We expect InstrsToErase to be empty across iterations");
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- ChangedFunction |= processInstruction(BI, toErase);
- if (toErase.empty()) {
+ ChangedFunction |= processInstruction(BI);
+ if (InstrsToErase.empty()) {
++BI;
continue;
}
// If we need some instructions deleted, do it now.
- NumGVNInstr += toErase.size();
+ NumGVNInstr += InstrsToErase.size();
// Avoid iterator invalidation.
bool AtStart = BI == BB->begin();
if (!AtStart)
--BI;
- for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
- E = toErase.end(); I != E; ++I) {
+ for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(),
+ E = InstrsToErase.end(); I != E; ++I) {
DEBUG(dbgs() << "GVN removed: " << **I << '\n');
if (MD) MD->removeInstruction(*I);
(*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
}
- toErase.clear();
+ InstrsToErase.clear();
if (AtStart)
BI = BB->begin();
@@ -1944,11 +2107,11 @@ bool GVN::performPRE(Function &F) {
addToLeaderTable(ValNo, PREInstr, PREPred);
// Create a PHI to make the value available in this block.
- PHINode* Phi = PHINode::Create(CurInst->getType(),
+ pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
+ PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE),
CurInst->getName() + ".pre-phi",
CurrentBlock->begin());
- for (pred_iterator PI = pred_begin(CurrentBlock),
- PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
Phi->addIncoming(predMap[P], P);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 0fb6798..09d569a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -73,6 +73,7 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
+ SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
public:
@@ -98,6 +99,7 @@ namespace {
}
private:
+ bool isValidRewrite(Value *FromVal, Value *ToVal);
void EliminateIVComparisons();
void EliminateIVRemainders();
@@ -134,6 +136,53 @@ Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
}
+/// isValidRewrite - Return true if the SCEV expansion generated by the
+/// rewriter can replace the original value. SCEV guarantees that it
+/// produces the same value, but the way it is produced may be illegal IR.
+/// Ideally, this function will only be called for verification.
+bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
+ // If an SCEV expression subsumed multiple pointers, its expansion could
+ // reassociate the GEP changing the base pointer. This is illegal because the
+ // final address produced by a GEP chain must be inbounds relative to its
+ // underlying object. Otherwise basic alias analysis, among other things,
+ // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
+ // producing an expression involving multiple pointers. Until then, we must
+ // bail out here.
+ //
+ // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+ // because it understands lcssa phis while SCEV does not.
+ Value *FromPtr = FromVal;
+ Value *ToPtr = ToVal;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+ FromPtr = GEP->getPointerOperand();
+ }
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+ ToPtr = GEP->getPointerOperand();
+ }
+ if (FromPtr != FromVal || ToPtr != ToVal) {
+ // Quickly check the common case
+ if (FromPtr == ToPtr)
+ return true;
+
+ // SCEV may have rewritten an expression that produces the GEP's pointer
+ // operand. That's ok as long as the pointer operand has the same base
+ // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+ // base of a recurrence. This handles the case in which SCEV expansion
+ // converts a pointer type recurrence into a nonrecurrent pointer base
+ // indexed by an integer recurrence.
+ const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
+ const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
+ if (FromBase == ToBase)
+ return true;
+
+ DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
+ << *FromBase << " != " << *ToBase << "\n");
+
+ return false;
+ }
+ return true;
+}
+
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
@@ -226,7 +275,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// update the branch to use the new comparison; in the common case this
// will make old comparison dead.
BI->setCondition(Cond);
- RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
+ DeadInsts.push_back(OrigCond);
++NumLFTR;
Changed = true;
@@ -304,14 +353,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (!SE->isLoopInvariant(ExitValue, L))
continue;
- Changed = true;
- ++NumReplaced;
-
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n");
+ if (!isValidRewrite(Inst, ExitVal)) {
+ DeadInsts.push_back(ExitVal);
+ continue;
+ }
+ Changed = true;
+ ++NumReplaced;
+
PN->setIncomingValue(i, ExitVal);
// If this instruction is dead now, delete it.
@@ -366,8 +419,6 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
}
void IndVarSimplify::EliminateIVComparisons() {
- SmallVector<WeakVH, 16> DeadInsts;
-
// Look for ICmp users.
for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
IVStrideUse &UI = *I;
@@ -399,18 +450,9 @@ void IndVarSimplify::EliminateIVComparisons() {
DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
DeadInsts.push_back(ICmp);
}
-
- // Now that we're done iterating through lists, clean up any instructions
- // which are now dead.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
void IndVarSimplify::EliminateIVRemainders() {
- SmallVector<WeakVH, 16> DeadInsts;
-
// Look for SRem and URem users.
for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
IVStrideUse &UI = *I;
@@ -466,13 +508,6 @@ void IndVarSimplify::EliminateIVRemainders() {
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
DeadInsts.push_back(Rem);
}
-
- // Now that we're done iterating through lists, clean up any instructions
- // which are now dead.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -491,6 +526,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
+ DeadInsts.clear();
Changed = false;
// If there are any floating-point recurrences, attempt to
@@ -589,9 +625,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
ExitingBlock, BI, Rewriter);
}
- // Rewrite IV-derived expressions. Clears the rewriter cache.
+ // Rewrite IV-derived expressions.
RewriteIVExpressions(L, Rewriter);
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted in the loop below, causing the AssertingVH in the cache to
+ // trigger.
+ Rewriter.clear();
+
+ // Now that we're done iterating through lists, clean up any instructions
+ // which are now dead.
+ while (!DeadInsts.empty())
+ if (Instruction *Inst =
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
+ RecursivelyDeleteTriviallyDeadInstructions(Inst);
+
// The Rewriter may not be used from this point on.
// Loop-invariant instructions in the preheader that aren't used in the
@@ -632,7 +680,7 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
if (!isSafe(*I, L, SE)) return false;
return true;
}
-
+
// A cast is safe if its operand is.
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
return isSafe(C->getOperand(), L, SE);
@@ -651,8 +699,6 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
}
void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- SmallVector<WeakVH, 16> DeadInsts;
-
// Rewrite all induction variable expressions in terms of the canonical
// induction variable.
//
@@ -705,6 +751,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// Now expand it into actual Instructions and patch it into place.
Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+
+ if (!isValidRewrite(Op, NewVal)) {
+ DeadInsts.push_back(NewVal);
+ continue;
+ }
// Inform ScalarEvolution that this value is changing. The change doesn't
// affect its value, but it does potentially affect which use lists the
// value will be on after the replacement, which affects ScalarEvolution's
@@ -717,25 +770,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
NewVal->takeName(Op);
User->replaceUsesOfWith(Op, NewVal);
UI->setOperandValToReplace(NewVal);
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
+
++NumRemoved;
Changed = true;
// The old value may be dead now.
DeadInsts.push_back(Op);
}
-
- // Clear the rewriter cache, because values that are in the rewriter's cache
- // can be deleted in the loop below, causing the AssertingVH in the cache to
- // trigger.
- Rewriter.clear();
- // Now that we're done iterating through lists, clean up any instructions
- // which are now dead.
- while (!DeadInsts.empty())
- if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
- RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
/// If there's a single exit block, sink any loop-invariant values that
@@ -859,7 +900,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
-
+
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
@@ -884,7 +925,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
if (Compare == 0 || !Compare->hasOneUse() ||
!isa<BranchInst>(Compare->use_back()))
return;
-
+
BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
// We need to verify that the branch actually controls the iteration count
@@ -896,8 +937,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
(L->contains(TheBr->getSuccessor(0)) &&
L->contains(TheBr->getSuccessor(1))))
return;
-
-
+
+
// If it isn't a comparison with an integer-as-fp (the exit value), we can't
// transform it.
ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
@@ -905,7 +946,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
if (ExitValueVal == 0 ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
return;
-
+
// Find new predicate for integer comparison.
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
switch (Compare->getPredicate()) {
@@ -923,13 +964,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
case CmpInst::FCMP_OLE:
case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
}
-
+
// We convert the floating point induction variable to a signed i32 value if
// we can. This is only safe if the comparison will not overflow in a way
// that won't be trapped by the integer equivalent operations. Check for this
// now.
// TODO: We could use i64 if it is native and the range requires it.
-
+
// The start/stride/exit values must all fit in signed i32.
if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
return;
@@ -945,59 +986,59 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
if (InitValue >= ExitValue ||
NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
return;
-
+
uint32_t Range = uint32_t(ExitValue-InitValue);
if (NewPred == CmpInst::ICMP_SLE) {
// Normalize SLE -> SLT, check for infinite loop.
if (++Range == 0) return; // Range overflows.
}
-
+
unsigned Leftover = Range % uint32_t(IncValue);
-
+
// If this is an equality comparison, we require that the strided value
// exactly land on the exit value, otherwise the IV condition will wrap
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
return;
-
+
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
return;
-
+
} else {
// If we have a negative stride, we require the init to be greater than the
// exit value and an equality or greater than comparison.
if (InitValue >= ExitValue ||
NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
return;
-
+
uint32_t Range = uint32_t(InitValue-ExitValue);
if (NewPred == CmpInst::ICMP_SGE) {
// Normalize SGE -> SGT, check for infinite loop.
if (++Range == 0) return; // Range overflows.
}
-
+
unsigned Leftover = Range % uint32_t(-IncValue);
-
+
// If this is an equality comparison, we require that the strided value
// exactly land on the exit value, otherwise the IV condition will wrap
// around and do things the fp IV wouldn't.
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
return;
-
+
// If the stride would wrap around the i32 before exiting, we can't
// transform the IV.
if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
return;
}
-
+
const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
// Insert new integer induction variable.
- PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN);
+ PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
PN->getIncomingBlock(IncomingEdge));
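
For context on what HandleFloatingPointIV is buying: the transform recognizes loops whose counter is a floating-point value stepping by an integral amount, rewrites the counter as an i32 PHI (hence the new reserved-operand count of 2), and feeds any remaining FP uses through a sitofp. A rough source-level before/after, with variable names invented here:

#include <cstdio>

// Before: the induction variable itself is floating point, which blocks the
// integer IV machinery (trip-count analysis, LFTR, and so on).
void fp_iv(double *a) {
  for (double d = 0.0; d != 100.0; d += 1.0)
    a[(int)d] *= 2.0;
}

// After (conceptually): an i32 induction variable drives the loop, and the
// FP value is reconstructed only where it is actually needed, matching the
// sitofp the pass inserts for leftover FP uses.
void int_iv(double *a) {
  for (int i = 0; i != 100; ++i) {
    double d = (double)i;   // sitofp of the new integer IV
    a[(int)d] *= 2.0;
  }
}

int main() {
  double a[100], b[100];
  for (int i = 0; i < 100; ++i) { a[i] = b[i] = i; }
  fp_iv(a);
  int_iv(b);
  std::printf("%f %f\n", a[42], b[42]);   // both print 84.000000
}
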
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 90094a8..7168177 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -16,6 +16,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
@@ -170,9 +171,9 @@ bool JumpThreading::runOnFunction(Function &F) {
Changed = true;
continue;
}
-
+
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
-
+
// Can't thread an unconditional jump, but if the block is "almost
// empty", we can replace uses of it with uses of the successor and make
// this dead.
@@ -608,7 +609,7 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
static bool hasAddressTakenAndUsed(BasicBlock *BB) {
if (!BB->hasAddressTaken()) return false;
-
+
// If the block has its address taken, it may be a tree of dead constants
// hanging off of it. These shouldn't keep the block alive.
BlockAddress *BA = BlockAddress::get(BB);
@@ -668,6 +669,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
return false; // Must be an invoke.
}
+ // Run constant folding to see if we can reduce the condition to a simple
+ // constant.
+ if (Instruction *I = dyn_cast<Instruction>(Condition)) {
+ Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ if (SimpleVal) {
+ I->replaceAllUsesWith(SimpleVal);
+ I->eraseFromParent();
+ Condition = SimpleVal;
+ }
+ }
+
// If the terminator is branching on an undef, we can pick any of the
// successors to branch to. Let GetBestDestForJumpOnUndef decide.
if (isa<UndefValue>(Condition)) {
@@ -928,13 +940,14 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
// Create a PHI node at the start of the block for the PRE'd load value.
- PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
+ pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
+ PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
+ LoadBB->begin());
PN->takeName(LI);
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
- for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
- ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
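
As a reminder of the shape of redundancy this pass removes (independent of the constant-folding hunk above): when a predecessor already determines the condition of a later branch, the intermediate test can be bypassed. A small hand-written before/after, with hypothetical helpers f and g:

#include <cstdio>

void f() { std::puts("f"); }
void g() { std::puts("g"); }

// Before: the second branch re-tests a value the first branch already pins
// down on each incoming path.
void before(int x) {
  bool flag;
  if (x > 10) flag = true;   // on this path, flag is known true
  else        flag = false;  // on this path, flag is known false
  if (flag) f();
  else      g();
}

// After threading: each predecessor jumps straight to the block it would
// reach anyway, and the middle test disappears.
void after(int x) {
  if (x > 10) f();
  else        g();
}

int main() {
  before(3);  after(3);    // both print "g"
  before(42); after(42);   // both print "f"
}
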
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 0786793..93de9cf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -445,7 +445,8 @@ void LICM::sink(Instruction &I) {
// enough that we handle it as a special (more efficient) case. It is more
// efficient to handle because there are no PHI nodes that need to be placed.
if (ExitBlocks.size() == 1) {
- if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
+ if (!isa<DbgInfoIntrinsic>(I) &&
+ !DT->dominates(I.getParent(), ExitBlocks[0])) {
// Instruction is not used, just delete it.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
@@ -742,30 +743,13 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
Preheader->getTerminator());
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- // Copy any value stored to or loaded from a must-alias of the pointer.
- if (PreheaderLoad->getType()->isPointerTy()) {
- Value *SomeValue;
- if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0]))
- SomeValue = LI;
- else
- SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand();
-
- CurAST->copyValue(SomeValue, PreheaderLoad);
- }
-
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
-
- // If the preheader load is itself a pointer, we need to tell alias analysis
- // about the new pointer we created in the preheader block and about any PHI
- // nodes that just got inserted.
- if (PreheaderLoad->getType()->isPointerTy()) {
- for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
- CurAST->copyValue(PreheaderLoad, NewPHIs[i]);
- }
-
- // fwew, we're done!
+
+ // If the SSAUpdater didn't use the load in the preheader, just zap it now.
+ if (PreheaderLoad->use_empty())
+ PreheaderLoad->eraseFromParent();
}
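
The PromoteAliasSet hunk is easier to read with the end goal in mind: a must-aliased location that is only loaded and stored inside the loop becomes a register for the loop's duration, with one load in the preheader and one store at the exit, and the SSAUpdater wiring the values in between. A source-level sketch of that rewrite, assuming nothing else can observe the location during the loop:

#include <cstdio>

// Before: the loop reads and writes *p on every iteration.
void before(int *p, int n) {
  for (int i = 0; i < n; ++i)
    *p += i;
}

// After promotion (conceptually): load once in the "preheader", keep the
// value in a register inside the loop, store once at the exit. If the loop
// body never actually read the value, the preheader load would be dead --
// which is what the new use_empty() check at the end of PromoteAliasSet
// now cleans up.
void after(int *p, int n) {
  int t = *p;                 // preheader load
  for (int i = 0; i < n; ++i)
    t += i;                   // register update, no memory traffic
  *p = t;                     // store in the exit block
}

int main() {
  int a = 1, b = 1;
  before(&a, 5);
  after(&b, 5);
  std::printf("%d %d\n", a, b);   // both print 11
}
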
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f8ce214..1366231 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -81,7 +81,7 @@ namespace {
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
-
+
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
unsigned StoreAlignment,
Value *SplatValue, Instruction *TheStore,
@@ -91,7 +91,7 @@ namespace {
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
const SCEV *BECount);
-
+
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
///
@@ -134,50 +134,50 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
///
static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
SmallVector<Instruction*, 32> NowDeadInsts;
-
+
NowDeadInsts.push_back(I);
-
+
// Before we touch this instruction, remove it from SE!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
-
+
// This instruction is dead, zap it, in stages. Start by removing it from
// SCEV.
SE.forgetValue(DeadInst);
-
+
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, 0);
-
+
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
-
+
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI))
NowDeadInsts.push_back(OpI);
}
-
+
DeadInst->eraseFromParent();
-
+
} while (!NowDeadInsts.empty());
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
-
+
// The trip count of the loop must be analyzable.
SE = &getAnalysis<ScalarEvolution>();
if (!SE->hasLoopInvariantBackedgeTakenCount(L))
return false;
const SCEV *BECount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BECount)) return false;
-
+
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
if (BECst->getValue()->getValue() == 0)
return false;
-
+
// We require target data for now.
TD = getAnalysisIfAvailable<TargetData>();
if (TD == 0) return false;
@@ -185,14 +185,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
DT = &getAnalysis<DominatorTree>();
LoopInfo &LI = getAnalysis<LoopInfo>();
TLI = &getAnalysis<TargetLibraryInfo>();
-
+
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
DEBUG(dbgs() << "loop-idiom Scanning: F["
<< L->getHeader()->getParent()->getName()
<< "] Loop %" << L->getHeader()->getName() << "\n");
-
+
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
@@ -200,7 +200,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
// Ignore blocks in subloops.
if (LI.getLoopFor(*BI) != CurLoop)
continue;
-
+
MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
}
return MadeChange;
@@ -217,7 +217,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
if (!DT->dominates(BB, ExitBlocks[i]))
return false;
-
+
bool MadeChange = false;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = I++;
@@ -226,20 +226,20 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
WeakVH InstPtr(I);
if (!processLoopStore(SI, BECount)) continue;
MadeChange = true;
-
+
// If processing the store invalidated our iterator, start over from the
// top of the block.
if (InstPtr == 0)
I = BB->begin();
continue;
}
-
+
// Look for memset instructions, which may be optimized to a larger memset.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
WeakVH InstPtr(I);
if (!processLoopMemSet(MSI, BECount)) continue;
MadeChange = true;
-
+
// If processing the memset invalidated our iterator, start over from the
// top of the block.
if (InstPtr == 0)
@@ -247,7 +247,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
continue;
}
}
-
+
return MadeChange;
}
@@ -258,12 +258,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
-
+
// Reject stores that are so large that they overflow an unsigned.
uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
-
+
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
@@ -274,9 +274,9 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
- unsigned StoreSize = (unsigned)SizeInBits >> 3;
+ unsigned StoreSize = (unsigned)SizeInBits >> 3;
const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
-
+
if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
// TODO: Could also handle negative stride here someday, that will require
// the validity check in mayLoopAccessLocation to be updated though.
@@ -285,7 +285,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
dbgs() << "BB: " << *SI->getParent();
}
-
+
return false;
}
@@ -319,9 +319,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// If we're not allowed to hack on memset, we fail.
if (!TLI->has(LibFunc::memset))
return false;
-
+
Value *Pointer = MSI->getDest();
-
+
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
@@ -333,16 +333,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
if ((SizeInBytes >> 32) != 0)
return false;
-
+
// Check to see if the stride matches the size of the memset. If so, then we
// know that every byte is touched in the loop.
const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
-
+
// TODO: Could also handle negative stride here someday, that will require the
// validity check in mayLoopAccessLocation to be updated though.
if (Stride == 0 || MSI->getLength() != Stride->getValue())
return false;
-
+
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
MSI->getAlignment(), MSI->getValue(),
MSI, Ev, BECount);
@@ -365,7 +365,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
-
+
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
  // with store of &A[100], we need StoreLoc to be "A" with size of 100,
@@ -394,12 +394,12 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
// that doesn't seem worthwhile.
Constant *C = dyn_cast<Constant>(V);
if (C == 0) return 0;
-
+
// Only handle simple values that are a power of two bytes in size.
uint64_t Size = TD.getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
return 0;
-
+
// Don't care enough about darwin/ppc to implement this.
if (TD.isBigEndian())
return 0;
@@ -410,7 +410,7 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same (e.g. for vectors and large integers).
if (Size > 16) return 0;
-
+
// If the constant is exactly 16 bytes, just use it.
if (Size == 16) return C;
@@ -428,14 +428,14 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
unsigned StoreAlignment, Value *StoredVal,
Instruction *TheStore, const SCEVAddRecExpr *Ev,
const SCEV *BECount) {
-
+
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = 0;
-
+
// If we're allowed to form a memset, and the stored value would be acceptable
// for memset, use it.
if (SplatValue && TLI->has(LibFunc::memset) &&
@@ -453,8 +453,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// do anything with a 3-byte store, for example.
return false;
}
-
-
+
+
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -463,47 +463,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
CurLoop, BECount,
StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
return false;
-
+
// Okay, everything looks good, insert the memset.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
+
IRBuilder<> Builder(Preheader->getTerminator());
-
+
  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate the
  // header.  Just insert code for them in the preheader.
SCEVExpander Expander(*SE);
-
+
unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
- Value *BasePtr =
+ Value *BasePtr =
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
Preheader->getTerminator());
-
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
-
+
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
- true /*no unsigned overflow*/);
+ SCEV::FlagNUW);
if (StoreSize != 1)
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
- true /*no unsigned overflow*/);
-
- Value *NumBytes =
+ SCEV::FlagNUW);
+
+ Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
-
- Value *NewCall;
+
+ CallInst *NewCall;
if (SplatValue)
NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
else {
Module *M = TheStore->getParent()->getParent()->getParent();
Value *MSP = M->getOrInsertFunction("memset_pattern16",
Builder.getVoidTy(),
- Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(),
Builder.getInt8PtrTy(), IntPtr,
(void*)0);
-
+
// Otherwise we should form a memset_pattern16. PatternValue is known to be
  // a constant array of 16 bytes.  Plop the value into a mergeable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
@@ -514,11 +514,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
}
-
+
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
<< " from store to: " << *Ev << " at: " << *TheStore << "\n");
- (void)NewCall;
-
+ NewCall->setDebugLoc(TheStore->getDebugLoc());
+
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
DeleteDeadInstruction(TheStore, *SE);
@@ -536,9 +536,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// If we're not allowed to form memcpy, we fail.
if (!TLI->has(LibFunc::memcpy))
return false;
-
+
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
-
+
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -555,49 +555,49 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
CurLoop, BECount, StoreSize,
getAnalysis<AliasAnalysis>(), SI))
return false;
-
+
// Okay, everything looks good, insert the memcpy.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
-
+
IRBuilder<> Builder(Preheader->getTerminator());
-
+
  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate the
  // header.  Just insert code for them in the preheader.
SCEVExpander Expander(*SE);
- Value *LoadBasePtr =
+ Value *LoadBasePtr =
Expander.expandCodeFor(LoadEv->getStart(),
Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
Preheader->getTerminator());
- Value *StoreBasePtr =
+ Value *StoreBasePtr =
Expander.expandCodeFor(StoreEv->getStart(),
Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
Preheader->getTerminator());
-
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
const Type *IntPtr = TD->getIntPtrType(SI->getContext());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
-
+
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
- true /*no unsigned overflow*/);
+ SCEV::FlagNUW);
if (StoreSize != 1)
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
- true /*no unsigned overflow*/);
-
+ SCEV::FlagNUW);
+
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
-
+
Value *NewCall =
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
std::min(SI->getAlignment(), LI->getAlignment()));
-
+
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
(void)NewCall;
-
+
  // Okay, the memcpy has been formed.  Zap the original store and anything that
// feeds into it.
DeleteDeadInstruction(SI, *SE);
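
Both transforms above come down to the same arithmetic: the loop touches (BECount+1)*StoreSize contiguous bytes starting at the addrec's start value, so the whole loop can be replaced by one library call emitted in the preheader. A source-level sketch of the two idioms, under the usual assumption that nothing else in the loop reads or writes the affected ranges:

#include <cstring>
#include <cstdio>

// Strided-store idiom: becomes a single memset. For this loop the
// backedge-taken count is n-1, so the byte count is (n-1+1)*StoreSize = n.
void zero_loop(char *a, std::size_t n) {
  for (std::size_t i = 0; i != n; ++i)
    a[i] = 0;
}
void zero_memset(char *a, std::size_t n) {
  if (n) std::memset(a, 0, n);   // what processLoopStridedStore emits
}

// Load/store idiom: becomes a single memcpy using the minimum of the two
// access alignments, as in processLoopStoreOfLoopLoad.
void copy_loop(char *dst, const char *src, std::size_t n) {
  for (std::size_t i = 0; i != n; ++i)
    dst[i] = src[i];
}
void copy_memcpy(char *dst, const char *src, std::size_t n) {
  if (n) std::memcpy(dst, src, n);
}

int main() {
  char a[8] = "xxxxxxx", b[8] = "0000000";
  zero_memset(a, 4);
  copy_memcpy(b, "abcdefg", 3);
  std::printf("%d %s\n", a[0], b);   // prints: 0 abc0000
}
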
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 95e1578..47dced3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -184,7 +184,11 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Now, this loop is suitable for rotation.
BasicBlock *OrigPreheader = L->getLoopPreheader();
BasicBlock *OrigLatch = L->getLoopLatch();
- assert(OrigPreheader && OrigLatch && "Loop not in canonical form?");
+
+ // If the loop could not be converted to canonical form, it must have an
+  // indirectbr in it; just give up.
+ if (OrigPreheader == 0 || OrigLatch == 0)
+ return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
@@ -322,7 +326,8 @@ bool LoopRotate::rotateLoop(Loop *L) {
// We can fold the conditional branch in the preheader, this makes things
// simpler. The first step is to remove the extra edge to the Exit block.
Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
- BranchInst::Create(NewHeader, PHBI);
+ BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
+ NewBI->setDebugLoc(PHBI->getDebugLoc());
PHBI->eraseFromParent();
// With our CFG finalized, update DomTree if it is available.
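
For orientation, the overall effect of rotateLoop is the classic while-to-do-while rewrite: the header's exit test is duplicated into the preheader as a guard, and the loop becomes bottom-tested. A source-level sketch:

#include <cstdio>

// Before rotation: top-tested loop; the header both tests and starts the body.
int sum_while(const int *a, int n) {
  int s = 0, i = 0;
  while (i < n) {
    s += a[i];
    ++i;
  }
  return s;
}

// After rotation: the original test becomes a guard in the preheader, and the
// latch carries the exit branch -- the shape later passes (LICM, unrolling)
// prefer to see.
int sum_rotated(const int *a, int n) {
  int s = 0, i = 0;
  if (i < n) {            // guard: copy of the header's exit test
    do {
      s += a[i];
      ++i;
    } while (i < n);      // bottom-tested latch
  }
  return s;
}

int main() {
  int a[4] = {1, 2, 3, 4};
  std::printf("%d %d\n", sum_while(a, 4), sum_rotated(a, 4));   // 10 10
}
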
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ac4aea2..5abc790 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -253,7 +253,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
- AR->getLoop()),
+ // FIXME: AR->getNoWrapFlags()
+ AR->getLoop(), SCEV::FlagAnyWrap),
L, Good, Bad, SE);
return;
}
@@ -455,7 +456,10 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
if (!Start) return 0;
- return SE.getAddRecExpr(Start, Step, AR->getLoop());
+ // FlagNW is independent of the start value, step direction, and is
+ // preserved with smaller magnitude steps.
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
return 0;
}
@@ -520,7 +524,9 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
return Result;
}
return 0;
@@ -545,7 +551,9 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
if (Result)
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ S = SE.getAddRecExpr(NewOps, AR->getLoop(),
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
return Result;
}
return 0;
@@ -564,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
- case Intrinsic::x86_sse2_loadu_dq:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse_loadu_ps:
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
@@ -781,7 +786,7 @@ void Cost::RateFormula(const Formula &F,
}
}
-/// Loose - Set this cost to a loosing value.
+/// Loose - Set this cost to a losing value.
void Cost::Loose() {
NumRegs = ~0u;
AddRecCost = ~0u;
@@ -1483,7 +1488,7 @@ void LSRInstance::OptimizeShadowIV() {
if (!C->getValue().isStrictlyPositive()) continue;
/* Add new PHINode. */
- PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
+ PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
@@ -1819,7 +1824,7 @@ LSRInstance::OptimizeLoopTermCond() {
}
}
-/// reconcileNewOffset - Determine if the given use can accomodate a fixup
+/// reconcileNewOffset - Determine if the given use can accommodate a fixup
/// at the given offset and other details. If so, update the use and
/// return true.
bool
@@ -2236,7 +2241,9 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (!AR->getStart()->isZero()) {
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
- AR->getLoop()),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap),
C, Ops, L, SE);
CollectSubexprs(AR->getStart(), C, Ops, L, SE);
return;
@@ -3047,7 +3054,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
}
}
-/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
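
Most of this hunk is plumbing the new SCEV wrap flags through (conservatively, as the FIXMEs note); the underlying transform is unchanged: replace per-iteration address arithmetic that depends on the IV with a recurrence that is cheap to update. A source-level caricature, with names invented here:

#include <cstdio>

// Before: each iteration recomputes the address as base + i*stride.
long sum_indexed(const long *a, int n) {
  long s = 0;
  for (int i = 0; i < n; ++i)
    s += a[4 * i];          // address = a + 4*i*sizeof(long) every iteration
  return s;
}

// After strength reduction (conceptually): the multiply is replaced by a
// pointer that advances by a fixed stride -- an {a,+,4*sizeof(long)} addrec.
long sum_strided(const long *a, int n) {
  long s = 0;
  const long *p = a;
  for (int i = 0; i < n; ++i, p += 4)
    s += *p;
  return s;
}

int main() {
  long a[32];
  for (int i = 0; i < 32; ++i) a[i] = i;
  std::printf("%ld %ld\n", sum_indexed(a, 8), sum_strided(a, 8));
  // both print 112 (0+4+8+...+28)
}
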
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 80b263a..fef6bc3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -43,7 +43,13 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(ID) {
+ LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) {
+ CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+ CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
+ CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+
+ UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -56,7 +62,10 @@ namespace {
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
+ unsigned CurrentCount;
unsigned CurrentThreshold;
+ bool CurrentAllowPartial;
+ bool UserThreshold; // CurrentThreshold is user-specified.
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -87,7 +96,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
+ return new LoopUnroll(Threshold, Count, AllowPartial);
+}
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
@@ -119,14 +130,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// from UnrollThreshold, it is overridden to a smaller value if the current
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
- CurrentThreshold = UnrollThreshold;
- if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) &&
- UnrollThreshold.getNumOccurrences() == 0)
- CurrentThreshold = OptSizeUnrollThreshold;
+ unsigned Threshold = CurrentThreshold;
+ if (!UserThreshold &&
+ Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ Threshold = OptSizeUnrollThreshold;
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = UnrollCount;
+ unsigned Count = CurrentCount;
// Automatically select an unroll count.
if (Count == 0) {
@@ -140,7 +151,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Enforce the threshold.
- if (CurrentThreshold != NoThreshold) {
+ if (Threshold != NoThreshold) {
unsigned NumInlineCandidates;
unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
@@ -149,16 +160,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > CurrentThreshold) {
+ if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << CurrentThreshold << "\n");
- if (!UnrollAllowPartial) {
+ << " because size: " << Size << ">" << Threshold << "\n");
+ if (!CurrentAllowPartial) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
      // Reduce the unroll count so that it evenly divides TripCount, for
      // partial unrolling.
- Count = CurrentThreshold / LoopSize;
+ Count = Threshold / LoopSize;
while (Count != 0 && TripCount%Count != 0) {
Count--;
}
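
The count-selection logic this hunk rearranges is small enough to restate on its own. A standalone sketch of the size budget and the partial-unroll fallback, using the pass's variable names but no LLVM types:

#include <cstdio>

// Returns the unroll count to use, or 0 to give up -- mirroring the
// threshold enforcement in LoopUnroll::runOnLoop: fully unroll if the
// unrolled size fits the budget, otherwise (when partial unrolling is
// allowed) shrink the count until it divides the trip count.
unsigned chooseUnrollCount(unsigned TripCount, unsigned LoopSize,
                           unsigned Threshold, bool AllowPartial) {
  unsigned Count = TripCount;              // try to unroll fully
  unsigned long long Size = (unsigned long long)LoopSize * Count;
  if (TripCount != 1 && Size > Threshold) {
    if (!AllowPartial)
      return 0;                            // too large, partial not allowed
    // Largest Count <= Threshold/LoopSize that evenly divides TripCount.
    Count = Threshold / LoopSize;
    while (Count != 0 && TripCount % Count != 0)
      --Count;
  }
  return Count;
}

int main() {
  // Trip count 100, body size 12, budget 150: full unroll would cost 1200,
  // so fall back to Count = 150/12 = 12 -> 10 (first divisor of 100 <= 12).
  std::printf("%u\n", chooseUnrollCount(100, 12, 150, true));    // prints 10
  std::printf("%u\n", chooseUnrollCount(100, 12, 150, false));   // prints 0
}
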
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index bde0e53..a3035cb 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <list>
using namespace llvm;
@@ -299,12 +300,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
+ TargetLibraryInfo *TLI;
const TargetData *TD;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
MD = 0;
+ TLI = 0;
+ TD = 0;
}
bool runOnFunction(Function &F);
@@ -316,6 +320,7 @@ namespace {
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -346,6 +351,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
@@ -688,7 +694,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
if (M->getSource() == MDep->getSource())
return false;
- // Second, the length of the memcpy's must be the same, or the preceeding one
+ // Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
@@ -804,6 +810,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ if (!TLI->has(LibFunc::memmove))
+ return false;
+
// See if the pointers alias.
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
return false;
@@ -935,6 +944,14 @@ bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+  // If we don't have at least memset and memcpy, there is little point in doing
+ // anything here. These are required by a freestanding implementation, so if
+ // even they are disabled, there is no point in trying hard.
+ if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+ return false;
+
while (1) {
if (!iterateOnFunction(F))
break;
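
The memmove hunk (and the new TargetLibraryInfo guard in front of it) boils down to this: if alias analysis proves the two operands cannot overlap, a memmove is just a memcpy and can be rewritten as one, which the rest of the pass understands far better. A source-level sketch of the equivalence, assuming the no-overlap precondition:

#include <cstring>
#include <cstdio>

// When dst and src are known not to overlap, memmove and memcpy are
// interchangeable; MemCpyOpt::processMemMove performs exactly this rewrite
// once AA reports NoAlias for the destination and source locations.
void move_version(char *dst, const char *src, std::size_t n) {
  std::memmove(dst, src, n);
}
void copy_version(char *dst, const char *src, std::size_t n) {
  std::memcpy(dst, src, n);   // only valid because the ranges are disjoint
}

int main() {
  char src[] = "hello";
  char d1[8] = {0}, d2[8] = {0};
  move_version(d1, src, 6);
  copy_version(d2, src, 6);
  std::printf("%s %s\n", d1, d2);   // prints: hello hello
}
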
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index e093b52..c1dfe15 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -74,6 +75,8 @@ namespace {
class Reassociate : public FunctionPass {
DenseMap<BasicBlock*, unsigned> RankMap;
DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+ SmallVector<WeakVH, 8> RedoInsts;
+ SmallVector<WeakVH, 8> DeadInsts;
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
@@ -98,7 +101,7 @@ namespace {
void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
void LinearizeExpr(BinaryOperator *I);
Value *RemoveFactorFromExpression(Value *V, Value *Factor);
- void ReassociateBB(BasicBlock *BB);
+ void ReassociateInst(BasicBlock::iterator &BBI);
void RemoveDeadBinaryOp(Value *V);
};
@@ -113,13 +116,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
void Reassociate::RemoveDeadBinaryOp(Value *V) {
Instruction *Op = dyn_cast<Instruction>(V);
- if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty())
+ if (!Op || !isa<BinaryOperator>(Op))
return;
Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);
ValueRankMap.erase(Op);
- Op->eraseFromParent();
+ DeadInsts.push_back(Op);
RemoveDeadBinaryOp(LHS);
RemoveDeadBinaryOp(RHS);
}
@@ -214,6 +217,7 @@ static Instruction *LowerNegateToMultiply(Instruction *Neg,
ValueRankMap.erase(Neg);
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
+ Res->setDebugLoc(Neg->getDebugLoc());
Neg->eraseFromParent();
return Res;
}
@@ -503,6 +507,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
// Everyone now refers to the add instruction.
ValueRankMap.erase(Sub);
Sub->replaceAllUsesWith(New);
+ New->setDebugLoc(Sub->getDebugLoc());
Sub->eraseFromParent();
DEBUG(dbgs() << "Negated: " << *New << '\n');
@@ -528,6 +533,7 @@ static Instruction *ConvertShiftToMul(Instruction *Shl,
ValueRankMap.erase(Shl);
Mul->takeName(Shl);
Shl->replaceAllUsesWith(Mul);
+ Mul->setDebugLoc(Shl->getDebugLoc());
Shl->eraseFromParent();
return Mul;
}
@@ -603,7 +609,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
// remaining operand.
if (Factors.size() == 1) {
ValueRankMap.erase(BO);
- BO->eraseFromParent();
+ DeadInsts.push_back(BO);
V = Factors[0].Op;
} else {
RewriteExprTree(BO, Factors);
@@ -732,7 +738,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Now that we have inserted a multiply, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
- Mul = ReassociateExpression(cast<BinaryOperator>(Mul));
+ RedoInsts.push_back(Mul);
// If every add operand was a duplicate, return the multiply.
if (Ops.empty())
@@ -960,71 +966,69 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
}
-/// ReassociateBB - Inspect all of the instructions in this basic block,
-/// reassociating them as we go.
-void Reassociate::ReassociateBB(BasicBlock *BB) {
- for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
- Instruction *BI = BBI++;
- if (BI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(BI->getOperand(1)))
- if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
- MadeChange = true;
- BI = NI;
- }
+/// ReassociateInst - Inspect and reassociate the instruction at the
+/// given position, post-incrementing the position.
+void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
+ Instruction *BI = BBI++;
+ if (BI->getOpcode() == Instruction::Shl &&
+ isa<ConstantInt>(BI->getOperand(1)))
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
+ MadeChange = true;
+ BI = NI;
+ }
- // Reject cases where it is pointless to do this.
- if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
- BI->getType()->isVectorTy())
- continue; // Floating point ops are not associative.
-
- // Do not reassociate boolean (i1) expressions. We want to preserve the
- // original order of evaluation for short-circuited comparisons that
- // SimplifyCFG has folded to AND/OR expressions. If the expression
- // is not further optimized, it is likely to be transformed back to a
- // short-circuited form for code gen, and the source order may have been
- // optimized for the most likely conditions.
- if (BI->getType()->isIntegerTy(1))
- continue;
+ // Reject cases where it is pointless to do this.
+ if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
+ BI->getType()->isVectorTy())
+ return; // Floating point ops are not associative.
+
+ // Do not reassociate boolean (i1) expressions. We want to preserve the
+ // original order of evaluation for short-circuited comparisons that
+ // SimplifyCFG has folded to AND/OR expressions. If the expression
+ // is not further optimized, it is likely to be transformed back to a
+ // short-circuited form for code gen, and the source order may have been
+ // optimized for the most likely conditions.
+ if (BI->getType()->isIntegerTy(1))
+ return;
- // If this is a subtract instruction which is not already in negate form,
- // see if we can convert it to X+-Y.
- if (BI->getOpcode() == Instruction::Sub) {
- if (ShouldBreakUpSubtract(BI)) {
- BI = BreakUpSubtract(BI, ValueRankMap);
- // Reset the BBI iterator in case BreakUpSubtract changed the
- // instruction it points to.
- BBI = BI;
- ++BBI;
+ // If this is a subtract instruction which is not already in negate form,
+ // see if we can convert it to X+-Y.
+ if (BI->getOpcode() == Instruction::Sub) {
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
+ // Reset the BBI iterator in case BreakUpSubtract changed the
+ // instruction it points to.
+ BBI = BI;
+ ++BBI;
+ MadeChange = true;
+ } else if (BinaryOperator::isNeg(BI)) {
+ // Otherwise, this is a negation. See if the operand is a multiply tree
+ // and if this is not an inner node of a multiply tree.
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
+ (!BI->hasOneUse() ||
+ !isReassociableOp(BI->use_back(), Instruction::Mul))) {
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
MadeChange = true;
- } else if (BinaryOperator::isNeg(BI)) {
- // Otherwise, this is a negation. See if the operand is a multiply tree
- // and if this is not an inner node of a multiply tree.
- if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
- (!BI->hasOneUse() ||
- !isReassociableOp(BI->use_back(), Instruction::Mul))) {
- BI = LowerNegateToMultiply(BI, ValueRankMap);
- MadeChange = true;
- }
}
}
+ }
- // If this instruction is a commutative binary operator, process it.
- if (!BI->isAssociative()) continue;
- BinaryOperator *I = cast<BinaryOperator>(BI);
+ // If this instruction is a commutative binary operator, process it.
+ if (!BI->isAssociative()) return;
+ BinaryOperator *I = cast<BinaryOperator>(BI);
- // If this is an interior node of a reassociable tree, ignore it until we
- // get to the root of the tree, to avoid N^2 analysis.
- if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
- continue;
+ // If this is an interior node of a reassociable tree, ignore it until we
+ // get to the root of the tree, to avoid N^2 analysis.
+ if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
+ return;
- // If this is an add tree that is used by a sub instruction, ignore it
- // until we process the subtract.
- if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
- cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
- continue;
+ // If this is an add tree that is used by a sub instruction, ignore it
+ // until we process the subtract.
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
+ cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
+ return;
- ReassociateExpression(I);
- }
+ ReassociateExpression(I);
}
Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
@@ -1051,6 +1055,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
// eliminate it.
DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
I->replaceAllUsesWith(V);
+ if (Instruction *VI = dyn_cast<Instruction>(V))
+ VI->setDebugLoc(I->getDebugLoc());
RemoveDeadBinaryOp(I);
++NumAnnihil;
return V;
@@ -1074,6 +1080,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) {
// This expression tree simplified to something that isn't a tree,
// eliminate it.
I->replaceAllUsesWith(Ops[0].Op);
+ if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
+ OI->setDebugLoc(I->getDebugLoc());
RemoveDeadBinaryOp(I);
return Ops[0].Op;
}
@@ -1091,7 +1099,21 @@ bool Reassociate::runOnFunction(Function &F) {
MadeChange = false;
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- ReassociateBB(FI);
+ for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
+ ReassociateInst(BBI);
+
+ // Now that we're done, revisit any instructions which are likely to
+ // have secondary reassociation opportunities.
+ while (!RedoInsts.empty())
+ if (Value *V = RedoInsts.pop_back_val()) {
+ BasicBlock::iterator BBI = cast<Instruction>(V);
+ ReassociateInst(BBI);
+ }
+
+ // Now that we're done, delete any instructions which are no longer used.
+ while (!DeadInsts.empty())
+ if (Value *V = DeadInsts.pop_back_val())
+ RecursivelyDeleteTriviallyDeadInstructions(V);
// We are done with the rank map.
RankMap.clear();
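
The RedoInsts queue above exists because one factoring step can expose another, as the (X*2) + (X*2) + (X*2) comment in OptimizeAdd already hints. At the source level, the rewrite being chased looks like this, with the intermediate step spelled out:

#include <cstdio>

// Before: three separate multiplies and two adds.
int before(int x) {
  return (x * 2) + (x * 2) + (x * 2);
}

// One factoring step: (x*2) occurs three times, so factor it out...
int one_step(int x) {
  return (x * 2) * 3;
}

// ...and revisiting the freshly created multiply (which is what pushing it
// onto RedoInsts accomplishes) folds the constants into a single operation.
int after(int x) {
  return x * 6;
}

int main() {
  std::printf("%d %d %d\n", before(7), one_step(7), after(7));   // 42 42 42
}
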
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 459bb06..47afc77 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -9,7 +9,7 @@
//
 // This file demotes all registers to memory references. It is intended to be
// the inverse of PromoteMemoryToRegister. By converting to loads, the only
-// values live accross basic blocks are allocas and loads before phi nodes.
+// values live across basic blocks are allocas and loads before phi nodes.
// It is intended that this should make CFG hacking much easier.
// To make later hacking easier, the entry block is split into two, such that
// all introduced allocas and nothing else are in the entry block.
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index c82e929..db8eb85 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -1989,7 +1989,7 @@ bool IPSCCP::runOnModule(Module &M) {
ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
}
- // If we infered constant or undef values for globals variables, we can delete
+ // If we inferred constant or undef values for global variables, we can delete
// the global and any stores that remain to it.
const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index bf9ca6d..32a0506 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -17,6 +17,7 @@
#include "llvm-c/Initialization.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Scalar.h"
@@ -34,7 +35,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDCEPass(Registry);
initializeDeadInstEliminationPass(Registry);
initializeDSEPass(Registry);
- initializeGEPSplitterPass(Registry);
initializeGVNPass(Registry);
initializeEarlyCSEPass(Registry);
initializeIndVarSimplifyPass(Registry);
@@ -56,7 +56,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSROA_DTPass(Registry);
initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
- initializeSimplifyHalfPowrLibCallsPass(Registry);
initializeSimplifyLibCallsPass(Registry);
initializeSinkingPass(Registry);
initializeTailDupPass(Registry);
@@ -103,6 +102,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopDeletionPass());
}
+void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopIdiomPass());
+}
+
void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRotatePass());
}
@@ -135,6 +138,10 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createScalarReplAggregatesPass());
}
+void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarReplAggregatesPass(-1, false));
+}
+
void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
int Threshold) {
unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
@@ -159,3 +166,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
}
+
+void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createCorrelatedValuePropagationPass());
+}
+
+void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createEarlyCSEPass());
+}
+
+void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+}
+
+void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBasicAliasAnalysisPass());
+}
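
The hunks above add thin C API wrappers around existing pass factories (loop idiom recognition, SSA-based scalar replacement, correlated value propagation, early CSE, and the two alias analyses). A minimal sketch of how a bindings client might drive them is shown below; it is illustrative only, not part of this commit, and it assumes the new declarations are exported from llvm-c/Transforms/Scalar.h next to the existing wrappers.

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Scalar.h"

int main() {
  LLVMModuleRef M = LLVMModuleCreateWithName("demo");  // empty module, illustration only
  LLVMPassManagerRef PM = LLVMCreatePassManager();

  // Alias analyses first, so the transforms that query AA can find them.
  LLVMAddBasicAliasAnalysisPass(PM);
  LLVMAddTypeBasedAliasAnalysisPass(PM);

  // A few of the newly exposed scalar transforms.
  LLVMAddEarlyCSEPass(PM);
  LLVMAddCorrelatedValuePropagationPass(PM);
  LLVMAddLoopIdiomPass(PM);
  LLVMAddScalarReplAggregatesPassSSA(PM);  // SROA that promotes via SSAUpdater

  LLVMRunPassManager(PM, M);               // nothing to do on an empty module

  LLVMDisposePassManager(PM);
  LLVMDisposeModule(M);
  return 0;
}
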
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index c3ca852..8178c27 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -219,7 +219,7 @@ namespace {
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
- /// AllocaSize - The size of the alloca being considered.
+ /// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
const TargetData &TD;
@@ -238,19 +238,22 @@ class ConvertToScalarInfo {
/// also declared as a vector, we do want to promote to a vector.
bool HadAVector;
+ /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// alloca that is not a MemTransferInst. We don't want to turn structs into
+ /// large integers unless there is some potential for optimization.
+ bool HadNonMemTransferAccess;
+
public:
explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td) {
- IsNotTrivial = false;
- VectorTy = 0;
- HadAVector = false;
- }
+ : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
+ HadAVector(false), HadNonMemTransferAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInType(const Type *In, uint64_t Offset);
+ void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+ bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
@@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
} else {
+ unsigned BitWidth = AllocaSize * 8;
+ if (!HadAVector && !HadNonMemTransferAccess &&
+ !TD.fitsInLegalInteger(BitWidth))
+ return 0;
+
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
- NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
+ NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
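
The new gate above keeps the pass from rewriting an alloca that is only ever copied with memcpy/memmove into an integer wider than anything the target supports. Stripped of LLVM types, the decision reduces to the small predicate sketched below; the function name and the plain width comparison standing in for TargetData::fitsInLegalInteger are invented for illustration.

#include <cstdio>

// Only fall back to "one big integer" if the alloca saw a real (non-memtransfer)
// access or a vector type, or if the integer would be target-legal anyway.
static bool shouldUseWideInteger(unsigned allocaBytes, bool hadVector,
                                 bool hadNonMemTransferAccess,
                                 unsigned widestLegalIntBits) {
  if (hadVector || hadNonMemTransferAccess)
    return true;
  return allocaBytes * 8 <= widestLegalIntBits;  // stand-in for fitsInLegalInteger
}

int main() {
  // 64-byte struct, only memcpy'd, 64-bit target: leave it for memcpy optimizations.
  printf("%d\n", shouldUseWideInteger(64, false, false, 64));  // 0
  // Same struct, but something loads from it directly: still convert it.
  printf("%d\n", shouldUseWideInteger(64, false, true, 64));   // 1
  return 0;
}
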
@@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
/// so far at the offset specified by Offset (which is specified in bytes).
///
-/// There are two cases we handle here:
+/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A fully general blob of memory, which we turn into some (potentially
+/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
+/// <2 x float> and <4 x float>. Here we turn element accesses into insert
+/// and extract element operations, and <2 x float> accesses into a cast to
+/// <2 x double>, an extract, and a cast back to <2 x float>.
+/// 3) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+ bool IsLoadOrStore) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
if (VectorTy && VectorTy->isVoidTy())
@@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
- // Remember if we saw a vector type.
- HadAVector = true;
-
- if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
- // If we're storing/loading a vector of the right size, allow it as a
- // vector. If this the first vector we see, remember the type so that
- // we know the element size. If this is a subsequent access, ignore it
- // even if it is a differing type but the same size. Worst case we can
- // bitcast the resultant vectors.
- if (VectorTy == 0)
- VectorTy = VInTy;
+ if (MergeInVectorType(VInTy, Offset))
return;
- }
} else if (In->isFloatTy() || In->isDoubleTy() ||
(In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // Full width accesses can be ignored, because they can always be turned
+ // into bitcasts.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (IsLoadOrStore && EltSize == AllocaSize)
+ return;
+
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
- unsigned EltSize = In->getPrimitiveSizeInBits()/8;
- if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (VectorTy == 0 ||
- cast<VectorType>(VectorTy)->getElementType()
- ->getPrimitiveSizeInBits()/8 == EltSize)) {
- if (VectorTy == 0)
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) {
+ if (!VectorTy) {
VectorTy = VectorType::get(In, AllocaSize/EltSize);
- return;
+ return;
+ }
+
+ unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+ ->getPrimitiveSizeInBits()/8;
+ if (EltSize == CurrentEltSize)
+ return;
+
+ if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
+ return;
}
}
@@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
VectorTy = Type::getVoidTy(In->getContext());
}
+/// MergeInVectorType - Handles the vector case of MergeInType, returning true
+/// if the type was successfully merged and false otherwise.
+bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+ uint64_t Offset) {
+ // Remember if we saw a vector type.
+ HadAVector = true;
+
+ // TODO: Support nonzero offsets?
+ if (Offset != 0)
+ return false;
+
+ // Only allow vectors that are a power-of-2 away from the size of the alloca.
+ if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
+ return false;
+
+ // If this is the first vector we see, remember the type so that we know the
+ // element size.
+ if (!VectorTy) {
+ VectorTy = VInTy;
+ return true;
+ }
+
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned InBitWidth = VInTy->getBitWidth();
+
+ // Vectors of the same size can be converted using a simple bitcast.
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+ return true;
+
+ const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
+ const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+
+ // Do not allow mixed integer and floating-point accesses from vectors of
+ // different sizes.
+ if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+ return false;
+
+ if (ElementTy->isFloatingPointTy()) {
+ // Only allow floating-point vectors of different sizes if they have the
+ // same element type.
+ // TODO: This could be loosened a bit, but would anything benefit?
+ if (ElementTy != InElementTy)
+ return false;
+
+ // There are no arbitrary-precision floating-point types, which limits the
+ // number of legal vector types with larger element types that we can form
+ // to bitcast and extract a subvector.
+ // TODO: We could support some more cases with mixed fp128 and double here.
+ if (!(BitWidth == 64 || BitWidth == 128) ||
+ !(InBitWidth == 64 || InBitWidth == 128))
+ return false;
+ } else {
+ assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+ "or floating-point.");
+ unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
+ unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+ // Do not allow integer types smaller than a byte or types whose widths are
+ // not a multiple of a byte.
+ if (BitWidth < 8 || InBitWidth < 8 ||
+ BitWidth % 8 != 0 || InBitWidth % 8 != 0)
+ return false;
+ }
+
+ // Pick the largest of the two vector types.
+ if (InBitWidth > BitWidth)
+ VectorTy = VInTy;
+
+ return true;
+}
+
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type. If we could convert the alloca into a single promotable
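
MergeInVectorType above only accepts a vector access when its byte size divides into the alloca by a power of two (element-kind compatibility is checked separately further down, and nonzero offsets are rejected). In isolation the size gate looks like the sketch below, with invented names and plain integers standing in for LLVM types.

#include <cstdint>
#include <cstdio>

static bool isPow2(uint64_t x) { return x != 0 && (x & (x - 1)) == 0; }

// Size gate applied to a candidate vector access: nonzero offsets are rejected
// outright, and the alloca must be a power-of-2 multiple of the vector's size.
static bool vectorSizeAcceptable(uint64_t allocaBytes, uint64_t vectorBits,
                                 uint64_t offsetBytes) {
  if (offsetBytes != 0)
    return false;                       // the pass leaves nonzero offsets as a TODO
  return isPow2(allocaBytes / (vectorBits / 8));
}

int main() {
  // A 16-byte alloca accepts <4 x float> (128 bits) and <2 x float> (64 bits)...
  printf("%d %d ", vectorSizeAcceptable(16, 128, 0), vectorSizeAcceptable(16, 64, 0));
  // ...but a <4 x float> access does not fit a 12-byte alloca.
  printf("%d\n", vectorSizeAcceptable(12, 128, 0));
  return 0;
}
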
@@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Don't touch MMX operations.
if (LI->getType()->isX86_MMXTy())
return false;
- MergeInType(LI->getType(), Offset);
+ HadNonMemTransferAccess = true;
+ MergeInType(LI->getType(), Offset, true);
continue;
}
@@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Don't touch MMX operations.
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
- MergeInType(SI->getOperand(0)->getType(), Offset);
+ HadNonMemTransferAccess = true;
+ MergeInType(SI->getOperand(0)->getType(), Offset, true);
continue;
}
@@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (!CanConvertToScalar(GEP, Offset+GEPOffset))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
continue;
}
@@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
!isa<ConstantInt>(MSI->getLength()))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
continue;
}
@@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
}
}
+/// getScaledElementType - Gets a scaled element type for a partial vector
+/// access of an alloca. The input types must be integer or floating-point
+/// scalar or vector types, and the resulting type is an integer, float or
+/// double.
+static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
+ unsigned NewBitWidth) {
+ bool IsFP1 = Ty1->isFloatingPointTy() ||
+ (Ty1->isVectorTy() &&
+ cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
+ bool IsFP2 = Ty2->isFloatingPointTy() ||
+ (Ty2->isVectorTy() &&
+ cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
+
+ LLVMContext &Context = Ty1->getContext();
+
+ // Prefer floating-point types over integer types, as integer types may have
+ // been created by earlier scalar replacement.
+ if (IsFP1 || IsFP2) {
+ if (NewBitWidth == 32)
+ return Type::getFloatTy(Context);
+ if (NewBitWidth == 64)
+ return Type::getDoubleTy(Context);
+ }
+
+ return Type::getIntNTy(Context, NewBitWidth);
+}
+
+/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
+/// to another vector of the same element type which has the same allocation
+/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
+static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
+ IRBuilder<> &Builder) {
+ const Type *FromType = FromVal->getType();
+ const VectorType *FromVTy = cast<VectorType>(FromType);
+ const VectorType *ToVTy = cast<VectorType>(ToType);
+ assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
+ "Vectors must have the same element type");
+ Value *UnV = UndefValue::get(FromType);
+ unsigned numEltsFrom = FromVTy->getNumElements();
+ unsigned numEltsTo = ToVTy->getNumElements();
+
+ SmallVector<Constant*, 3> Args;
+ const Type* Int32Ty = Builder.getInt32Ty();
+ unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+ unsigned i;
+ for (i=0; i != minNumElts; ++i)
+ Args.push_back(ConstantInt::get(Int32Ty, i));
+
+ if (i < numEltsTo) {
+ Constant* UnC = UndefValue::get(Int32Ty);
+ for (; i != numEltsTo; ++i)
+ Args.push_back(UnC);
+ }
+ Constant *Mask = ConstantVector::get(Args);
+ return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
+}
+
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset. This returns the value, which is of type ToType.
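
CreateShuffleVectorCast above converts between two vectors that share an element type and allocation size but differ in element count, by keeping the leading min(from, to) lanes and filling the remainder with undef. Just the mask computation is sketched standalone below with invented names; -1 stands for an undef lane.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Build the shuffle mask used for such a cast: lanes 0..min-1 select the
// original elements, any remaining destination lanes are undef (-1 here).
static std::vector<int> shuffleCastMask(unsigned numEltsFrom, unsigned numEltsTo) {
  std::vector<int> mask;
  unsigned keep = std::min(numEltsFrom, numEltsTo);
  for (unsigned i = 0; i != keep; ++i)
    mask.push_back((int)i);
  for (unsigned i = keep; i != numEltsTo; ++i)
    mask.push_back(-1);
  return mask;
}

int main() {
  // <3 x i32> -> <4 x i32>: the mask is <0, 1, 2, undef>.
  std::vector<int> mask = shuffleCastMask(3, 4);
  for (size_t i = 0; i < mask.size(); ++i)
    printf("%d ", mask[i]);
  printf("\n");
  return 0;
}
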
@@ -589,14 +734,46 @@ Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
uint64_t Offset, IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
- if (FromVal->getType() == ToType && Offset == 0)
+ const Type *FromType = FromVal->getType();
+ if (FromType == ToType && Offset == 0)
return FromVal;
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
- if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (ToType->isVectorTy())
- return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ if (ToTypeSize == AllocaSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, it is two vectors with the same element type that have
+ // the same allocation size but different numbers of elements, so use
+ // a shuffle vector.
+ if (FromType->getPrimitiveSizeInBits() ==
+ ToType->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ else
+ return CreateShuffleVectorCast(FromVal, ToType, Builder);
+ }
+
+ if (isPowerOf2_64(AllocaSize / ToTypeSize)) {
+ assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
+ "of a smaller vector type at a nonzero offset.");
+
+ const Type *CastElementTy = getScaledElementType(FromType, ToType,
+ ToTypeSize * 8);
+ unsigned NumCastVectorElements = AllocaSize / ToTypeSize;
+
+ LLVMContext &Context = FromVal->getContext();
+ const Type *CastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Extract, ToType, "tmp");
+ }
// Otherwise it must be an element access.
unsigned Elt = 0;
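
When the requested type covers only a power-of-two fraction of the alloca, the path above bitcasts the whole scalarized value to a vector whose element width matches the requested size, extracts one lane, and bitcasts that lane to ToType. The index arithmetic it relies on is small enough to sketch standalone; the struct and function names below are invented.

#include <cassert>
#include <cstdio>

struct ExtractPlan {
  unsigned EltBits;  // element width of the intermediate cast vector
  unsigned NumElts;  // number of elements in the cast vector
  unsigned Lane;     // which lane holds the requested bytes
};

// For an allocaBytes-wide value, a toTypeBytes-wide result, and a bit offset,
// compute the cast vector shape and the lane to extract.
static ExtractPlan planPartialExtract(unsigned allocaBytes, unsigned toTypeBytes,
                                      unsigned offsetBits) {
  ExtractPlan P;
  P.EltBits = toTypeBytes * 8;
  P.NumElts = allocaBytes / toTypeBytes;
  P.Lane = offsetBits / P.EltBits;
  assert(P.Lane * P.EltBits == offsetBits && "offset is not lane-aligned");
  return P;
}

int main() {
  // A float (4 bytes) read at bit offset 64 of a 16-byte alloca becomes:
  // bitcast to a 4-element vector of 32-bit elements, then extract lane 2.
  ExtractPlan P = planPartialExtract(16, 4, 64);
  printf("eltbits=%u numelts=%u lane=%u\n", P.EltBits, P.NumElts, P.Lane);
  return 0;
}
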
@@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// Changing the whole vector with memset or with an access of a different
// vector type?
- if (ValSize == VecSize)
- return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ if (ValSize == VecSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, it is two vectors with the same element type that have
+ // the same allocation size but different numbers of elements, so use
+ // a shuffle vector.
+ if (VTy->getPrimitiveSizeInBits() ==
+ SV->getType()->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ else
+ return CreateShuffleVectorCast(SV, VTy, Builder);
+ }
- uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ if (isPowerOf2_64(VecSize / ValSize)) {
+ assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
+ "value of a smaller vector type at a nonzero offset.");
- // Must be an element insertion.
- unsigned Elt = Offset/EltSize;
+ const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
+ ValSize);
+ unsigned NumCastVectorElements = VecSize / ValSize;
- if (SV->getType() != VTy->getElementType())
- SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+ LLVMContext &Context = SV->getContext();
+ const Type *OldCastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
- SV = Builder.CreateInsertElement(Old, SV,
+ Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Insert =
+ Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+ }
+
+ // Must be an element insertion.
+ assert(SV->getType() == VTy->getElementType());
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ unsigned Elt = Offset/EltSize;
+ return Builder.CreateInsertElement(Old, SV,
ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
"tmp");
- return SV;
}
// If SV is a first-class aggregate value, insert each value recursively.
@@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
}
const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
- PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+ PN->getName()+".ld", PN);
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ, it doesn't matter.
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ce5dd73..1137c2b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -73,7 +73,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
if (UseLLVMTrap) {
Function *TrapFn =
Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
- CallInst::Create(TrapFn, "", I);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
}
new UnreachableInst(I->getContext(), I);
@@ -259,11 +260,12 @@ static bool MergeEmptyReturnBlocks(Function &F) {
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
if (RetBlockPHI == 0) {
Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
- RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge",
+ pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
+ RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
+ std::distance(PB, PE), "merge",
&RetBlock->front());
- for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock);
- PI != E; ++PI)
+ for (pred_iterator PI = PB; PI != PE; ++PI)
RetBlockPHI->addIncoming(InVal, *PI);
RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
deleted file mode 100644
index 70ff32e..0000000
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a simple pass that applies an experimental
-// transformation on calls to specific functions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "simplify-libcalls-halfpowr"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-using namespace llvm;
-
-namespace {
- /// This pass optimizes well half_powr function calls.
- ///
- class SimplifyHalfPowrLibCalls : public FunctionPass {
- const TargetData *TD;
- public:
- static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
- initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- }
-
- Instruction *
- InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
- Instruction *InsertPt);
- };
- char SimplifyHalfPowrLibCalls::ID = 0;
-} // end anonymous namespace.
-
-INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
- "Simplify half_powr library calls", false, false)
-
-// Public interface to the Simplify HalfPowr LibCalls pass.
-FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
- return new SimplifyHalfPowrLibCalls();
-}
-
-/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging
-/// their control flow to better facilitate subsequent optimization.
-Instruction *
-SimplifyHalfPowrLibCalls::
-InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
- Instruction *InsertPt) {
- std::vector<BasicBlock *> Bodies;
- BasicBlock *NewBlock = 0;
-
- for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) {
- CallInst *Call = cast<CallInst>(HalfPowrs[i]);
- Function *Callee = Call->getCalledFunction();
-
- // Minimally sanity-check the CFG of half_powr to ensure that it contains
- // the kind of code we expect. If we're running this pass, we have
- // reason to believe it will be what we expect.
- Function::iterator I = Callee->begin();
- BasicBlock *Prologue = I++;
- if (I == Callee->end()) break;
- BasicBlock *SubnormalHandling = I++;
- if (I == Callee->end()) break;
- BasicBlock *Body = I++;
- if (I != Callee->end()) break;
- if (SubnormalHandling->getSinglePredecessor() != Prologue)
- break;
- BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator());
- if (!PBI || !PBI->isConditional())
- break;
- BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator());
- if (!SNBI || SNBI->isConditional())
- break;
- if (!isa<ReturnInst>(Body->getTerminator()))
- break;
-
- Instruction *NextInst = llvm::next(BasicBlock::iterator(Call));
-
- // Inline the call, taking care of what code ends up where.
- NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
-
- InlineFunctionInfo IFI(0, TD);
- bool B = InlineFunction(Call, IFI);
- assert(B && "half_powr didn't inline?");
- (void)B;
-
- BasicBlock *NewBody = NewBlock->getSinglePredecessor();
- assert(NewBody);
- Bodies.push_back(NewBody);
- }
-
- if (!NewBlock)
- return InsertPt;
-
- // Put the code for all the bodies into one block, to facilitate
- // subsequent optimization.
- (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this);
- for (unsigned i = 0, e = Bodies.size(); i != e; ++i) {
- BasicBlock *Body = Bodies[i];
- Instruction *FNP = Body->getFirstNonPHI();
- // Splice the insts from body into NewBlock.
- NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(),
- FNP, Body->getTerminator());
- }
-
- return NewBlock->begin();
-}
-
-/// runOnFunction - Top level algorithm.
-///
-bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<TargetData>();
-
- bool Changed = false;
- std::vector<Instruction *> HalfPowrs;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- // Look for calls.
- bool IsHalfPowr = false;
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // Look for direct calls and calls to non-external functions.
- Function *Callee = CI->getCalledFunction();
- if (Callee && Callee->hasExternalLinkage()) {
- // Look for calls with well-known names.
- if (Callee->getName() == "__half_powrf4")
- IsHalfPowr = true;
- }
- }
- if (IsHalfPowr)
- HalfPowrs.push_back(I);
- // We're looking for sequences of up to three such calls, which we'll
- // simplify as a group.
- if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) {
- I = InlineHalfPowrs(HalfPowrs, I);
- E = I->getParent()->end();
- HalfPowrs.clear();
- Changed = true;
- }
- }
- assert(HalfPowrs.empty() && "Block had no terminator!");
- }
-
- return Changed;
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 9f136d4..6247b03 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -49,6 +49,7 @@ class LibCallOptimization {
protected:
Function *Caller;
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
LLVMContext* Context;
public:
LibCallOptimization() { }
@@ -62,9 +63,11 @@ public:
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
=0;
- Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) {
+ Value *OptimizeCall(CallInst *CI, const TargetData *TD,
+ const TargetLibraryInfo *TLI, IRBuilder<> &B) {
Caller = CI->getParent()->getParent();
this->TD = TD;
+ this->TLI = TLI;
if (CI->getCalledFunction())
Context = &CI->getCalledFunction()->getContext();
@@ -97,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
}
return true;
}
+
+static bool CallHasFloatingPointArgument(const CallInst *CI) {
+ for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+ it != e; ++it) {
+ if ((*it)->getType()->isFloatingPointTy())
+ return true;
+ }
+ return false;
+}
/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality
/// comparisons with With.
@@ -1075,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization {
// 'printf' Optimizations
struct PrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require one fixed pointer argument and an integer/void result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() ||
- FT->getReturnType()->isVoidTy()))
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// Check for a fixed format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
@@ -1138,20 +1144,40 @@ struct PrintFOpt : public LibCallOptimization {
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
};
//===---------------------------------------===//
// 'sprintf' Optimizations
struct SPrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require two fixed pointer arguments and an integer result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// Check for a fixed format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1212,6 +1238,32 @@ struct SPrintFOpt : public LibCallOptimization {
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
};
//===---------------------------------------===//
@@ -1278,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization {
// 'fprintf' Optimizations
struct FPrintFOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require two fixed paramters as pointers and integer result.
- const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
+ Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
// All the optimizations depend on the format string.
std::string FormatStr;
if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1330,6 +1376,32 @@ struct FPrintFOpt : public LibCallOptimization {
}
return 0;
}
+
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed parameters as pointers and an integer result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
};
//===---------------------------------------===//
@@ -1544,8 +1616,11 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
// Set the builder to the instruction after the call.
Builder.SetInsertPoint(BB, I);
+ // Use debug location of CI for all new instructions.
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
// Try to optimize this call.
- Value *Result = LCO->OptimizeCall(CI, TD, Builder);
+ Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder);
if (Result == 0) continue;
DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI;
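
The printf, sprintf and fprintf hunks above all hinge on the same test: the integer-only iprintf/siprintf/fiprintf variants (provided by some embedded C libraries such as newlib, hence the TargetLibraryInfo query) may only be substituted when no argument of the call is floating point. A standalone sketch of that argument scan, using C++ type traits in place of LLVM types and with invented names:

#include <cstdio>
#include <type_traits>

// Returns true if any argument has floating-point type; the rewrite to the
// iprintf family is only performed when this is false.
static bool anyFloatingPointArg() { return false; }

template <typename T, typename... Rest>
static bool anyFloatingPointArg(const T &, const Rest &... rest) {
  return std::is_floating_point<T>::value || anyFloatingPointArg(rest...);
}

int main() {
  int n = 42;
  double d = 3.5;
  const char *s = "str";
  // printf("%d %s\n", n, s): no FP argument, eligible for iprintf.
  printf("%d\n", (int)anyFloatingPointArg(n, s));  // 0
  // printf("%d %f\n", n, d): has an FP argument, keep printf.
  printf("%d\n", (int)anyFloatingPointArg(n, d));  // 1
  return 0;
}
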
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 5b6bc04..539cc6f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -36,7 +36,7 @@
// evaluated each time through the tail recursion. Safely keeping allocas
// in the entry block requires analysis to proves that the tail-called
// function does not read or write the stack object.
-// 2. Tail recursion is only performed if the call immediately preceeds the
+// 2. Tail recursion is only performed if the call immediately precedes the
// return instruction. It's possible that there could be a jump between
// the call and the return.
// 3. There can be intervening operations between the call and the return that
@@ -433,7 +433,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
if (CanMoveAboveCall(BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
- // is an associative and commutative operation that could be tranformed
+ // is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
// case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
@@ -496,7 +496,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
Instruction *InsertPos = OldEntry->begin();
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I) {
- PHINode *PN = PHINode::Create(I->getType(),
+ PHINode *PN = PHINode::Create(I->getType(), 2,
I->getName() + ".tr", InsertPos);
I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
PN->addIncoming(I, NewEntry);
@@ -527,8 +527,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
if (AccumulatorRecursionEliminationInitVal) {
Instruction *AccRecInstr = AccumulatorRecursionInstr;
// Start by inserting a new PHI node for the accumulator.
+ pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
PHINode *AccPN =
PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1,
"accumulator.tr", OldEntry->begin());
// Loop over all of the predecessors of the tail recursion block. For the
@@ -537,8 +539,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// other tail recursions eliminated) the accumulator is not modified.
// Because we haven't added the branch in the current block to OldEntry yet,
// it will not show up as a predecessor.
- for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
- PI != PE; ++PI) {
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
if (P == &F->getEntryBlock())
AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
@@ -572,7 +573,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// Now that all of the PHI nodes are in place, remove the call and
// ret instructions, replacing them with an unconditional branch.
- BranchInst::Create(OldEntry, Ret);
+ BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
+ NewBI->setDebugLoc(CI->getDebugLoc());
+
BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call.
++NumEliminated;
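
The TailRecursionElimination changes above are mechanical (preallocating PHI operand counts and propagating the call's debug location to the new branch), but the accumulator-recursion path they touch is easiest to see at the source level. The hand-written before/after below illustrates what accumulator recursion elimination achieves for a multiplicative accumulator; it is an illustration, not code taken from this commit.

#include <cstdio>

// Before: the recursive call is not a true tail call, because its result
// still feeds a multiply in the caller.
static long facRecursive(long n) {
  if (n <= 1)
    return 1;
  return n * facRecursive(n - 1);
}

// After: since '*' is associative and commutative, the pending multiplies are
// folded into an accumulator (the role of the new "accumulator.tr" PHI node),
// and the recursion becomes a branch back to the entry of the loop.
static long facAccumulated(long n) {
  long acc = 1;  // initial accumulator value
  for (; n > 1; --n)
    acc *= n;
  return acc;
}

int main() {
  printf("%ld %ld\n", facRecursive(10), facAccumulated(10));  // both print 3628800
  return 0;
}
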