author    rdivacky <rdivacky@FreeBSD.org>  2009-11-18 14:58:34 +0000
committer rdivacky <rdivacky@FreeBSD.org>  2009-11-18 14:58:34 +0000
commit    d2e985fd323c167e20f77b045a1d99ad166e65db (patch)
tree      6a111e552c75afc66228e3d8f19b6731e4013f10 /lib/Transforms/Scalar
parent    ded64d5d348ce8d8c5aa42cf63f6de9dd84b7e89 (diff)
Update LLVM to r89205.
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp                       |  37
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt                 |   1
-rw-r--r--  lib/Transforms/Scalar/CondPropagate.cpp              | 289
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp               |   2
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp       | 193
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                        |  40
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp             |   4
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp       | 631
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp              | 735
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp                       |   9
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp               |   4
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp             |   4
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp               |  13
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp         | 876
-rw-r--r--  lib/Transforms/Scalar/LoopUnroll.cpp                 | 177
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp               |   7
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp                |  36
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp                       |  11
-rw-r--r--  lib/Transforms/Scalar/SCCVN.cpp                      |   6
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp                     |   4
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp           |  41
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp            |   3
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp   |  32
23 files changed, 1716 insertions(+), 1439 deletions(-)
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index c8541d7..e58fa63 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -412,7 +412,9 @@ class ABCD : public FunctionPass {
/// If PN_op1 and PN_o2 are different from NULL, create a constraint
/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
/// with the respective V_op#, if V_op# is a ConstantInt.
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, APInt value);
+ void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ ConstantInt *V_op1, ConstantInt *V_op2,
+ APInt value);
/// Returns the sigma representing the Instruction I in BasicBlock BB.
/// Returns NULL in case there is no sigma for this Instruction in this
@@ -735,25 +737,27 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
APInt Zero = APInt::getNullValue(width);
CmpInst::Predicate Pred = ICI->getPredicate();
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
+ ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
switch (Pred) {
case CmpInst::ICMP_SGT: // signed greater than
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, MinusOne);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, Zero);
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
break;
case CmpInst::ICMP_SGE: // signed greater or equal
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, Zero);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, MinusOne);
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
break;
case CmpInst::ICMP_SLT: // signed less than
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, MinusOne);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, Zero);
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
break;
case CmpInst::ICMP_SLE: // signed less or equal
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, Zero);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, MinusOne);
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
break;
default:
@@ -772,6 +776,10 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
/// b->a and c->a with weight 0 in the lower bound graph, and the edges
/// a->b and a->c with weight 0 in the upper bound graph.
void ABCD::createConstraintPHINode(PHINode *PN) {
+ // FIXME: We really want to disallow sigma nodes, but I don't know the best
+ // way to detect them other than this.
+ if (PN->getNumOperands() == 2) return;
+
int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
@@ -796,13 +804,11 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
- created.insert(*SIG_op_t);
}
if (*SIG_op_f) {
int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
- created.insert(*SIG_op_f);
}
}
@@ -810,10 +816,17 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
/// with the respective V_op#, if V_op# is a ConstantInt.
void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ ConstantInt *V_op1, ConstantInt *V_op2,
APInt value) {
if (SIG_op1 && SIG_op2) {
inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
+ } else if (SIG_op1 && V_op2) {
+ inequality_graph.addEdge(V_op2, SIG_op1, value, true);
+ inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
+ } else if (SIG_op2 && V_op1) {
+ inequality_graph.addEdge(SIG_op2, V_op1, value, true);
+ inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
}
}
@@ -1036,7 +1049,7 @@ void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
/// Prints the body of the dot file
void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator begin =
+ DenseMap<Value *, SmallPtrSet<Edge *, 16> >::const_iterator begin =
graph.begin(), end = graph.end();
for (; begin != end ; ++begin) {
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index e048518..5a92399 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_library(LLVMScalarOpts
ADCE.cpp
BasicBlockPlacement.cpp
CodeGenPrepare.cpp
- CondPropagate.cpp
ConstantProp.cpp
DCE.cpp
DeadStoreElimination.cpp
diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp
deleted file mode 100644
index 8a6c556..0000000
--- a/lib/Transforms/Scalar/CondPropagate.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-//===-- CondPropagate.cpp - Propagate Conditional Expressions -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass propagates information about conditional expressions through the
-// program, allowing it to eliminate conditional branches in some cases.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "condprop"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-
-STATISTIC(NumBrThread, "Number of CFG edges threaded through branches");
-STATISTIC(NumSwThread, "Number of CFG edges threaded through switches");
-
-namespace {
- struct CondProp : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CondProp() : FunctionPass(&ID) {}
-
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(BreakCriticalEdgesID);
- //AU.addRequired<DominanceFrontier>();
- }
-
- private:
- bool MadeChange;
- SmallVector<BasicBlock *, 4> DeadBlocks;
- void SimplifyBlock(BasicBlock *BB);
- void SimplifyPredecessors(BranchInst *BI);
- void SimplifyPredecessors(SwitchInst *SI);
- void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB);
- bool RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI);
- };
-}
-
-char CondProp::ID = 0;
-static RegisterPass<CondProp> X("condprop", "Conditional Propagation");
-
-FunctionPass *llvm::createCondPropagationPass() {
- return new CondProp();
-}
-
-bool CondProp::runOnFunction(Function &F) {
- bool EverMadeChange = false;
- DeadBlocks.clear();
-
- // While we are simplifying blocks, keep iterating.
- do {
- MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E;)
- SimplifyBlock(BB++);
- EverMadeChange = EverMadeChange || MadeChange;
- } while (MadeChange);
-
- if (EverMadeChange) {
- while (!DeadBlocks.empty()) {
- BasicBlock *BB = DeadBlocks.back(); DeadBlocks.pop_back();
- DeleteDeadBlock(BB);
- }
- }
- return EverMadeChange;
-}
-
-void CondProp::SimplifyBlock(BasicBlock *BB) {
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // If this is a conditional branch based on a phi node that is defined in
- // this block, see if we can simplify predecessors of this block.
- if (BI->isConditional() && isa<PHINode>(BI->getCondition()) &&
- cast<PHINode>(BI->getCondition())->getParent() == BB)
- SimplifyPredecessors(BI);
-
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (isa<PHINode>(SI->getCondition()) &&
- cast<PHINode>(SI->getCondition())->getParent() == BB)
- SimplifyPredecessors(SI);
- }
-
- // If possible, simplify the terminator of this block.
- if (ConstantFoldTerminator(BB))
- MadeChange = true;
-
- // If this block ends with an unconditional branch and the only successor has
- // only this block as a predecessor, merge the two blocks together.
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- if (BI->isUnconditional() && BI->getSuccessor(0)->getSinglePredecessor() &&
- BB != BI->getSuccessor(0)) {
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // If Succ has any PHI nodes, they are all single-entry PHI's. Eliminate
- // them.
- FoldSingleEntryPHINodes(Succ);
-
- // Remove BI.
- BI->eraseFromParent();
-
- // Move over all of the instructions.
- BB->getInstList().splice(BB->end(), Succ->getInstList());
-
- // Any phi nodes that had entries for Succ now have entries from BB.
- Succ->replaceAllUsesWith(BB);
-
- // Succ is now dead, but we cannot delete it without potentially
- // invalidating iterators elsewhere. Just insert an unreachable
- // instruction in it and delete this block later on.
- new UnreachableInst(BB->getContext(), Succ);
- DeadBlocks.push_back(Succ);
- MadeChange = true;
- }
-}
-
-// SimplifyPredecessors(branches) - We know that BI is a conditional branch
-// based on a PHI node defined in this block. If the phi node contains constant
-// operands, then the blocks corresponding to those operands can be modified to
-// jump directly to the destination instead of going through this block.
-void CondProp::SimplifyPredecessors(BranchInst *BI) {
- // TODO: We currently only handle the most trival case, where the PHI node has
- // one use (the branch), and is the only instruction besides the branch and dbg
- // intrinsics in the block.
- PHINode *PN = cast<PHINode>(BI->getCondition());
-
- if (PN->getNumIncomingValues() == 1) {
- // Eliminate single-entry PHI nodes.
- FoldSingleEntryPHINodes(PN->getParent());
- return;
- }
-
-
- if (!PN->hasOneUse()) return;
-
- BasicBlock *BB = BI->getParent();
- if (&*BB->begin() != PN)
- return;
- BasicBlock::iterator BBI = BB->begin();
- BasicBlock::iterator BBE = BB->end();
- while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
- if (&*BBI != BI)
- return;
-
- // Ok, we have this really simple case, walk the PHI operands, looking for
- // constants. Walk from the end to remove operands from the end when
- // possible, and to avoid invalidating "i".
- for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) {
- Value *InVal = PN->getIncomingValue(i-1);
- if (!RevectorBlockTo(PN->getIncomingBlock(i-1), InVal, BI))
- continue;
-
- ++NumBrThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (BI->getCondition() != PN) return;
- }
-}
-
-// SimplifyPredecessors(switch) - We know that SI is switch based on a PHI node
-// defined in this block. If the phi node contains constant operands, then the
-// blocks corresponding to those operands can be modified to jump directly to
-// the destination instead of going through this block.
-void CondProp::SimplifyPredecessors(SwitchInst *SI) {
- // TODO: We currently only handle the most trival case, where the PHI node has
- // one use (the branch), and is the only instruction besides the branch and
- // dbg intrinsics in the block.
- PHINode *PN = cast<PHINode>(SI->getCondition());
- if (!PN->hasOneUse()) return;
-
- BasicBlock *BB = SI->getParent();
- if (&*BB->begin() != PN)
- return;
- BasicBlock::iterator BBI = BB->begin();
- BasicBlock::iterator BBE = BB->end();
- while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
- if (&*BBI != SI)
- return;
-
- // Ok, we have this really simple case, walk the PHI operands, looking for
- // constants. Walk from the end to remove operands from the end when
- // possible, and to avoid invalidating "i".
- for (unsigned i = PN->getNumIncomingValues(); i != 0; --i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) {
- BasicBlock *PredBB = PN->getIncomingBlock(i-1);
- if (isa<BranchInst>(PredBB->getTerminator())) {
- // If we have a constant, forward the edge from its current to its
- // ultimate destination.
- unsigned DestCase = SI->findCaseValue(CI);
- RevectorBlockTo(PredBB, SI->getSuccessor(DestCase));
- ++NumSwThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (SI->getCondition() != PN) return;
- }
- }
-}
-
-
-// RevectorBlockTo - Revector the unconditional branch at the end of FromBB to
-// the ToBB block, which is one of the successors of its current successor.
-void CondProp::RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB) {
- BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
- assert(FromBr->isUnconditional() && "FromBB should end with uncond br!");
-
- // Get the old block we are threading through.
- BasicBlock *OldSucc = FromBr->getSuccessor(0);
-
- // OldSucc had multiple successors. If ToBB has multiple predecessors, then
- // the edge between them would be critical, which we already took care of.
- // If ToBB has single operand PHI node then take care of it here.
- FoldSingleEntryPHINodes(ToBB);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Change FromBr to branch to the new destination.
- FromBr->setSuccessor(0, ToBB);
-
- MadeChange = true;
-}
-
-bool CondProp::RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI){
- BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
- if (!FromBr->isUnconditional())
- return false;
-
- // Get the old block we are threading through.
- BasicBlock *OldSucc = FromBr->getSuccessor(0);
-
- // If the condition is a constant, simply revector the unconditional branch at
- // the end of FromBB to one of the successors of its current successor.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) {
- BasicBlock *ToBB = BI->getSuccessor(CB->isZero());
-
- // OldSucc had multiple successors. If ToBB has multiple predecessors, then
- // the edge between them would be critical, which we already took care of.
- // If ToBB has single operand PHI node then take care of it here.
- FoldSingleEntryPHINodes(ToBB);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Change FromBr to branch to the new destination.
- FromBr->setSuccessor(0, ToBB);
- } else {
- BasicBlock *Succ0 = BI->getSuccessor(0);
- // Do not perform transform if the new destination has PHI nodes. The
- // transform will add new preds to the PHI's.
- if (isa<PHINode>(Succ0->begin()))
- return false;
-
- BasicBlock *Succ1 = BI->getSuccessor(1);
- if (isa<PHINode>(Succ1->begin()))
- return false;
-
- // Insert the new conditional branch.
- BranchInst::Create(Succ0, Succ1, Cond, FromBr);
-
- FoldSingleEntryPHINodes(Succ0);
- FoldSingleEntryPHINodes(Succ1);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Delete the old branch.
- FromBr->eraseFromParent();
- }
-
- MadeChange = true;
- return true;
-}
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 4fee327..ea20813 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -66,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, F.getContext())) {
+ if (Constant *C = ConstantFoldInstruction(I)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 90436f4..b0988b5 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -78,19 +78,96 @@ static RegisterPass<DSE> X("dse", "Dead Store Elimination");
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
-/// isValueAtLeastAsBigAs - Return true if V1 is greater than or equal to the
-/// stored size of V2. This returns false if we don't know.
+/// doesClobberMemory - Does this instruction clobber (write without reading)
+/// some memory?
+static bool doesClobberMemory(Instruction *I) {
+ if (isa<StoreInst>(I))
+ return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true;
+ }
+ }
+ return false;
+}
+
+/// isElidable - If the value of this instruction and the memory it writes to
+/// are unused, may we delete this instruction?
+static bool isElidable(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() != Intrinsic::lifetime_end;
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+ return true;
+}
+
+/// getPointerOperand - Return the pointer that is being clobbered.
+static Value *getPointerOperand(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return MI->getOperand(1);
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getOperand(1);
+ case Intrinsic::lifetime_end:
+ return II->getOperand(2);
+ }
+}
+
+/// getStoreSize - Return the length in bytes of the write by the clobbering
+/// instruction. If variable or unknown, returns -1.
+static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!TD) return -1u;
+ return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ }
+
+ Value *Len;
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ Len = MI->getLength();
+ } else {
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return -1u;
+ case Intrinsic::lifetime_end:
+ Len = II->getOperand(1);
+ break;
+ }
+ }
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
+ if (!LenCI->isAllOnesValue())
+ return LenCI->getZExtValue();
+ return -1u;
+}
+
+/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
+/// greater than or equal to the store in I2. This returns false if we don't
+/// know.
///
-static bool isValueAtLeastAsBigAs(Value *V1, Value *V2, const TargetData *TD) {
- const Type *V1Ty = V1->getType(), *V2Ty = V2->getType();
+static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
+ const TargetData *TD) {
+ const Type *I1Ty = getPointerOperand(I1)->getType();
+ const Type *I2Ty = getPointerOperand(I2)->getType();
// Exactly the same type, must have exactly the same size.
- if (V1Ty == V2Ty) return true;
+ if (I1Ty == I2Ty) return true;
- // If we don't have target data, we don't know.
- if (TD == 0) return false;
+ int I1Size = getStoreSize(I1, TD);
+ int I2Size = getStoreSize(I2, TD);
- return TD->getTypeStoreSize(V1Ty) >= TD->getTypeStoreSize(V2Ty);
+ return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
}
bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -104,14 +181,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// If we find a store or a free, get its memory dependence.
- if (!isa<StoreInst>(Inst) && !isFreeCall(Inst))
+ if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
continue;
- // Don't molest volatile stores or do queries that will return "clobber".
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- if (SI->isVolatile())
- continue;
-
MemDepResult InstDep = MD.getDependency(Inst);
// Ignore non-local stores.
@@ -124,16 +196,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
continue;
}
- StoreInst *SI = cast<StoreInst>(Inst);
-
// If not a definite must-alias dependency, ignore it.
if (!InstDep.isDef())
continue;
// If this is a store-store dependence, then the previous store is dead so
// long as this store is at least as big as it.
- if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
- if (isValueAtLeastAsBigAs(SI->getOperand(0), DepStore->getOperand(0),TD)){
+ if (doesClobberMemory(InstDep.getInst())) {
+ Instruction *DepStore = InstDep.getInst();
+ if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
+ isElidable(DepStore)) {
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepStore);
NumFastStores++;
@@ -146,37 +218,43 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
--BBI;
continue;
}
+ }
+
+ if (!isElidable(Inst))
+ continue;
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
- if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
- if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad) {
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
-
- DeleteDeadInstruction(SI);
-
- if (NextInst == 0) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
- NumFastStores++;
- MadeChange = true;
- continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ NumFastStores++;
+ MadeChange = true;
+ continue;
+ }
}
}
// If this is a lifetime end marker, we can throw away the store.
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
// Delete the store and now-dead instructions that feed it.
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI);
+ DeleteDeadInstruction(Inst);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -202,11 +280,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- StoreInst *Dependency = dyn_cast_or_null<StoreInst>(Dep.getInst());
- if (!Dependency || Dependency->isVolatile())
+ Instruction *Dependency = Dep.getInst();
+ if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
return false;
- Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject();
+ Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
// Check for aliasing.
if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
@@ -251,39 +329,28 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store whose pointer is dead.
- if (StoreInst* S = dyn_cast<StoreInst>(BBI)) {
- if (!S->isVolatile()) {
+ if (doesClobberMemory(BBI)) {
+ if (isElidable(BBI)) {
// See through pointer-to-pointer bitcasts
- Value* pointerOperand = S->getPointerOperand()->getUnderlyingObject();
+ Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
// Alloca'd pointers or byval arguments (which are functionally like
// alloca's) are valid candidates for removal.
if (deadPointers.count(pointerOperand)) {
// DCE instructions only used to calculate that store.
+ Instruction *Dead = BBI;
BBI++;
- DeleteDeadInstruction(S, &deadPointers);
+ DeleteDeadInstruction(Dead, &deadPointers);
NumFastStores++;
MadeChange = true;
+ continue;
}
}
- continue;
- }
-
- // We can also remove memcpy's to local variables at the end of a function.
- if (MemCpyInst *M = dyn_cast<MemCpyInst>(BBI)) {
- Value *dest = M->getDest()->getUnderlyingObject();
-
- if (deadPointers.count(dest)) {
- BBI++;
- DeleteDeadInstruction(M, &deadPointers);
- NumFastOther++;
- MadeChange = true;
+ // Because a memcpy or memmove is also a load, we can't skip it if we
+ // didn't remove it.
+ if (!isa<MemTransferInst>(BBI))
continue;
- }
-
- // Because a memcpy is also a load, we can't skip it if we didn't remove
- // it.
}
Value* killPointer = 0;
@@ -304,11 +371,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
killPointer = L->getPointerOperand();
} else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) {
killPointer = V->getOperand(0);
- } else if (isa<MemCpyInst>(BBI) &&
- isa<ConstantInt>(cast<MemCpyInst>(BBI)->getLength())) {
- killPointer = cast<MemCpyInst>(BBI)->getSource();
+ } else if (isa<MemTransferInst>(BBI) &&
+ isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
+ killPointer = cast<MemTransferInst>(BBI)->getSource();
killPointerSize = cast<ConstantInt>(
- cast<MemCpyInst>(BBI)->getLength())->getZExtValue();
+ cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
} else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) {
deadPointers.erase(A);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0e3f750..a8f39c1 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -443,6 +443,11 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
valueNumbering[C] = e;
return e;
}
+ if (!MD) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
MemDepResult local_dep = MD->getDependency(C);
@@ -624,7 +629,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
- DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
assert(VI != valueNumbering.end() && "Value not numbered?");
return VI->second;
}
@@ -644,7 +649,7 @@ void ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::iterator
+ for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
@@ -669,10 +674,12 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- GVN(bool nopre = false) : FunctionPass(&ID), NoPRE(nopre) { }
+ explicit GVN(bool nopre = false, bool noloads = false)
+ : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
private:
bool NoPRE;
+ bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
@@ -682,7 +689,8 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
- AU.addRequired<MemoryDependenceAnalysis>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<DominatorTree>();
@@ -711,7 +719,9 @@ namespace {
}
// createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoPRE) { return new GVN(NoPRE); }
+FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
+ return new GVN(NoPRE, NoLoads);
+}
static RegisterPass<GVN> X("gvn",
"Global Value Numbering");
@@ -1476,6 +1486,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+ if (!MD)
+ return false;
+
if (L->isVolatile())
return false;
@@ -1686,7 +1699,7 @@ bool GVN::processInstruction(Instruction *I,
if (constVal) {
p->replaceAllUsesWith(constVal);
- if (isa<PointerType>(constVal->getType()))
+ if (MD && isa<PointerType>(constVal->getType()))
MD->invalidateCachedPointerInfo(constVal);
VN.erase(p);
@@ -1707,7 +1720,7 @@ bool GVN::processInstruction(Instruction *I,
// Remove it!
VN.erase(I);
I->replaceAllUsesWith(repl);
- if (isa<PointerType>(repl->getType()))
+ if (MD && isa<PointerType>(repl->getType()))
MD->invalidateCachedPointerInfo(repl);
toErase.push_back(I);
return true;
@@ -1721,7 +1734,8 @@ bool GVN::processInstruction(Instruction *I,
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
- MD = &getAnalysis<MemoryDependenceAnalysis>();
+ if (!NoLoads)
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -1793,7 +1807,7 @@ bool GVN::processBlock(BasicBlock *BB) {
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I) {
DEBUG(errs() << "GVN removed: " << **I << '\n');
- MD->removeInstruction(*I);
+ if (MD) MD->removeInstruction(*I);
(*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
}
@@ -1946,12 +1960,12 @@ bool GVN::performPRE(Function &F) {
localAvail[CurrentBlock]->table[ValNo] = Phi;
CurInst->replaceAllUsesWith(Phi);
- if (isa<PointerType>(Phi->getType()))
+ if (MD && isa<PointerType>(Phi->getType()))
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
- MD->removeInstruction(CurInst);
+ if (MD) MD->removeInstruction(CurInst);
CurInst->eraseFromParent();
DEBUG(verifyRemoved(CurInst));
Changed = true;
@@ -2011,12 +2025,12 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
const ValueNumberScope *VNS = I->second;
while (VNS) {
- for (DenseMap<uint32_t, Value*>::iterator
+ for (DenseMap<uint32_t, Value*>::const_iterator
II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
assert(II->second != Inst && "Inst still in value numbering scope!");
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index b0bc70c..2912421 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -536,8 +536,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 7e75cfb..1c48366 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -42,6 +42,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
@@ -283,6 +284,8 @@ namespace {
Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
Instruction *visitPHINode(PHINode &PN);
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
@@ -380,10 +383,6 @@ namespace {
/// commutative operators.
bool SimplifyCommutative(BinaryOperator &I);
- /// SimplifyCompare - This reorders the operands of a CmpInst to get them in
- /// most-complex to least-complex order.
- bool SimplifyCompare(CmpInst &I);
-
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
@@ -478,6 +477,34 @@ static const Type *getPromotedType(const Type *Ty) {
return Ty;
}
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type for example, or from a smaller to a larger illegal type.
+static bool ShouldChangeType(const Type *From, const Type *To,
+ const TargetData *TD) {
+ assert(isa<IntegerType>(From) && isa<IntegerType>(To));
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If this is a legal integer from type, and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
+
/// getBitCastOperand - If the specified operand is a CastInst, a constant
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
@@ -584,17 +611,6 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
return Changed;
}
-/// SimplifyCompare - For a CmpInst this function just orders the operands
-/// so that theyare listed from right (least complex) to left (most complex).
-/// This puts constants before unary operators before binary operators.
-bool InstCombiner::SimplifyCompare(CmpInst &I) {
- if (getComplexity(I.getOperand(0)) >= getComplexity(I.getOperand(1)))
- return false;
- I.swapOperands();
- // Compare instructions are not associative so there's nothing else we can do.
- return true;
-}
-
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
//
@@ -4304,25 +4320,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // X & undef -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // and X, X = X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Op1);
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType())) {
- if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X & <-1,-1> -> X
- return ReplaceInstUsesWith(I, I.getOperand(0));
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
- }
- }
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
@@ -4443,42 +4449,29 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return NV;
}
- Value *Op0NotVal = dyn_castNotVal(Op0);
- Value *Op1NotVal = dyn_castNotVal(Op1);
-
- if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// (~A & ~B) == (~(A | B)) - De Morgan's Law
- if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(Or);
- }
-
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
{
Value *A = 0, *B = 0, *C = 0, *D = 0;
- if (match(Op0, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op1 || B == Op1) // (A | ?) & A --> A
- return ReplaceInstUsesWith(I, Op1);
-
- // (A|B) & ~(A&B) -> A^B
- if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- }
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
- if (match(Op1, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op0 || B == Op0) // A & (A | ?) --> A
- return ReplaceInstUsesWith(I, Op0);
-
- // ~(A&B) & (A|B) -> A^B
- if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- }
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
if (Op0->hasOneUse() &&
match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
@@ -5010,27 +5003,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // X | undef -> -1
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- // or X, X = X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Op0);
-
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType())) {
- if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
- return ReplaceInstUsesWith(I, I.getOperand(1));
- }
- }
- // or X, -1 == -1
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
@@ -5063,13 +5044,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *A = 0, *B = 0;
ConstantInt *C1 = 0, *C2 = 0;
- if (match(Op0, m_And(m_Value(A), m_Value(B))))
- if (A == Op1 || B == Op1) // (A & ?) | A --> A
- return ReplaceInstUsesWith(I, Op1);
- if (match(Op1, m_And(m_Value(A), m_Value(B))))
- if (A == Op0 || B == Op0) // A | (A & ?) --> A
- return ReplaceInstUsesWith(I, Op0);
-
// (A | B) | C and A | (B | C) -> bswap if possible.
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
if (match(Op0, m_Or(m_Value(), m_Value())) ||
@@ -5203,23 +5177,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Ret) return Ret;
}
- if ((A = dyn_castNotVal(Op0))) { // ~A | Op1
- if (A == Op1) // ~A | A == -1
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
- } else {
- A = 0;
- }
- // Note, A is still live here!
- if ((B = dyn_castNotVal(Op1))) { // Op0 | ~B
- if (Op0 == B)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- // (~A | ~B) == (~(A & B)) - De Morgan's Law
- if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan");
- return BinaryOperator::CreateNot(And);
- }
- }
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
@@ -5942,28 +5907,25 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
}
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
- bool Changed = SimplifyCompare(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
- // Fold trivial predicates.
- if (I.getPredicate() == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
- if (I.getPredicate() == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Simplify 'fcmp pred X, X'
if (Op0 == Op1) {
switch (I.getPredicate()) {
default: llvm_unreachable("Unknown predicate!");
- case FCmpInst::FCMP_UEQ: // True if unordered or equal
- case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal
- case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
- case FCmpInst::FCMP_OGT: // True if ordered and greater than
- case FCmpInst::FCMP_OLT: // True if ordered and less than
- case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
-
case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
case FCmpInst::FCMP_ULT: // True if unordered or less than
case FCmpInst::FCMP_UGT: // True if unordered or greater than
@@ -5984,23 +5946,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
}
- if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- // If the constant is a nan, see if we can fold the comparison based on it.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->getValueAPF().isNaN()) {
- if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and...
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- assert(FCmpInst::isUnordered(I.getPredicate()) &&
- "Comparison must be either ordered or unordered!");
- // True if unordered.
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- }
- }
-
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
switch (LHSI->getOpcode()) {
case Instruction::PHI:
@@ -6047,26 +5994,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
- bool Changed = SimplifyCompare(I);
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- const Type *Ty = Op0->getType();
-
- // icmp X, X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(),
- I.isTrueWhenEqual()));
-
- if (isa<UndefValue>(Op1)) // X icmp undef -> undef
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
- // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
- // addresses never equal each other! We already know that Op0 != Op1.
- if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
- isa<ConstantPointerNull>(Op0)) &&
- (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
- isa<ConstantPointerNull>(Op1)))
- return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),
- !I.isTrueWhenEqual()));
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ const Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
if (Ty == Type::getInt1Ty(*Context)) {
@@ -6131,27 +6074,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If we have an icmp le or icmp ge instruction, turn it into the
// appropriate icmp lt or icmp gt instruction. This allows us to rely on
- // them being folded in the code below.
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
switch (I.getPredicate()) {
default: break;
case ICmpInst::ICMP_ULE:
- if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
AddOne(CI));
case ICmpInst::ICMP_SLE:
- if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
AddOne(CI));
case ICmpInst::ICMP_UGE:
- if (CI->isMinValue(false)) // A >=u MIN -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
SubOne(CI));
case ICmpInst::ICMP_SGE:
- if (CI->isMinValue(true)) // A >=s MIN -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
SubOne(CI));
}
@@ -8083,8 +8023,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V))
- return ConstantExpr::getIntegerCast(C, Ty,
- isSigned /*Sext or ZExt*/);
+ return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
@@ -8117,8 +8056,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
return I->getOperand(0);
// Otherwise, must be the same type of cast, so just reinsert a new one.
- Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
- Ty);
+ Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),Ty);
break;
case Instruction::Select: {
Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
@@ -8167,9 +8105,15 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return NV;
// If we are casting a PHI then fold the cast into the PHI
- if (isa<PHINode>(Src))
- if (Instruction *NV = FoldOpIntoPhi(CI))
- return NV;
+ if (isa<PHINode>(Src)) {
+ // We don't do this if it would create a PHI node with an illegal type when
+ // the current type is legal.
+ if (!isa<IntegerType>(Src->getType()) ||
+ !isa<IntegerType>(CI.getType()) ||
+ ShouldChangeType(CI.getType(), Src->getType(), TD))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
return 0;
}
@@ -8289,23 +8233,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return commonCastTransforms(CI);
}
-/// isSafeIntegerType - Return true if this is a basic integer type, not a crazy
-/// type like i42. We don't want to introduce operations on random non-legal
-/// integer types where they don't already exist in the code. In the future,
-/// we should consider making this based off target-data, so that 32-bit targets
-/// won't get i64 operations etc.
-static bool isSafeIntegerType(const Type *Ty) {
- switch (Ty->getPrimitiveSizeInBits()) {
- case 8:
- case 16:
- case 32:
- case 64:
- return true;
- default:
- return false;
- }
-}
-
/// commonIntCastTransforms - This function implements the common transforms
/// for trunc, zext, and sext.
Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
@@ -8334,8 +8261,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isSafeIntegerType(DestTy->getScalarType()) ||
- !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+ if ((isa<VectorType>(DestTy) ||
+ ShouldChangeType(SrcI->getType(), DestTy, TD)) &&
CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
// If this cast is a truncate, evaluting in a different type always
@@ -8356,6 +8283,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
break;
case Instruction::ZExt: {
DoXForm = NumCastsRemoved >= 1;
+
if (!DoXForm && 0) {
// If it's unnecessary to issue an AND to clear the high bits, it's
// always profitable to do this xform.
@@ -8522,7 +8450,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return BinaryOperator::CreateLShr(V1, V2);
}
}
-
+
return 0;
}
@@ -10880,9 +10808,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
}
-// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
-// operator and they all are only used by the PHI, PHI together their
-// inputs, and do the operation once, to the result of the PHI.
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
@@ -10900,6 +10829,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy, TD))
+ return 0;
+ }
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
// Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
@@ -11012,6 +10948,222 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
}
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ static bool isPod() { return true; }
+ };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsInspected is a set we use to avoid
+ // revisiting PHIs, PHIsToSlice is an ordered list of PHIs that we need to
+ // check the uses of (to ensure they are all extracts).
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+ // If we have no users, they must all be self uses; just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+ // Otherwise, create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ } else if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // already rewrote, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
+
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
@@ -11117,6 +11269,15 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
}
}
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (isa<IntegerType>(PN.getType()) && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
return 0;
}
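For context on the pattern SliceUpIllegalIntegerPHI targets, here is a hypothetical C++ source fragment (illustrative only, not from this commit) that tends to leave an illegal-width integer PHI once SROA promotes the aggregate: the merged struct becomes a single i64 flowing through a phi, and each field read becomes a trunc or trunc(lshr) of that phi, which the new code splits back into i32 pieces.

    #include <cstdio>

    struct Pair { int lo, hi; };   // two i32 fields; SROA may promote this to one i64

    static Pair pick(bool c, Pair a, Pair b) {
      // After SROA the selected aggregate can become a single 64-bit integer
      // flowing through a phi/select, an "illegal" width on 32-bit targets.
      return c ? a : b;
    }

    int main() {
      Pair a = {1, 2}, b = {3, 4};
      Pair p = pick(true, a, b);
      // Reading the fields back corresponds to trunc(x) and trunc(lshr(x, 32))
      // of that i64 in the IR; the pass rewrites those into two i32 phis.
      std::printf("%d %d\n", p.lo, p.hi);
      return 0;
    }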
@@ -12210,6 +12371,47 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
exti, exte);
}
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic; see if we're its only user, which
+ // allows us to simplify multiple-result intrinsics to simpler things that
+ // just get one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
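A rough C++ illustration of the with-overflow case handled above (hypothetical example; the note that modern Clang lowers __builtin_add_overflow to llvm.sadd.with.overflow is an assumption about current toolchains, not about this 2009 tree): when the overflow bit is never inspected, the extract of the normal result can become a plain add and the intrinsic disappears.

    #include <cstdio>

    static int sum_ignoring_overflow(int a, int b) {
      int result;
      // The builtin yields {result, overflow-flag}; the flag is discarded, so
      // only the element-0 extract survives and it can be rewritten as 'a + b'.
      (void)__builtin_add_overflow(a, b, &result);
      return result;
    }

    int main() {
      std::printf("%d\n", sum_ignoring_overflow(2, 3));
      return 0;
    }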
// Can't simplify extracts from other values. Note that nested extracts are
// already simplified implicitly by the above (extract ( extract (insert) )
// will be translated into extract ( insert ( extract ) ) first and then just
@@ -12715,29 +12917,33 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isa<UndefValue>(RHS)) {
std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
- std::vector<unsigned> NewMask;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i)
- if (Mask[i] >= 2*e)
- NewMask.push_back(2*e);
- else
- NewMask.push_back(LHSMask[Mask[i]]);
+ if (LHSMask.size() == Mask.size()) {
+ std::vector<unsigned> NewMask;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+ if (Mask[i] >= 2*e)
+ NewMask.push_back(2*e);
+ else
+ NewMask.push_back(LHSMask[Mask[i]]);
- // If the result mask is equal to the src shuffle or this shuffle mask, do
- // the replacement.
- if (NewMask == LHSMask || NewMask == Mask) {
- unsigned LHSInNElts =
- cast<VectorType>(LHSSVI->getOperand(0)->getType())->getNumElements();
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] >= LHSInNElts*2) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
- } else {
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i]));
+ // If the result mask is equal to the src shuffle or this
+ // shuffle mask, do the replacement.
+ if (NewMask == LHSMask || NewMask == Mask) {
+ unsigned LHSInNElts =
+ cast<VectorType>(LHSSVI->getOperand(0)->getType())->
+ getNumElements();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] >= LHSInNElts*2) {
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
+ } else {
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
+ NewMask[i]));
+ }
}
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
}
- return new ShuffleVectorInst(LHSSVI->getOperand(0),
- LHSSVI->getOperand(1),
- ConstantVector::get(Elts));
}
}
}
@@ -12824,7 +13030,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -12846,8 +13052,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
if (!FoldedConstants.insert(CE))
continue;
- Constant *NewC =
- ConstantFoldConstantExpression(CE, BB->getContext(), TD);
+ Constant *NewC = ConstantFoldConstantExpression(CE, TD);
if (NewC && NewC != CE) {
*i = NewC;
MadeIRChange = true;
@@ -12954,7 +13159,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -13065,7 +13270,7 @@ bool InstCombiner::runOnFunction(Function &F) {
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
IRBuilder<true, TargetFolder, InstCombineIRInserter>
- TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()),
+ TheBuilder(F.getContext(), TargetFolder(TD),
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 10c9ec6..5864113 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -16,7 +16,8 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -40,6 +41,12 @@ Threshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
+// Turn on use of LazyValueInfo.
+static cl::opt<bool>
+EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+
+
+
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
@@ -59,6 +66,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
@@ -69,20 +77,31 @@ namespace {
JumpThreading() : FunctionPass(&ID) {}
bool runOnFunction(Function &F);
- void FindLoopHeaders(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (EnableLVI)
+ AU.addRequired<LazyValueInfo>();
+ }
+
+ void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
- bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
+ bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock *PredBB);
-
- BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
+
+ typedef SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > PredValueInfo;
+
+ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ PredValueInfo &Result);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
+
+
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessJumpOnPHI(PHINode *PN);
- bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd);
- bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
@@ -100,6 +119,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
FindLoopHeaders(F);
@@ -109,6 +129,7 @@ bool JumpThreading::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
+ // Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
@@ -123,6 +144,29 @@ bool JumpThreading::runOnFunction(Function &F) {
LoopHeaders.erase(BB);
DeleteDeadBlock(BB);
Changed = true;
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ BasicBlock::iterator BBI = BB->getFirstNonPHI();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ // If the terminator is the only non-phi instruction, try to nuke it.
+ if (BBI->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward in the rare case when the block isn't deleted.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ Changed = true;
+ else if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
+ }
+ }
}
}
AnotherIteration = Changed;
@@ -139,6 +183,10 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
+ // FIXME: THREADING will delete values that are just used to compute the
+ // branch, so they shouldn't count against the duplication cost.
+
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
@@ -173,8 +221,6 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
return Size;
}
-
-
/// FindLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
@@ -198,29 +244,181 @@ void JumpThreading::FindLoopHeaders(Function &F) {
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
-
-/// FactorCommonPHIPreds - If there are multiple preds with the same incoming
-/// value for the PHI, factor them together so we get one block to thread for
-/// the whole group.
-/// This is important for things like "phi i1 [true, true, false, true, x]"
-/// where we only need to clone the block for the true blocks once.
+/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// if we can infer that the value is a known ConstantInt in any of our
+/// predecessors. If so, return the known list of value and pred BB in the
+/// result vector. If a value is known to be undef, it is returned as null.
+///
+/// This returns true if there were any known values.
///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
- SmallVector<BasicBlock*, 16> CommonPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Val)
- CommonPreds.push_back(PN->getIncomingBlock(i));
-
- if (CommonPreds.size() == 1)
- return CommonPreds[0];
+bool JumpThreading::
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ // If V is a ConstantInt or undef, then it is known in all predecessors.
+ if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
- DEBUG(errs() << " Factoring out " << CommonPreds.size()
- << " common predecessors.\n");
- return SplitBlockPredecessors(PN->getParent(),
- &CommonPreds[0], CommonPreds.size(),
- ".thr_comm", this);
-}
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CI, *PI));
+ return true;
+ }
+
+ // If V is a non-instruction value, or an instruction in a different block,
+ // then it can't be derived from a PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ if (LVI) {
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+ if (PredCst == 0 ||
+ (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
+ continue;
+
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+ }
+
+ return !Result.empty();
+ }
+
+ return false;
+ }
+
+ // If I is a PHI node, then we know the incoming value on each edge; report
+ // any that are constants or undef.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
+ Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ }
+ }
+ return !Result.empty();
+ }
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+ // Handle some boolean conditions.
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ // X | true -> true
+ // X & false -> false
+ if (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
+
+ if (LHSVals.empty() && RHSVals.empty())
+ return false;
+
+ ConstantInt *InterestingVal;
+ if (I->getOpcode() == Instruction::Or)
+ InterestingVal = ConstantInt::getTrue(I->getContext());
+ else
+ InterestingVal = ConstantInt::getFalse(I->getContext());
+
+ // Scan for the sentinel.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
+ if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0)
+ Result.push_back(LHSVals[i]);
+ for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
+ if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0)
+ Result.push_back(RHSVals[i]);
+ return !Result.empty();
+ }
+
+ // Handle the NOT form of XOR.
+ if (I->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isOne()) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ if (Result.empty())
+ return false;
+
+ // Invert the known values.
+ for (unsigned i = 0, e = Result.size(); i != e; ++i)
+ if (Result[i].first)
+ Result[i].first =
+ cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+ return true;
+ }
+ }
+ // Handle compare with phi operand, where the PHI is defined in this block.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ if (PN && PN->getParent() == BB) {
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ Value *LHS = PN->getIncomingValue(i);
+ Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!LVI || !isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
+
+ if (isa<UndefValue>(Res))
+ Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
+ Result.push_back(std::make_pair(CI, PredBB));
+ }
+
+ return !Result.empty();
+ }
+
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
+ Cmp->getType()->isInteger() && // Not vector compare.
+ (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate
+ Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, *PI, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+ }
+
+ return !Result.empty();
+ }
+ }
+ return false;
+}
+
+
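To make the per-predecessor reasoning above concrete, here is a hypothetical C++ function (illustrative only): the branch condition is a phi with a constant incoming value on one edge, so ComputeValueKnownInPredecessors can report it as known-true for that predecessor and the edge can be threaded straight to the taken successor.

    #include <cstdio>

    static int classify(bool fast_path, int x) {
      bool take;
      if (fast_path)
        take = true;        // constant incoming value: known true on this edge
      else
        take = (x < 3);     // not known on this edge

      // 'take' is a phi of {true, x < 3}; the predecessor coming from the
      // fast path can jump directly to the 'return 1' successor instead of
      // re-evaluating the branch below.
      if (take)
        return 1;
      return 2;
    }

    int main() {
      std::printf("%d %d\n", classify(true, 10), classify(false, 1));
      return 0;
    }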
/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
/// in an undefined jump, decide which block is best to revector to.
@@ -251,7 +449,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
- if (BasicBlock *SinglePred = BB->getSinglePredecessor())
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
SinglePred != BB) {
// If SinglePred was a loop header, BB becomes one.
@@ -267,10 +465,10 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
-
- // See if this block ends with a branch or switch. If so, see if the
- // condition is a phi node. If so, and if an entry of the phi node is a
- // constant, we can thread the block.
+ }
+
+ // Look to see if the terminator is a branch or switch; if not, we can't
+ // thread it.
Value *Condition;
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
// Can't thread an unconditional jump.
@@ -301,7 +499,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
- BBTerm->getSuccessor(i)->removePredecessor(BB);
+ RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
}
DEBUG(errs() << " In block '" << BB->getName()
@@ -318,7 +516,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// br COND, BBX, BBY
// BBX:
// br COND, BBZ, BBW
- if (!Condition->hasOneUse() && // Multiple uses.
+ if (!LVI &&
+ !Condition->hasOneUse() && // Multiple uses.
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
@@ -338,52 +537,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0)
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (LVI && ProcessThreadableEdges(Condition, BB))
+ return true;
return false;
+ }
+
// See if this is a phi node in the current block.
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
if (PN->getParent() == BB)
return ProcessJumpOnPHI(PN);
- // If this is a conditional branch whose condition is and/or of a phi, try to
- // simplify it.
- if ((CondInst->getOpcode() == Instruction::And ||
- CondInst->getOpcode() == Instruction::Or) &&
- isa<BranchInst>(BB->getTerminator()) &&
- ProcessBranchOnLogical(CondInst, BB,
- CondInst->getOpcode() == Instruction::And))
- return true;
-
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (isa<PHINode>(CondCmp->getOperand(0))) {
- // If we have "br (phi != 42)" and the phi node has any constant values
- // as operands, we can thread through this block.
- //
- // If we have "br (cmp phi, x)" and the phi node contains x such that the
- // comparison uniquely identifies the branch target, we can thread
- // through this block.
-
- if (ProcessBranchOnCompare(CondCmp, BB))
- return true;
- }
-
- // If we have a comparison, loop over the predecessors to see if there is
- // a condition with the same value.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI->isConditional() && *PI != BB) {
- if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
- if (CI->getOperand(0) == CondCmp->getOperand(0) &&
- CI->getOperand(1) == CondCmp->getOperand(1) &&
- CI->getPredicate() == CondCmp->getPredicate()) {
- // TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(*PI, BB))
- return true;
+ if (!LVI &&
+ (!isa<PHINode>(CondCmp->getOperand(0)) ||
+ cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with a lexically identical value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && *PI != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ }
}
}
- }
+ }
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -398,10 +585,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
+
+ // Handle a variety of cases where we are branching on something derived from
+ // a PHI node in the current block. If we can prove that any predecessors
+ // compute a predictable value based on a PHI node, thread those predecessors.
+ //
+ if (ProcessThreadableEdges(CondInst, BB))
+ return true;
+
+
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know
// "(X == 4)" thread through this block.
@@ -459,8 +657,11 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
// Next, figure out which successor we are threading to.
BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
+ SmallVector<BasicBlock*, 2> Preds;
+ Preds.push_back(PredBB);
+
// Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ return ThreadEdge(BB, Preds, SuccBB);
}
/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
@@ -553,7 +754,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
- // FIXME: Could do PHI translation, that would be fun :)
+ // TODO: Could do simple PHI translation, that would be fun :)
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
@@ -562,8 +763,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB,
- BBIt, 6)) {
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
@@ -646,7 +847,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-split", this);
+ "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -655,7 +856,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr",
+ Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
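Loosely, the partially redundant load this function patches up (the hunk above now preserves the original load's alignment on the inserted copy) looks like the hypothetical C++ below: the load of *p is already available at the end of one predecessor, a copy is inserted into the other, and the block can then start with a phi of the two loaded values instead of reloading.

    #include <cstdio>

    static int use_twice(const int *p, bool already_loaded) {
      int cached = 0;
      if (already_loaded)
        cached = *p;        // load is available at the end of this predecessor

      // On the other path the load below would be the first one; PRE inserts a
      // copy of it in that predecessor, so this block can use a phi instead.
      int v = *p;
      return v + cached;
    }

    int main() {
      int x = 21;
      std::printf("%d\n", use_twice(&x, true));
      return 0;
    }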
@@ -690,55 +892,183 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
return true;
}
-
-/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
-/// the current block. See if there are any simplifications we can do based on
-/// inputs to the phi node.
-///
-bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
- BasicBlock *BB = PN->getParent();
+/// FindMostPopularDest - The specified list contains multiple possible
+/// threadable destinations. Pick the one that occurs the most frequently in
+/// the list.
+static BasicBlock *
+FindMostPopularDest(BasicBlock *BB,
+ const SmallVectorImpl<std::pair<BasicBlock*,
+ BasicBlock*> > &PredToDestList) {
+ assert(!PredToDestList.empty());
+
+ // Determine popularity. If there are multiple possible destinations, we
+ // explicitly choose to ignore 'undef' destinations. We prefer threading to
+ // blocks with known, real destinations over threading to undef. We'll
+ // handle them later if interesting.
+ DenseMap<BasicBlock*, unsigned> DestPopularity;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second)
+ DestPopularity[PredToDestList[i].second]++;
+
+ // Find the most popular dest.
+ DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
+ BasicBlock *MostPopularDest = DPI->first;
+ unsigned Popularity = DPI->second;
+ SmallVector<BasicBlock*, 4> SamePopularity;
+
+ for (++DPI; DPI != DestPopularity.end(); ++DPI) {
+ // If the popularity of this entry isn't higher than the popularity we've
+ // seen so far, ignore it.
+ if (DPI->second < Popularity)
+ ; // ignore.
+ else if (DPI->second == Popularity) {
+ // If it is the same as what we've seen so far, keep track of it.
+ SamePopularity.push_back(DPI->first);
+ } else {
+ // If it is more popular, remember it.
+ SamePopularity.clear();
+ MostPopularDest = DPI->first;
+ Popularity = DPI->second;
+ }
+ }
- // See if the phi node has any constant integer or undef values. If so, we
- // can determine where the corresponding predecessor will branch.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *PredVal = PN->getIncomingValue(i);
-
- // Check to see if this input is a constant integer. If so, the direction
- // of the branch is predictable.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) {
- // Merge any common predecessors that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI);
+ // Okay, now we know the most popular destination. If there is more than
+ // destination, we need to determine one. This is arbitrary, but we need
+ // to make a deterministic decision. Pick the first one that appears in the
+ // successor list.
+ if (!SamePopularity.empty()) {
+ SamePopularity.push_back(MostPopularDest);
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0; ; ++i) {
+ assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
- BasicBlock *SuccBB;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- SuccBB = BI->getSuccessor(CI->isZero());
- else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- SuccBB = SI->getSuccessor(SI->findCaseValue(CI));
- }
+ if (std::find(SamePopularity.begin(), SamePopularity.end(),
+ TI->getSuccessor(i)) == SamePopularity.end())
+ continue;
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ MostPopularDest = TI->getSuccessor(i);
+ break;
}
+ }
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopularDest;
+}
+
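A minimal standalone sketch of the popularity count and deterministic tie-break in FindMostPopularDest above, using plain STL and made-up names rather than LLVM's DenseMap (it only mirrors the logic, it is not the pass):

    #include <algorithm>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // Pick the most frequent destination; ties go to whichever candidate
    // appears first in SuccessorOrder, so the result is deterministic.
    static std::string mostPopularDest(const std::vector<std::string> &Dests,
                                       const std::vector<std::string> &SuccessorOrder) {
      std::map<std::string, unsigned> Popularity;
      for (const std::string &D : Dests)
        ++Popularity[D];

      unsigned Best = 0;
      for (const auto &Entry : Popularity)
        Best = std::max(Best, Entry.second);

      for (const std::string &S : SuccessorOrder)
        if (Popularity.count(S) && Popularity[S] == Best)
          return S;
      return Dests.front();   // not reached if every dest is a successor
    }

    int main() {
      std::vector<std::string> Dests = {"bb1", "bb2", "bb2", "bb1"};
      std::vector<std::string> Order = {"bb2", "bb1"};
      std::printf("%s\n", mostPopularDest(Dests, Order).c_str());   // prints bb2
      return 0;
    }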
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't even try to
+ // thread the edge.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+ return false;
+ assert(!PredValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ DEBUG(errs() << "IN BB: " << *BB;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ errs() << " BB '" << BB->getName() << "': FOUND condition = ";
+ if (PredValues[i].first)
+ errs() << *PredValues[i].first;
+ else
+ errs() << "UNDEF";
+ errs() << " for pred '" << PredValues[i].second->getName()
+ << "'.\n";
+ });
+
+ // Decide what we want to thread through. Convert our list of known values to
+ // a list of known destinations for each pred. This also discards duplicate
+ // predecessors and keeps track of the undefined inputs (which are represented
+ // as a null dest in the PredToDestList).
+ SmallPtrSet<BasicBlock*, 16> SeenPreds;
+ SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
+
+ BasicBlock *OnlyDest = 0;
+ BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ BasicBlock *Pred = PredValues[i].second;
+ if (!SeenPreds.insert(Pred))
+ continue; // Duplicate predecessor entry.
- // If the input is an undef, then it doesn't matter which way it will go.
- // Pick an arbitrary dest and thread the edge.
- if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) {
- // Merge any common predecessors that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV);
- BasicBlock *SuccBB =
- BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB));
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
+
+ ConstantInt *Val = PredValues[i].first;
+
+ BasicBlock *DestBB;
+ if (Val == 0) // Undef.
+ DestBB = 0;
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ DestBB = BI->getSuccessor(Val->isZero());
+ else {
+ SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
+ DestBB = SI->getSuccessor(SI->findCaseValue(Val));
}
+
+ // If we have exactly one destination, remember it for efficiency below.
+ if (i == 0)
+ OnlyDest = DestBB;
+ else if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+
+ PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
- // If the incoming values are all variables, we don't know the destination of
- // any predecessors. However, if any of the predecessor blocks end in an
- // unconditional branch, we can *duplicate* the jump into that block in order
- // to further encourage jump threading and to eliminate cases where we have
- // branch on a phi of an icmp (branch on icmp is much better).
+ // If all edges were unthreadable, we fail.
+ if (PredToDestList.empty())
+ return false;
+
+ // Determine which is the most common successor. If we have many inputs and
+ // this block is a switch, we want to thread the batch that goes to the
+ // most popular destination first. If we only know about one threadable
+ // destination (the common case), we can avoid this.
+ BasicBlock *MostPopularDest = OnlyDest;
+
+ if (MostPopularDest == MultipleDestSentinel)
+ MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+
+ // Now that we know what the most popular destination is, factor all
+ // predecessors that will jump to it into a single predecessor.
+ SmallVector<BasicBlock*, 16> PredsToFactor;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second == MostPopularDest) {
+ BasicBlock *Pred = PredToDestList[i].first;
+
+ // This predecessor may be a switch or something else that has multiple
+ // edges to the block. Factor each of these edges by listing them
+ // according to # occurrences in PredsToFactor.
+ TerminatorInst *PredTI = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
+ if (PredTI->getSuccessor(i) == BB)
+ PredsToFactor.push_back(Pred);
+ }
+
+ // If the threadable edges are branching on an undefined value, we get to pick
+ // the destination that these predecessors should go to.
+ if (MostPopularDest == 0)
+ MostPopularDest = BB->getTerminator()->
+ getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+}
+
+/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
+/// the current block. See if there are any simplifications we can do based on
+/// inputs to the phi node.
+///
+bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+ BasicBlock *BB = PN->getParent();
+
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the jump into that block in order to further encourage jump
+ // threading and to eliminate cases where we have branch on a phi of an icmp
+ // (branch on icmp is much better).
// We don't want to do this transformation for switches, because we don't
// really want to duplicate a switch.
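For the remaining all-variable case that ProcessJumpOnPHI now forwards to DuplicateCondBranchOnPHIIntoPred, a hypothetical C++ shape (illustrative only): the condition is a phi of two comparisons, and duplicating the final branch into each predecessor lets each one branch directly on its own icmp, which threads much better than branching on the phi.

    #include <cstdio>

    static int route(bool a, int x, int y) {
      bool flag;
      if (a)
        flag = (x > 10);    // each predecessor computes its own comparison...
      else
        flag = (y > 10);

      // ...and falls through unconditionally to this block, which branches on
      // the phi of the two comparisons. Duplicating this branch into the
      // predecessors replaces the branch-on-phi with a branch on each icmp.
      if (flag)
        return 1;
      return 2;
    }

    int main() {
      std::printf("%d %d\n", route(true, 42, 0), route(false, 0, 20));
      return 0;
    }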
@@ -759,137 +1089,6 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
}
-/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch
-/// whose condition is an AND/OR where one side is PN. If PN has constant
-/// operands that permit us to evaluate the condition for some operand, thread
-/// through the block. For example with:
-/// br (and X, phi(Y, Z, false))
-/// the predecessor corresponding to the 'false' will always jump to the false
-/// destination of the branch.
-///
-bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
- bool isAnd) {
- // If this is a binary operator tree of the same AND/OR opcode, check the
- // LHS/RHS.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
- if ((isAnd && BO->getOpcode() == Instruction::And) ||
- (!isAnd && BO->getOpcode() == Instruction::Or)) {
- if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd))
- return true;
- if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd))
- return true;
- }
-
- // If this isn't a PHI node, we can't handle it.
- PHINode *PN = dyn_cast<PHINode>(V);
- if (!PN || PN->getParent() != BB) return false;
-
- // We can only do the simplification for phi nodes of 'false' with AND or
- // 'true' with OR. See if we have any entries in the phi for this.
- unsigned PredNo = ~0U;
- ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- !isAnd);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if (PN->getIncomingValue(i) == PredCst) {
- PredNo = i;
- break;
- }
- }
-
- // If no match, bail out.
- if (PredNo == ~0U)
- return false;
-
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
-
- // Next, figure out which successor we are threading to. If this was an AND,
- // the constant must be FALSE, and we must be targeting the 'false' block.
- // If this is an OR, the constant must be TRUE, and we must be targeting the
- // 'true' block.
- BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
-}
-
-/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
-/// hand sides of the compare instruction, try to determine the result. If the
-/// result can not be determined, a null pointer is returned.
-static Constant *GetResultOfComparison(CmpInst::Predicate pred,
- Value *LHS, Value *RHS,
- LLVMContext &Context) {
- if (Constant *CLHS = dyn_cast<Constant>(LHS))
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantExpr::getCompare(pred, CLHS, CRHS);
-
- if (LHS == RHS)
- if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
- return ICmpInst::isTrueWhenEqual(pred) ?
- ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context);
-
- return 0;
-}
-
-/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a value. If we can identify when the comparison is true between
-/// the phi inputs and the value, we can fold the compare for that edge and
-/// thread through it.
-bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
- PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
- Value *RHS = Cmp->getOperand(1);
-
- // If the phi isn't in the current block, an incoming edge to this block
- // doesn't control the destination.
- if (PN->getParent() != BB)
- return false;
-
- // We can do this simplification if any comparisons fold to true or false.
- // See if any do.
- Value *PredVal = 0;
- bool TrueDirection = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- PredVal = PN->getIncomingValue(i);
-
- Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal,
- RHS, Cmp->getContext());
- if (!Res) {
- PredVal = 0;
- continue;
- }
-
- // If this folded to a constant expr, we can't do anything.
- if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
- TrueDirection = ResC->getZExtValue();
- break;
- }
- // If this folded to undef, just go the false way.
- if (isa<UndefValue>(Res)) {
- TrueDirection = false;
- break;
- }
-
- // Otherwise, we can't fold this input.
- PredVal = 0;
- }
-
- // If no match, bail out.
- if (PredVal == 0)
- return false;
-
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
-
- // Next, get our successor.
- BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
-}
-
-
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
@@ -914,10 +1113,11 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
}
}
-/// ThreadEdge - We have decided that it is safe and profitable to thread an
-/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this
-/// change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
@@ -929,8 +1129,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not threading from '" << PredBB->getName()
- << "' across loop header BB '" << BB->getName()
+ DEBUG(errs() << " Not threading across loop header BB '" << BB->getName()
<< "' to dest BB '" << SuccBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
@@ -943,6 +1142,17 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
return false;
}
+ // And finally, do it! Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(errs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
// And finally, do it!
DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
@@ -1034,7 +1244,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
TerminatorInst *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
PredTerm->setSuccessor(i, NewBB);
}
@@ -1044,9 +1254,12 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
BI = NewBB->begin();
for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
- Inst->replaceAllUsesWith(C);
- Inst->eraseFromParent();
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ if (BIHandle == 0)
+ BI = NewBB->begin();
continue;
}
@@ -1164,7 +1377,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
- BB->removePredecessor(PredBB);
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 756fbf3..104c873 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -263,7 +263,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get the preheader block to move instructions into...
Preheader = L->getLoopPreheader();
- assert(Preheader&&"Preheader insertion pass guarantees we have a preheader!");
// Loop over the body of this loop, looking for calls, invokes, and stores.
// Because subloops have already been incorporated into AST, we skip blocks in
@@ -286,12 +285,14 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
//
- SinkRegion(DT->getNode(L->getHeader()));
- HoistRegion(DT->getNode(L->getHeader()));
+ if (L->hasDedicatedExits())
+ SinkRegion(DT->getNode(L->getHeader()));
+ if (Preheader)
+ HoistRegion(DT->getNode(L->getHeader()));
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can...
- if (!DisablePromotion)
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits())
PromoteValuesInLoop();
// Clear out loops state information for the next iteration
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 866d8b4..48817ab 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -115,6 +115,10 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
if (!preheader)
return false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->hasDedicatedExits())
+ return false;
+
// We can't remove loops that contain subloops. If the subloops were dead,
// they would already have been removed in earlier executions of this pass.
if (L->begin() != L->end())
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 920d85c..8b6a233 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -209,6 +209,10 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
L = IncomingLoop;
LPM = &LPM_Ref;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
// FIXME - Nested loops make dominator info updates tricky.
if (!L->getSubLoops().empty())
return false;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 7a4bb35..5004483 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -49,6 +48,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<DominanceFrontier>();
@@ -104,17 +104,18 @@ bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
L = Lp;
- OrigHeader = L->getHeader();
OrigPreHeader = L->getLoopPreheader();
+ if (!OrigPreHeader) return false;
+
OrigLatch = L->getLoopLatch();
+ if (!OrigLatch) return false;
+
+ OrigHeader = L->getHeader();
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
- assert(OrigHeader && OrigLatch && OrigPreHeader &&
- "Loop is not in canonical form");
-
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
@@ -287,7 +288,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
"bb.nph",
OrigHeader->getParent(),
NewHeader);
- LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
if (Loop *PL = LI.getLoopFor(OrigPreHeader))
PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
BranchInst::Create(NewHeader, NewPreHeader);
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e20fb16..564c7ac 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -51,6 +51,7 @@ STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
+STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
cl::init(false),
@@ -107,7 +108,7 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+ explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
LoopPass(&ID), TLI(tli) {
}
@@ -131,12 +132,10 @@ namespace {
}
private:
- ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const * &CondStride);
-
void OptimizeIndvars(Loop *L);
- void OptimizeLoopCountIV(Loop *L);
+
+ /// OptimizeLoopTermCond - Change loop terminating condition to use the
+ /// postinc iv when possible.
void OptimizeLoopTermCond(Loop *L);
/// OptimizeShadowIV - If IV is used in a int-to-float cast
@@ -148,8 +147,28 @@ namespace {
ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
+ /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
+ /// deciding when to exit the loop is used only for that purpose, try to
+ /// rearrange things so it counts down to a test against zero.
+ bool OptimizeLoopCountIV(Loop *L);
+ bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse, Loop *L);
+
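For the OptimizeLoopCountIV declarations above, a hypothetical C++ before/after sketch of the rewrite they aim for (the pass of course works on the IR, not on source): when the induction variable is used only to decide when to exit, it can be made to count down so the exit test becomes a compare against zero.

    #include <cstdio>

    // Before: 'i' only decides when to exit; the data is walked via 'p'.
    static long sum_up(const int *a, unsigned n) {
      long s = 0;
      const int *p = a;
      for (unsigned i = 0; i != n; ++i)
        s += *p++;
      return s;
    }

    // After (the shape the transform steers toward): count from n down to
    // zero, so the loop exit is a test against zero, which many targets get
    // for free from the flags set by the decrement.
    static long sum_down(const int *a, unsigned n) {
      long s = 0;
      const int *p = a;
      for (unsigned left = n; left != 0; --left)
        s += *p++;
      return s;
    }

    int main() {
      int v[4] = {1, 2, 3, 4};
      std::printf("%ld %ld\n", sum_up(v, 4), sum_down(v, 4));
      return 0;
    }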
+ /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
+ /// single stride of IV. All of the users may have different starting
+ /// values, and this may not be the only stride.
+ void StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L);
+ void StrengthReduceIVUsers(Loop *L);
+
+ ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass = false);
+
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride);
+ const SCEV* &CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,
IVExpr&, const Type*,
@@ -164,6 +183,7 @@ namespace {
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess);
+ bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
bool ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
@@ -188,9 +208,7 @@ namespace {
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter);
- void StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L);
+
void DeleteTriviallyDeadInstructions();
};
}
@@ -208,11 +226,11 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
-
+
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
DeadInsts.pop_back();
-
+
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -223,14 +241,14 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
DeadInsts.push_back(U);
}
}
-
+
I->eraseFromParent();
Changed = true;
}
}
-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
// This is very common, put it first.
@@ -256,7 +274,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
#if 0
- // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+ // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
// need this when it is.
if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
@@ -328,7 +346,7 @@ namespace {
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
const SCEV *Base;
-
+
/// Inst - The instruction using the induction variable.
Instruction *Inst;
@@ -352,11 +370,11 @@ namespace {
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
bool isUseOfPostIncrementedValue;
-
+
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(SE->getIntegerSCEV(0, Base->getType())),
+ Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
@@ -367,8 +385,8 @@ namespace {
SCEVExpander &Rewriter, Loop *L, Pass *P,
LoopInfo &LI,
SmallVectorImpl<WeakVH> &DeadInsts);
-
- Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+
+ Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -383,7 +401,7 @@ void BasedUser::dump() const {
errs() << " Inst: " << *Inst;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -393,10 +411,10 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
// want to insert this expression before the user, we'd rather pull it out as
// many loops as possible.
Instruction *BaseInsertPt = IP;
-
+
// Figure out the most-nested loop that IP is in.
Loop *InsertLoop = LI.getLoopFor(IP->getParent());
-
+
// If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
// the preheader of the outer-most loop where NewBase is not loop invariant.
if (L->contains(IP->getParent()))
@@ -404,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
InsertLoop = InsertLoop->getParentLoop();
}
-
+
Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
const SCEV *NewValSCEV = SE->getUnknown(Base);
@@ -430,7 +448,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
-
+
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
@@ -442,7 +460,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
- // artificially make the preinc value live out the bottom of the loop.
+ // artificially make the preinc value live out the bottom of the loop.
if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
@@ -477,7 +495,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
- // which need not be an immediate predecessor of this PHI. This way we
+ // which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
@@ -490,6 +508,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(PHIPred->getTerminator()) &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
@@ -572,11 +591,11 @@ static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,
static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
Loop *L, ScalarEvolution *SE) {
if (Val->isLoopInvariant(L)) return; // Nothing to do.
-
+
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
if (!SAE->getOperand(i)->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
@@ -594,7 +613,7 @@ static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
-
+
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
@@ -617,11 +636,11 @@ static void MoveImmediateValues(const TargetLowering *TLI,
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
const SCEV *NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
-
+
if (!NewOp->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
@@ -640,7 +659,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
-
+
if (Start != SARE->getStart()) {
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
@@ -656,8 +675,8 @@ static void MoveImmediateValues(const TargetLowering *TLI,
const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
const SCEV *NewOp = SME->getOperand(1);
MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
- // If we extracted something out of the subexpressions, see if we can
+
+ // If we extracted something out of the subexpressions, see if we can
// simplify this!
if (NewOp != SME->getOperand(1)) {
// Scale SubImm up by "8". If the result is a target constant, we are
@@ -666,7 +685,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
// Accumulate the immediate.
Imm = SE->getAddExpr(Imm, SubImm);
-
+
// Update what is left of 'Val'.
Val = SE->getMulExpr(SME->getOperand(0), NewOp);
return;
@@ -714,7 +733,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
-
+
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
@@ -724,7 +743,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
}
}
-// This is logically local to the following function, but C++ says we have
+// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
@@ -762,7 +781,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// an addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
+
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
SmallVector<const SCEV *, 16> UniqueSubExprs;
@@ -779,7 +798,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
if (!L->contains(Uses[i].Inst->getParent()))
continue;
NumUsesInsideLoop++;
-
+
// If the base is zero (which is common), return zero now, there are no
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
@@ -811,13 +830,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
- if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
+ if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
if (I->second.notAllUsesAreFree)
Result = SE->getAddExpr(Result, I->first);
- else
+ else
FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
@@ -849,13 +868,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// If we found no CSE's, return now.
if (Result == Zero) return Result;
-
+
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
@@ -882,7 +901,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
-
+
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
Uses[i].Base = Zero;
@@ -890,11 +909,11 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
-
+
return Result;
}
-/// ValidScale - Check whether the given Scale is valid for all loads and
+/// ValidScale - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
@@ -911,7 +930,7 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
AccessTy = getAccessType(UsersToProcess[i].Inst);
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
-
+
TargetLowering::AddrMode AM;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
@@ -983,13 +1002,13 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
 /// reuse is possible. Factors can be negative on some targets, e.g. ARM.
///
/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
+/// be folded into the addressing mode, nor even that the factor be constant;
+/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
- const SCEV *const &Stride,
+ const SCEV *const &Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (StrideNoReuse.count(Stride))
@@ -999,11 +1018,16 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
StrideNoReuse.count(SI->first))
continue;
+ // The other stride has no uses, don't reuse it.
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
+ IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
+ if (UI->second->Users.empty())
+ continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0))
@@ -1052,7 +1076,7 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
@@ -1072,9 +1096,9 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end())
+ if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
@@ -1104,18 +1128,18 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
static bool isNonConstantNegative(const SCEV *const &Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
-
+
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
-
+
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
/// CollectIVUsers - Transform our list of users and offsets to a bit more
-/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-/// of the strided accesses, as well as the old information from Uses. We
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
+/// base of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
@@ -1145,7 +1169,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
- // possible. Start by identifying the common expressions in the base values
+ // possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
@@ -1165,11 +1189,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
- UsersToProcess[i].Base =
+ UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
- AllUsesAreOutsideLoop = false;
+ AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
@@ -1183,11 +1207,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
if (isAddress)
HasAddress = true;
-
+
// If this use isn't an address, then not all uses are addresses.
if (!isAddress && !isPHI)
AllUsesAreAddresses = false;
-
+
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
@@ -1198,7 +1222,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
-
+
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
@@ -1491,12 +1515,13 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
return true;
}
-/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
+/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
-void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L) {
+void
+LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
@@ -1518,8 +1543,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
// Sort the UsersToProcess array so that users with common bases are
// next to each other.
@@ -1588,12 +1613,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
- SE->getIntegerSCEV(0,
+ SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
0);
- /// Choose a strength-reduction strategy and prepare for it by creating
- /// the necessary PHIs and adjusting the bookkeeping.
+ // Choose a strength-reduction strategy and prepare for it by creating
+ // the necessary PHIs and adjusting the bookkeeping.
if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
AllUsesAreAddresses, Stride)) {
PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
@@ -1606,7 +1631,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
- // target addressing mode; and we must have a valid instruction after this
+ // target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
@@ -1649,7 +1674,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
- PreInsertPt);
+ PreInsertPt);
}
}
@@ -1723,7 +1748,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
- typedBase = SE->getTruncateExpr(ReuseIV.Base,
+ typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
@@ -1775,11 +1800,29 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// different starting values, into different PHIs.
}
+void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) {
+ // Note: this processes each stride/type pair individually. All users
+ // passed into StrengthReduceIVUsersOfStride have the same type AND stride.
+ // Also, note that we iterate over IVUsesByStride indirectly by using
+ // StrideOrder. This extra layer of indirection makes the ordering of
+ // strides deterministic - not dependent on map order.
+ for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ StrengthReduceIVUsersOfStride(SI->first, *SI->second, L);
+ }
+}
+
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride) {
+bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond,
+ IVStrideUse *&CondUse,
+ const SCEV* &CondStride) {
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -1793,12 +1836,12 @@ bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
- CondStride = &SI->first;
+ CondStride = SI->first;
return true;
}
}
return false;
-}
+}
namespace {
   // Constant strides come first which in turn are sorted by their absolute
@@ -1851,8 +1894,9 @@ namespace {
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const* &CondStride) {
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass) {
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
@@ -1860,23 +1904,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// trying to change the condition because the stride will still
// remain.
std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
- IU->IVUsesByStride.find(*CondStride);
- if (I == IU->IVUsesByStride.end() ||
- I->second->Users.size() != 1)
+ IU->IVUsesByStride.find(CondStride);
+ if (I == IU->IVUsesByStride.end())
return Cond;
+ if (I->second->Users.size() > 1) {
+ for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
+ EE = I->second->Users.end(); II != EE; ++II) {
+ if (II->getUser() == Cond)
+ continue;
+ if (!isInstructionTriviallyDead(II->getUser()))
+ return Cond;
+ }
+ }
// Only handle constant strides for now.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride);
if (!SC) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
- unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType());
+ unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType());
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = Cond->getOperand(0)->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
unsigned NewTyBits = 0;
- const SCEV **NewStride = NULL;
+ const SCEV *NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
@@ -1885,16 +1937,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return Cond;
+
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
    // Check stride constant and the comparison constant signs to detect
// overflow.
- if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
- return Cond;
+ if (StartC) {
+ if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) ||
+ (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0))
+ return Cond;
+ } else {
+ // More restrictive check for the other cases.
+ if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
+ return Cond;
+ }
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
- if (!isa<SCEVConstant>(SI->first))
+ if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
@@ -1904,6 +1971,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
+
+      // If the old icmp value fits in the icmp immediate field but the new
+      // one doesn't, try something else.
+ if (TLI &&
+ TLI->isLegalICmpImmediate(CmpVal) &&
+ !TLI->isLegalICmpImmediate(NewCmpVal))
+ continue;
+
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
// Check for overflow.
@@ -1918,8 +1993,6 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
- if (NewCmpVal == CmpVal)
- continue;
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
@@ -1969,19 +2042,21 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
continue;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- continue;
+ if (!PostPass) {
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ // Avoid rewriting the compare instruction with an iv of new stride
+ // if it's likely the new stride uses will be rewritten using the
+ // stride of the compare instruction.
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+ continue;
+ }
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
@@ -1994,7 +2069,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
- NewStride = &IU->StrideOrder[i];
+ NewStride = IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
@@ -2031,13 +2106,16 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond");
+ DEBUG(errs() << " Change compare stride in Inst " << *OldCond);
+ DEBUG(errs() << " to " << *Cond << '\n');
+
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
- CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
+ IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
@@ -2180,7 +2258,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
-
+
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -2199,13 +2277,13 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
- for (unsigned i = 0; i < n; ++i)
+ for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
- for (unsigned i = 0; i < n; ++i, ++d)
+ for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
@@ -2227,7 +2305,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
- if (Mantissa == -1) continue;
+ if (Mantissa == -1) continue;
if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
@@ -2239,12 +2317,12 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
Entry = 1;
Latch = 0;
}
-
+
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
- BinaryOperator *Incr =
+ BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
@@ -2271,7 +2349,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
- BinaryOperator *NewIncr =
+ BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
@@ -2297,237 +2375,385 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
OptimizeShadowIV(L);
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
+bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L,
+ bool CheckPreInc) {
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ const SCEV *Share = SI->first;
+ if (!isa<SCEVConstant>(SI->first) || Share == Stride)
+ continue;
+ int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
+ if (SSInt == SInt)
+ return true; // This can definitely be reused.
+ if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
+ continue;
+ int64_t Scale = SSInt / SInt;
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) {
+ if (!CheckPreInc)
+ return true;
+ // Any pre-inc iv use?
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (!I->isUseOfPostIncrementedValue())
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// isUsedByExitBranch - Return true if the icmp is used by a loop terminating
+/// conditional branch, or is and'ed / or'ed with other conditions before being
+/// used as the branch condition.
+static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
+ BasicBlock *CondBB = Cond->getParent();
+ if (!L->isLoopExiting(CondBB))
+ return false;
+ BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
+ if (!TermBr || !TermBr->isConditional())
+ return false;
+
+ Value *User = *Cond->use_begin();
+ Instruction *UserInst = dyn_cast<Instruction>(User);
+ while (UserInst &&
+ (UserInst->getOpcode() == Instruction::And ||
+ UserInst->getOpcode() == Instruction::Or)) {
+ if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB)
+ return false;
+ User = *User->use_begin();
+ UserInst = dyn_cast<Instruction>(User);
+ }
+ return User == TermBr;
+}
+
+static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
+ ScalarEvolution *SE, Loop *L,
+ const TargetLowering *TLI = 0) {
+ if (!L->contains(Cond->getParent()))
+ return false;
+
+ if (!isa<SCEVConstant>(CondUse->getOffset()))
+ return false;
+
+ // Handle only tests for equality for the moment.
+ if (!Cond->isEquality() || !Cond->hasOneUse())
+ return false;
+ if (!isUsedByExitBranch(Cond, L))
+ return false;
+
+ Value *CondOp0 = Cond->getOperand(0);
+ const SCEV *IV = SE->getSCEV(CondOp0);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return false;
+
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!SC || SC->getValue()->getSExtValue() < 0)
+ // If it's already counting down, don't do anything.
+ return false;
+
+  // If the RHS of the comparison is not loop invariant, the rewrite
+  // cannot be done. Also bail out if it's already comparing against zero.
+  // If we are checking this before the cmp stride optimization, check if it's
+  // comparing against an already-legal immediate.
+ Value *RHS = Cond->getOperand(1);
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+ if (!L->isLoopInvariant(RHS) ||
+ (RHSC && RHSC->isZero()) ||
+ (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue())))
+ return false;
+
+ // Make sure the IV is only used for counting. Value may be preinc or
+ // postinc; 2 uses in either case.
+ if (!CondOp0->hasNUses(2))
+ return false;
+
+ return true;
+}
+
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
- // Finally, get the terminating condition for the loop if possible. If we
- // can, we want to change it to use a post-incremented version of its
- // induction variable, to allow coalescing the live ranges for the IV into
- // one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitingBlock = L->getExitingBlock();
-
- if (!ExitingBlock)
- // Multiple exits, just look at the exit in the latch block if there is one.
- ExitingBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!TermBr)
- return;
- if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
- return;
+ bool LatchExit = L->isLoopExiting(LatchBlock);
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
- // Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
- const SCEV *const *CondStride = 0;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
- if (!FindIVUserForCond(Cond, CondUse, CondStride))
- return; // setcc doesn't use the IV.
-
- if (ExitingBlock != LatchBlock) {
- if (!Cond->hasOneUse())
- // See below, we don't want the condition to be cloned.
- return;
-
- // If exiting block is the latch block, we know it's safe and profitable to
- // transform the icmp to use post-inc iv. Otherwise do so only if it would
- // not reuse another iv and its iv would be reused by other uses. We are
- // optimizing for the case where the icmp is the only use of the iv.
- IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
- for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
- E = StrideUses.Users.end(); I != E; ++I) {
- if (I->getUser() == Cond)
- continue;
- if (!I->isUseOfPostIncrementedValue())
- return;
- }
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
- // FIXME: This is expensive, and worse still ChangeCompareStride does a
- // similar check. Can we perform all the icmp related transformations after
- // StrengthReduceStridedIVUsers?
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
- int64_t SInt = SC->getValue()->getSExtValue();
- for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
- ++NewStride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
- if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
- continue;
- int64_t SSInt =
- cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SSInt == SInt)
- return; // This can definitely be reused.
- if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
- continue;
- int64_t Scale = SSInt / SInt;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- return;
- }
- }
+ // Finally, get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
- StrideNoReuse.insert(*CondStride);
- }
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+    // FIXME: Overly conservative; the termination condition could be an 'or', etc.
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
- // If the trip count is computed in terms of a max (due to ScalarEvolution
- // being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT or ULT instead of NE.
- Cond = OptimizeMax(L, Cond, CondUse);
-
- // If possible, change stride and operands of the compare instruction to
- // eliminate one stride.
- if (ExitingBlock == LatchBlock)
- Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
-
- // It's possible for the setcc instruction to be anywhere in the loop, and
- // possible for it to have multiple users. If it is not immediately before
- // the latch block branch, move it.
- if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
- if (Cond->hasOneUse()) { // Condition has a single use, just move it.
- Cond->moveBefore(TermBr);
- } else {
- // Otherwise, clone the terminating condition and insert into the loopend.
- Cond = cast<ICmpInst>(Cond->clone());
- Cond->setName(L->getHeader()->getName() + ".termcond");
- LatchBlock->getInstList().insert(TermBr, Cond);
-
- // Clone the IVUse, as the old use still exists!
- IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace());
- CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ const SCEV *CondStride = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse, CondStride))
+ continue;
+
+ // If the latch block is exiting and it's not a single block loop, it's
+ // not safe to use postinc iv in other exiting blocks. FIXME: overly
+ // conservative? How about icmp stride optimization?
+ bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock);
+ if (UsePostInc && ExitingBlock != LatchBlock) {
+ if (!Cond->hasOneUse())
+ // See below, we don't want the condition to be cloned.
+ UsePostInc = false;
+ else {
+ // If exiting block is the latch block, we know it's safe and profitable
+ // to transform the icmp to use post-inc iv. Otherwise do so only if it
+ // would not reuse another iv and its iv would be reused by other uses.
+ // We are optimizing for the case where the icmp is the only use of the
+ // iv.
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (I->getUser() == Cond)
+ continue;
+ if (!I->isUseOfPostIncrementedValue()) {
+ UsePostInc = false;
+ break;
+ }
+ }
+ }
+
+      // If the iv for this stride might be shared and any user of a sharing
+      // stride uses the pre-inc iv, then it's not safe to use the post-inc iv.
+ if (UsePostInc &&
+ isa<SCEVConstant>(CondStride) &&
+ StrideMightBeShared(CondStride, L, true))
+ UsePostInc = false;
}
- }
- // If we get to here, we know that we can transform the setcc instruction to
- // use the post-incremented version of the IV, allowing us to coalesce the
- // live ranges for the IV correctly.
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
- CondUse->setIsUseOfPostIncrementedValue(true);
- Changed = true;
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
+
+ // If possible, change stride and operands of the compare instruction to
+ // eliminate one stride. However, avoid rewriting the compare instruction
+ // with an iv of new stride if it's likely the new stride uses will be
+ // rewritten using the stride of the compare instruction.
+ if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
+ // If the condition stride is a constant and it's the only use, we might
+ // want to optimize it first by turning it to count toward zero.
+ if (!StrideMightBeShared(CondStride, L, false) &&
+ !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
+ Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+ }
- ++NumLoopCond;
-}
+ if (!UsePostInc)
+ continue;
-/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-/// when to exit the loop is used only for that purpose, try to rearrange things
-/// so it counts down to a test against zero.
-void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ DEBUG(errs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
- // If the number of times the loop is executed isn't computable, give up.
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- return;
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
+ if (Cond->hasOneUse()) { // Condition has a single use, just move it.
+ Cond->moveBefore(TermBr);
+ } else {
+ // Otherwise, clone the terminating condition and insert into the
+ // loopend.
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
+ CondUse->getOperandValToReplace());
+ CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+ }
+ }
- // Get the terminating condition for the loop if possible (this isn't
- // necessarily in the latch, or a block that's a predecessor of the header).
- if (!L->getExitBlock())
-    return; // More than one loop exit block.
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
+ CondUse->setIsUseOfPostIncrementedValue(true);
+ Changed = true;
- // Okay, there is one exit block. Try to find the condition that causes the
- // loop to be exited.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return; // More than one block exiting!
+ ++NumLoopCond;
+ }
+}
- // Okay, we've computed the exiting block. See what condition causes us to
- // exit.
- //
- // FIXME: we should be able to handle switch instructions (with a single exit)
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (TermBr == 0) return;
- assert(TermBr->isConditional() && "If unconditional, it can't be in loop!");
- if (!isa<ICmpInst>(TermBr->getCondition()))
- return;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse,
+ Loop *L) {
+ // If the only use is an icmp of a loop exiting conditional branch, then
+ // attempt the optimization.
+ BasedUser User = BasedUser(*CondUse, SE);
+ assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
+ ICmpInst *Cond = cast<ICmpInst>(User.Inst);
+
+ // Less strict check now that compare stride optimization is done.
+ if (!ShouldCountToZero(Cond, CondUse, SE, L))
+ return false;
- // Handle only tests for equality for the moment, and only stride 1.
- if (Cond->getPredicate() != CmpInst::ICMP_EQ)
- return;
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
- const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
- if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
- return;
- // If the RHS of the comparison is defined inside the loop, the rewrite
- // cannot be done.
- if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1)))
- if (L->contains(CR->getParent()))
- return;
+ Value *CondOp0 = Cond->getOperand(0);
+ PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
+ Instruction *Incr;
+ if (!PHIExpr) {
+ // Value tested is postinc. Find the phi node.
+ Incr = dyn_cast<BinaryOperator>(CondOp0);
+ // FIXME: Just use User.OperandValToReplace here?
+ if (!Incr || Incr->getOpcode() != Instruction::Add)
+ return false;
- // Make sure the IV is only used for counting. Value may be preinc or
- // postinc; 2 uses in either case.
- if (!Cond->getOperand(0)->hasNUses(2))
- return;
- PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0));
- Instruction *incr;
- if (phi && phi->getParent()==L->getHeader()) {
- // value tested is preinc. Find the increment.
- // A CmpInst is not a BinaryOperator; we depend on this.
- Instruction::use_iterator UI = phi->use_begin();
- incr = dyn_cast<BinaryOperator>(UI);
- if (!incr)
- incr = dyn_cast<BinaryOperator>(++UI);
- // 1 use for postinc value, the phi. Unnecessarily conservative?
- if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add)
- return;
- } else {
- // Value tested is postinc. Find the phi node.
- incr = dyn_cast<BinaryOperator>(Cond->getOperand(0));
- if (!incr || incr->getOpcode()!=Instruction::Add)
- return;
-
- Instruction::use_iterator UI = Cond->getOperand(0)->use_begin();
- phi = dyn_cast<PHINode>(UI);
- if (!phi)
- phi = dyn_cast<PHINode>(++UI);
+ PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
+ if (!PHIExpr)
+ return false;
// 1 use for preinc value, the increment.
- if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse())
- return;
+ if (!PHIExpr->hasOneUse())
+ return false;
+ } else {
+ assert(isa<PHINode>(CondOp0) &&
+ "Unexpected loop exiting counting instruction sequence!");
+ PHIExpr = cast<PHINode>(CondOp0);
+ // Value tested is preinc. Find the increment.
+ // A CmpInst is not a BinaryOperator; we depend on this.
+ Instruction::use_iterator UI = PHIExpr->use_begin();
+ Incr = dyn_cast<BinaryOperator>(UI);
+ if (!Incr)
+ Incr = dyn_cast<BinaryOperator>(++UI);
+ // One use for postinc value, the phi. Unnecessarily conservative?
+ if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
+ return false;
}
// Replace the increment with a decrement.
- BinaryOperator *decr =
- BinaryOperator::Create(Instruction::Sub, incr->getOperand(0),
- incr->getOperand(1), "tmp", incr);
- incr->replaceAllUsesWith(decr);
- incr->eraseFromParent();
+ DEBUG(errs() << "LSR: Examining use ");
+ DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false));
+ DEBUG(errs() << " in Inst: " << *Cond << '\n');
+ BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
+ Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
+ Incr->replaceAllUsesWith(Decr);
+ Incr->eraseFromParent();
// Substitute endval-startval for the original startval, and 0 for the
- // original endval. Since we're only testing for equality this is OK even
+ // original endval. Since we're only testing for equality this is OK even
// if the computation wraps around.
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
- int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0;
- Value *startVal = phi->getIncomingValue(inBlock);
- Value *endVal = Cond->getOperand(1);
- // FIXME check for case where both are constant
+ unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
+ Value *StartVal = PHIExpr->getIncomingValue(InBlock);
+ Value *EndVal = Cond->getOperand(1);
+ DEBUG(errs() << " Optimize loop counting iv to count down ["
+ << *EndVal << " .. " << *StartVal << "]\n");
+
+ // FIXME: check for case where both are constant.
Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
- BinaryOperator *NewStartVal =
- BinaryOperator::Create(Instruction::Sub, endVal, startVal,
- "tmp", PreInsertPt);
- phi->setIncomingValue(inBlock, NewStartVal);
+ BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
+ EndVal, StartVal, "tmp", PreInsertPt);
+ PHIExpr->setIncomingValue(InBlock, NewStartVal);
Cond->setOperand(1, Zero);
+ DEBUG(errs() << " New icmp: " << *Cond << "\n");
+
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ const SCEV *NewStride = 0;
+ bool Found = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *OldStride = IU->StrideOrder[i];
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
+ if (SC->getValue()->getSExtValue() == -SInt) {
+ Found = true;
+ NewStride = OldStride;
+ break;
+ }
+ }
+
+ if (!Found)
+ NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
+ IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
+ IU->IVUsesByStride[Stride]->removeUser(CondUse);
+
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
+ Stride = NewStride;
- Changed = true;
+ ++NumCountZero;
+
+ return true;
}
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
+bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ bool ThisChanged = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *Stride = IU->StrideOrder[i];
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(Stride);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ // If stride is a constant and it has an icmpinst use, check if we can
+ // optimize the loop to count down.
+ if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
+ Instruction *User = SI->second->Users.begin()->getUser();
+ if (!isa<ICmpInst>(User))
+ continue;
+ const SCEV *CondStride = Stride;
+ IVStrideUse *Use = &*SI->second->Users.begin();
+ if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
+ continue;
+ ThisChanged = true;
+ // Now check if it's possible to reuse this iv for other stride uses.
+ for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
+ const SCEV *SStride = IU->StrideOrder[j];
+ if (SStride == CondStride)
+ continue;
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
+ IU->IVUsesByStride.find(SStride);
+ assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SII->first->isLoopInvariant(L))
+ continue;
+ // FIXME: Rewrite other stride using CondStride.
+ }
+ }
+ }
+
+ Changed |= ThisChanged;
+ return ThisChanged;
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->getLoopPreheader() || !L->getLoopLatch())
+ return false;
+
if (!IU->IVUsesByStride.empty()) {
DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
<< "\" ";
@@ -2545,7 +2771,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// Change loop terminating condition to use the postinc iv when possible
// and optimize loop terminating compare. FIXME: Move this after
- // StrengthReduceStridedIVUsers?
+ // StrengthReduceIVUsersOfStride?
OptimizeLoopTermCond(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
@@ -2561,26 +2787,12 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
- // Note: this processes each stride/type pair individually. All users
- // passed into StrengthReduceStridedIVUsers have the same type AND stride.
- // Also, note that we iterate over IVUsesByStride indirectly by using
- // StrideOrder. This extra layer of indirection makes the ordering of
- // strides deterministic - not dependent on map order.
- for (unsigned Stride = 0, e = IU->StrideOrder.size();
- Stride != e; ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- // FIXME: Generalize to non-affine IV's.
- if (!SI->first->isLoopInvariant(L))
- continue;
- StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
- }
- }
+ StrengthReduceIVUsers(L);
- // After all sharing is done, see if we can adjust the loop to test against
- // zero instead of counting up to a maximum. This is usually faster.
- OptimizeLoopCountIV(L);
+ // After all sharing is done, see if we can adjust the loop to test against
+ // zero instead of counting up to a maximum. This is usually faster.
+ OptimizeLoopCountIV(L);
+ }
// We're done analyzing this loop; release all the state we built up for it.
IVsByStride.clear();
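
The LoopStrengthReduce.cpp hunks above culminate in the count-to-zero rewrite done by OptimizeLoopCountIVOfStride: substitute endval-startval for the starting value, compare against zero, and flip the increment into a decrement. A minimal C-level sketch of the effect follows; it is illustrative only and not part of the patch. do_something() is a hypothetical stand-in for a loop body that never reads the IV, and the real transform operates on LLVM IR through ScalarEvolution rather than on C source.

extern void do_something(void);   // hypothetical body; must not use the IV

// Before: the IV only feeds the equality exit test, counting up to 'end'.
void count_up(int start, int end) {
  for (int i = start; i != end; ++i)
    do_something();
}

// After: start from (end - start), decrement, and test against zero.
// The trip count is unchanged; testing against zero is typically cheaper.
void count_down(int start, int end) {
  for (int i = end - start; i != 0; --i)
    do_something();
}
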
diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp
deleted file mode 100644
index 837ec59..0000000
--- a/lib/Transforms/Scalar/LoopUnroll.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass implements a simple loop unroller. It works best when loops have
-// been canonicalized by the -indvars pass, allowing it to determine the trip
-// counts of loops easily.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-unroll"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <climits>
-
-using namespace llvm;
-
-static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
- cl::desc("The cut-off point for automatic loop unrolling"));
-
-static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
- cl::desc("Use this unroll count for all loops, for testing purposes"));
-
-static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
- cl::desc("Allows loops to be partially unrolled until "
- "-unroll-threshold loop size is reached."));
-
-namespace {
- class LoopUnroll : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(&ID) {}
-
- /// A magic value for use with the Threshold parameter to indicate
- /// that the loop unroll should be performed regardless of how much
- /// code expansion would result.
- static const unsigned NoThreshold = UINT_MAX;
-
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addRequired<LoopInfo>();
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<LoopInfo>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
- }
- };
-}
-
-char LoopUnroll::ID = 0;
-static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
-
-Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
-
-/// ApproximateLoopSize - Approximate the size of the loop.
-static unsigned ApproximateLoopSize(const Loop *L) {
- unsigned Size = 0;
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- Instruction *Term = BB->getTerminator();
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (isa<PHINode>(I) && BB == L->getHeader()) {
- // Ignore PHI nodes in the header.
- } else if (I->hasOneUse() && I->use_back() == Term) {
- // Ignore instructions only used by the loop terminator.
- } else if (isa<DbgInfoIntrinsic>(I)) {
- // Ignore debug instructions
- } else if (isa<GetElementPtrInst>(I) && I->hasOneUse()) {
- // Ignore GEP as they generally are subsumed into a load or store.
- } else if (isa<CallInst>(I)) {
- // Estimate size overhead introduced by call instructions which
- // is higher than other instructions. Here 3 and 10 are magic
- // numbers that help one isolated test case from PR2067 without
- // negatively impacting measured benchmarks.
- Size += isa<IntrinsicInst>(I) ? 3 : 10;
- } else {
- ++Size;
- }
-
- // TODO: Ignore expressions derived from PHI and constants if inval of phi
- // is a constant, or if operation is associative. This will get induction
- // variables.
- }
- }
-
- return Size;
-}
-
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
- assert(L->isLCSSAForm());
- LoopInfo *LI = &getAnalysis<LoopInfo>();
-
- BasicBlock *Header = L->getHeader();
- DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
- << "] Loop %" << Header->getName() << "\n");
- (void)Header;
-
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = UnrollCount;
-
- // Automatically select an unroll count.
- if (Count == 0) {
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- if (TripCount == 0)
- return false;
- Count = TripCount;
- }
-
- // Enforce the threshold.
- if (UnrollThreshold != NoThreshold) {
- unsigned LoopSize = ApproximateLoopSize(L);
- DEBUG(errs() << " Loop Size = " << LoopSize << "\n");
- uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > UnrollThreshold) {
- DEBUG(errs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << UnrollThreshold << "\n");
- if (!UnrollAllowPartial) {
- DEBUG(errs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- return false;
- }
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = UnrollThreshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
- }
- if (Count < 2) {
- DEBUG(errs() << " could not unroll partially\n");
- return false;
- }
- DEBUG(errs() << " partially unrolling with count: " << Count << "\n");
- }
- }
-
- // Unroll the loop.
- Function *F = L->getHeader()->getParent();
- if (!UnrollLoop(L, Count, LI, &LPM))
- return false;
-
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
- DT->runOnFunction(*F);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
- if (DF)
- DF->runOnFunction(*F);
- }
- return true;
-}
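The partial-unroll logic in the file removed above picks the largest count that both keeps the estimated unrolled size under -unroll-threshold and evenly divides the trip count. A standalone sketch of that selection with hypothetical names (the real code works on Loop objects and the ApproximateLoopSize estimate, not raw integers):

#include <cassert>

// Returns the chosen partial unroll count, or 0 if none is profitable.
static unsigned pickPartialUnrollCount(unsigned LoopSize, unsigned TripCount,
                                       unsigned Threshold) {
  // Largest count that keeps LoopSize * Count at or under Threshold...
  unsigned Count = Threshold / LoopSize;
  // ...reduced until it evenly divides the trip count.
  while (Count != 0 && TripCount % Count != 0)
    --Count;
  return Count >= 2 ? Count : 0; // counts of 0 or 1 mean "don't unroll"
}

int main() {
  // A loop of estimated size 30, trip count 12, default threshold 100:
  assert(pickPartialUnrollCount(30, 12, 100) == 3);
  return 0;
}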
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index c7b00da..38d267a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -407,6 +406,10 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
initLoopData();
Function *F = loopHeader->getParent();
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
// If the condition is trivial, always unswitch. There is no code growth for
// this case.
if (!IsTrivialUnswitchCondition(LoopCond)) {
@@ -957,7 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Worklist.pop_back();
// Simple constant folding.
- if (Constant *C = ConstantFoldInstruction(I, I->getContext())) {
+ if (Constant *C = ConstantFoldInstruction(I)) {
ReplaceUsesOfWith(I, C, Worklist, L, LPM);
continue;
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index af29f97..8466918 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -27,7 +27,6 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
@@ -198,8 +197,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
- std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext &Context) {
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
@@ -255,7 +253,6 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
std::vector<ValueEntry> &Ops) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
unsigned Opcode = I->getOpcode();
- LLVMContext &Context = I->getContext();
// First step, linearize the expression if it is in ((A+B)+(C+D)) form.
BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
@@ -265,13 +262,11 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
// transform them into multiplies by -1 so they can be reassociated.
if (I->getOpcode() == Instruction::Mul) {
if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
- LHS = LowerNegateToMultiply(cast<Instruction>(LHS),
- ValueRankMap, Context);
+ LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
LHSBO = isReassociableOp(LHS, Opcode);
}
if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
- RHS = LowerNegateToMultiply(cast<Instruction>(RHS),
- ValueRankMap, Context);
+ RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
RHSBO = isReassociableOp(RHS, Opcode);
}
}
@@ -373,7 +368,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// version of the value is returned, and BI is left pointing at the instruction
// that should be processed next by the reassociation pass.
//
-static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
+static Value *NegateValue(Value *V, Instruction *BI) {
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
@@ -386,8 +381,8 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(Context, I->getOperand(0), BI));
- I->setOperand(1, NegateValue(Context, I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
// We must move the add instruction here, because the neg instructions do
// not dominate the old add instruction in general. By moving it, we are
@@ -407,7 +402,7 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
-static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub))
return false;
@@ -431,7 +426,7 @@ static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
-static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
+static Instruction *BreakUpSubtract(Instruction *Sub,
std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// Convert a subtract into an add and a neg instruction... so that sub
// instructions can be commuted with other add instructions...
@@ -439,7 +434,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
// Calculate the negative value of Operand 1 of the sub instruction...
// and set it as the RHS of the add instruction we just made...
//
- Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
Instruction *New =
BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
New->takeName(Sub);
@@ -457,8 +452,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
static Instruction *ConvertShiftToMul(Instruction *Shl,
- std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext &Context) {
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
@@ -781,13 +775,11 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
/// ReassociateBB - Inspect all of the instructions in this basic block,
/// reassociating them as we go.
void Reassociate::ReassociateBB(BasicBlock *BB) {
- LLVMContext &Context = BB->getContext();
-
for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
Instruction *BI = BBI++;
if (BI->getOpcode() == Instruction::Shl &&
isa<ConstantInt>(BI->getOperand(1)))
- if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap, Context)) {
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
MadeChange = true;
BI = NI;
}
@@ -800,8 +792,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (BI->getOpcode() == Instruction::Sub) {
- if (ShouldBreakUpSubtract(Context, BI)) {
- BI = BreakUpSubtract(Context, BI, ValueRankMap);
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
MadeChange = true;
} else if (BinaryOperator::isNeg(BI)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
@@ -809,7 +801,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
(!BI->hasOneUse() ||
!isReassociableOp(BI->use_back(), Instruction::Mul))) {
- BI = LowerNegateToMultiply(BI, ValueRankMap, Context);
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
MadeChange = true;
}
}
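The Reassociate helpers touched above only lose their LLVMContext parameter; the rewrites themselves are the simple identities named in their comments. A minimal sketch of those identities on plain integers (illustrative only; the pass performs them on IR):

#include <cassert>

int main() {
  int X = 7, Y = 3;
  assert(0 - X == X * -1);      // LowerNegateToMultiply: 0-X  -> X*-1
  assert(X - Y == X + (0 - Y)); // BreakUpSubtract:       X-Y  -> X+(0-Y)
  assert((X << 4) == X * 16);   // ConvertShiftToMul:     X<<C -> X*(1<<C)
  return 0;
}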
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 509a6db..c202a2c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -795,9 +795,14 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
return markOverdefined(&EVI);
Value *AggVal = EVI.getAggregateOperand();
- unsigned i = *EVI.idx_begin();
- LatticeVal EltVal = getStructValueState(AggVal, i);
- mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ if (isa<StructType>(AggVal->getType())) {
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return markOverdefined(&EVI);
+ }
}
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index c047fca..001267a 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -507,7 +507,7 @@ void ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::iterator
+ for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
@@ -629,9 +629,6 @@ bool SCCVN::runOnFunction(Function& F) {
}
}
- // FIXME: This code is commented out for now, because it can lead to the
- // insertion of a lot of redundant PHIs being inserted by SSAUpdater.
-#if 0
// Perform a forward data-flow to compute availability at all points on
// the CFG.
do {
@@ -709,7 +706,6 @@ bool SCCVN::runOnFunction(Function& F) {
CurInst->eraseFromParent();
}
}
-#endif
VT.clear();
for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 5669da0..b54565c 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -26,10 +26,6 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCFGSimplificationPass());
}
-void LLVMAddCondPropagationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCondPropagationPass());
-}
-
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadStoreEliminationPass());
}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 575c93b..611505e 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -100,7 +100,7 @@ public:
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
- void EmitPutChar(Value *Char, IRBuilder<> &B);
+ Value *EmitPutChar(Value *Char, IRBuilder<> &B);
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
@@ -252,18 +252,20 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
+Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context), NULL);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
Type::getInt32Ty(*Context),
+ /*isSigned*/true,
"chari"),
"putchar");
if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
+ return CI;
}
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
@@ -302,7 +304,8 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context),
File->getType(), NULL);
- Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari");
+ Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true,
+ "chari");
CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
@@ -955,6 +958,17 @@ struct MemCmpOpt : public LibCallOptimization {
return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
}
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ std::string LHSStr, RHSStr;
+ if (GetConstantStringInfo(LHS, LHSStr) &&
+ GetConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.length() || Len > RHSStr.length())
+ return 0;
+ uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
return 0;
}
};
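The new constant fold above only fires when the length stays inside both constant strings. A standalone sketch of that guard, using std::string in place of the GetConstantStringInfo results (hypothetical helper, not the optimizer's interface):

#include <cstring>
#include <string>

// Returns true and sets Result when both operands are known strings and Len
// stays within both of them; mirrors the out-of-bounds check added above.
static bool foldMemCmp(const std::string &LHS, const std::string &RHS,
                       std::size_t Len, int &Result) {
  if (Len > LHS.length() || Len > RHS.length())
    return false; // would read past a known constant string; leave the call
  Result = std::memcmp(LHS.data(), RHS.data(), Len);
  return true;
}

// e.g. foldMemCmp("abc", "abd", 2, R) succeeds with R == 0 (prefixes match),
// while foldMemCmp("abc", "abd", 4, R) refuses to fold.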
@@ -1314,11 +1328,13 @@ struct PrintFOpt : public LibCallOptimization {
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), 0);
- // printf("x") -> putchar('x'), even for '%'.
+ // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
+ // in case there is an error writing to stdout.
if (FormatStr.size() == 1) {
- EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 1);
+ Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
+ FormatStr[0]), B);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("foo\n") --> puts("foo")
@@ -1339,9 +1355,10 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%c", chr) --> putchar(*(i8*)dst)
if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
isa<IntegerType>(CI->getOperand(2)->getType())) {
- EmitPutChar(CI->getOperand(2), B);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 1);
+ Value *Res = EmitPutChar(CI->getOperand(2), B);
+
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
@@ -2479,10 +2496,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// lround, lroundf, lroundl:
// * lround(cnst) -> cnst'
//
-// memcmp:
-// * memcmp(x,y,l) -> cnst
-// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
-//
// pow, powf, powl:
// * pow(exp(x),y) -> exp(x*y)
// * pow(sqrt(x),y) -> pow(x,y*0.5)
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 4864e23..b06ae3d 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -359,8 +359,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
Instruction *Inst = BI++;
if (isInstructionTriviallyDead(Inst))
Inst->eraseFromParent();
- else if (Constant *C = ConstantFoldInstruction(Inst,
- Inst->getContext())) {
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
Inst->replaceAllUsesWith(C);
Inst->eraseFromParent();
}
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index b56e170..4119cb9 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -25,7 +25,7 @@
// unlikely, that the return returns something else (like constant 0), and
// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
// the function return the exact same value.
-// 4. If it can prove that callees do not access theier caller stack frame,
+// 4. If it can prove that callees do not access their caller stack frame,
// they are marked as eligible for tail call elimination (by the code
// generator).
//
@@ -58,6 +58,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -75,7 +76,7 @@ namespace {
private:
bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- std::vector<PHINode*> &ArgumentPHIs,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail);
bool CanMoveAboveCall(Instruction *I, CallInst *CI);
Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
@@ -90,7 +91,6 @@ FunctionPass *llvm::createTailCallEliminationPass() {
return new TailCallElim();
}
-
/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
/// callees of this function. We only do very simple analysis right now, this
/// could be expanded in the future to use mod/ref information for particular
@@ -100,7 +100,7 @@ static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
return true;
}
-/// FunctionContainsAllocas - Scan the specified basic block for alloca
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
/// instructions. If it contains any that might be accessed by calls, return
/// true.
static bool CheckForEscapingAllocas(BasicBlock *BB,
@@ -127,7 +127,7 @@ bool TailCallElim::runOnFunction(Function &F) {
BasicBlock *OldEntry = 0;
bool TailCallsAreMarkedTail = false;
- std::vector<PHINode*> ArgumentPHIs;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
bool FunctionContainsEscapingAllocas = false;
@@ -154,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) {
/// happen. This bug is PR962.
if (FunctionContainsEscapingAllocas)
return false;
-
// Second pass, change any tail calls to loops.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -204,7 +203,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
- if (LoadInst* L = dyn_cast<LoadInst>(I)) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Loads may always be moved above calls without side effects.
if (CI->mayHaveSideEffects()) {
// Non-volatile loads may be moved above a call with side effects if it
@@ -235,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// We currently handle static constants and arguments that are not modified as
// part of the recursion.
//
-static bool isDynamicConstant(Value *V, CallInst *CI) {
+static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
if (isa<Constant>(V)) return true; // Static constants are always dyn consts
// Check to see if this is an immutable argument, if so, the value
@@ -253,6 +252,15 @@ static bool isDynamicConstant(Value *V, CallInst *CI) {
if (CI->getOperand(ArgNo+1) == Arg)
return true;
}
+
+ // Switch cases are always constant integers. If the value is being switched
+ // on and the return is only reachable from one of its cases, it's
+ // effectively constant.
+ if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
+ if (SI->getCondition() == V)
+ return SI->getDefaultDest() != RI->getParent();
+
// Not a constant or immutable argument, we can't safely transform.
return false;
}
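A source-level sketch of the pattern the new switch handling recognizes (hypothetical C++; the pass inspects the IR form, where the return block's unique predecessor is the switch):

static int sum(int n) {
  switch (n) {
  case 0:
    // Reachable only from the "case 0" arm, so the returned value -- the
    // switch condition itself -- is effectively the constant 0 here.
    return n;
  default:
    // The recursive arm that tail recursion elimination wants to turn into a
    // loop; the base-case return no longer blocks it merely because it
    // returns a variable rather than a literal.
    return n + sum(n - 1);
  }
}

int main() { return sum(4) == 10 ? 0 : 1; }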
@@ -265,10 +273,6 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
Function *F = TheRI->getParent()->getParent();
Value *ReturnedValue = 0;
- // TODO: Handle multiple value ret instructions;
- if (isa<StructType>(F->getReturnType()))
- return 0;
-
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
if (RI != TheRI) {
@@ -278,7 +282,7 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
// evaluatable at the start of the initial invocation of the function,
// instead of at the end of the evaluation.
//
- if (!isDynamicConstant(RetOp, CI))
+ if (!isDynamicConstant(RetOp, CI, RI))
return 0;
if (ReturnedValue && RetOp != ReturnedValue)
@@ -315,7 +319,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- std::vector<PHINode*> &ArgumentPHIs,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail) {
BasicBlock *BB = Ret->getParent();
Function *F = BB->getParent();