Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp                      |   6
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp                      |   2
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp            |   8
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp      |  59
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp                       |  61
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp            |  16
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp             |  73
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp              |   6
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp            |  28
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp              |   4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp        | 143
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp              |  28
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp           |   4
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp               |   5
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp      |  53
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp           |   3
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp          | 213
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp           |   5
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp  |  96
19 files changed, 495 insertions, 318 deletions
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index 6135992..dcf14a6 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -230,7 +230,7 @@ class ABCD : public FunctionPass {
DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
for (; begin != end; ++begin) {
- begin->second.clear();
+ begin->second.clear();
}
map.clear();
}
@@ -396,8 +396,8 @@ class ABCD : public FunctionPass {
/// this case the method returns true, otherwise false. It also obtains the
/// Instruction and ConstantInt from the BinaryOperator and returns it.
bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2);
+ Instruction **I2, ConstantInt **C1,
+ ConstantInt **C2);
/// This method creates a constraint between a Sigma and an Instruction.
/// These constraints are created as soon as we find a comparator that uses a
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 5a49841..2d19467 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -83,7 +83,7 @@ bool ADCE::runOnFunction(Function& F) {
for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
E = worklist.end(); I != E; ++I) {
- NumRemoved++;
+ ++NumRemoved;
(*I)->eraseFromParent();
}
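
The `NumRemoved++` to `++NumRemoved` change above recurs throughout this commit on statistics counters and iterators. For plain integers the two forms are equivalent, but post-increment on a non-trivial iterator type materializes a temporary copy. A minimal stand-alone sketch of the distinction (illustrative only, not LLVM code):

    // I++ must return the pre-increment value, so it copies the iterator;
    // ++I advances in place. Using pre-increment uniformly avoids the copy
    // where it matters and keeps counters and iterators consistent.
    #include <cstdio>
    #include <list>

    int main() {
      std::list<int> L;
      L.push_back(1); L.push_back(2); L.push_back(3);
      for (std::list<int>::iterator I = L.begin(), E = L.end(); I != E; ++I)
        std::printf("%d\n", *I);
      return 0;
    }
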
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 93e9bfb..272066c 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -548,7 +548,8 @@ protected:
CI->eraseFromParent();
}
bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp)))
+ if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
+ - CallInst::ArgOffset)))
return SizeCI->isAllOnesValue();
return false;
}
@@ -559,7 +560,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
- bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
const Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
CI->replaceAllUsesWith(RetVal);
@@ -759,8 +760,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
}
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue(),
- OpInfo.ConstraintType == TargetLowering::C_Memory);
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
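
Most hunks in this commit are instances of one migration: call arguments are read through `getArgOperand(i)` instead of raw `getOperand(i)`, because the callee itself occupies an operand slot and raw indices are off by `CallInst::ArgOffset`. A self-contained analogue of that layout (the class and offset below are assumptions for illustration, not the real LLVM classes):

    #include <cassert>
    #include <vector>

    struct Call {
      // Assumed operand layout: [callee, arg0, arg1, ...].
      std::vector<int> Operands;
      static const unsigned ArgOffset = 1; // where the arguments begin

      int getOperand(unsigned i) const { return Operands[i]; }
      int getArgOperand(unsigned i) const { return Operands[i + ArgOffset]; }
      unsigned getNumArgOperands() const { return Operands.size() - ArgOffset; }
    };

    int main() {
      Call C;
      C.Operands.push_back(42); // callee
      C.Operands.push_back(7);  // arg0
      C.Operands.push_back(8);  // arg1
      // Callers using getArgOperand() never hard-code the offset, so they
      // survive changes to where the callee is stored.
      assert(C.getArgOperand(0) == C.getOperand(1));
      assert(C.getNumArgOperands() == 2);
      return 0;
    }
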
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 09c01d3..e047e4f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -56,7 +56,8 @@ namespace {
}
bool runOnBasicBlock(BasicBlock &BB);
- bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep);
+ bool handleFreeWithNonTrivialDependency(const CallInst *F,
+ MemDepResult Dep);
bool handleEndBlock(BasicBlock &BB);
bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
BasicBlock::iterator &BBI,
@@ -73,7 +74,6 @@ namespace {
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addPreserved<DominatorTree>();
- AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -123,14 +123,15 @@ static Value *getPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
- return MI->getOperand(1);
-
- switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ return MI->getArgOperand(0);
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
- return I->getOperand(1);
+ return II->getArgOperand(0);
case Intrinsic::lifetime_end:
- return I->getOperand(2);
+ return II->getArgOperand(1);
}
}
@@ -147,12 +148,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
Len = MI->getLength();
} else {
- switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
return -1u;
case Intrinsic::lifetime_end:
- Len = I->getOperand(1);
+ Len = II->getArgOperand(0);
break;
}
}
@@ -201,8 +203,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (InstDep.isNonLocal()) continue;
// Handle frees whose dependencies are non-trivial.
- if (isFreeCall(Inst)) {
- MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep);
+ if (const CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
continue;
}
@@ -218,7 +220,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
isElidable(DepStore)) {
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepStore);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
// DeleteDeadInstruction can delete the current instruction in loop
@@ -249,7 +251,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -270,7 +272,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -287,7 +289,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
/// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
+bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
+ MemDepResult Dep) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
Instruction *Dependency = Dep.getInst();
@@ -297,13 +300,13 @@ bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
// Check for aliasing.
- if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
+ if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
AliasAnalysis::MustAlias)
return false;
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency);
- NumFastStores++;
+ ++NumFastStores;
return true;
}
@@ -349,9 +352,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (deadPointers.count(pointerOperand)) {
// DCE instructions only used to calculate that store.
Instruction *Dead = BBI;
- BBI++;
+ ++BBI;
DeleteDeadInstruction(Dead, &deadPointers);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -371,9 +374,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// However, if this load is unused and not volatile, we can go ahead and
// remove it, and not have to worry about it making our pointer undead!
if (L->use_empty() && !L->isVolatile()) {
- BBI++;
+ ++BBI;
DeleteDeadInstruction(L, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
continue;
}
@@ -391,9 +394,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Dead alloca's can be DCE'd when we reach them
if (A->use_empty()) {
- BBI++;
+ ++BBI;
DeleteDeadInstruction(A, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
}
@@ -426,9 +429,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
getPointerSize(*I));
if (A == AliasAnalysis::ModRef)
- modRef++;
+ ++modRef;
else
- other++;
+ ++other;
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
dead.push_back(*I);
@@ -442,9 +445,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
} else if (isInstructionTriviallyDead(BBI)) {
// For any non-memory-affecting non-terminators, DCE them as we reach them
Instruction *Inst = BBI;
- BBI++;
+ ++BBI;
DeleteDeadInstruction(Inst, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
continue;
}
@@ -497,7 +500,7 @@ bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
// Remove it!
++BBI;
DeleteDeadInstruction(S, &deadPointers);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
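
Changing `isFreeCall` to return `const CallInst*` rather than a bool (and threading that type into `handleFreeWithNonTrivialDependency`) lets the caller test and capture the typed call in one step. A generic sketch of the idiom (illustrative, not the LLVM implementation):

    #include <cstdio>

    struct Animal { virtual ~Animal() {} };
    struct Dog : Animal { void bark() const { std::puts("woof"); } };

    // A predicate that returns the typed object on success and null on
    // failure carries strictly more information than a bool.
    const Dog *isDog(const Animal *A) {
      return dynamic_cast<const Dog *>(A);
    }

    int main() {
      Dog D;
      const Animal *A = &D;
      if (const Dog *R = isDog(A)) // test and capture at once
        R->bark();
      return 0;
    }
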
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index ca8ab49..88b6776 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -35,6 +35,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
@@ -271,7 +272,8 @@ Expression ValueTable::create_expression(CallInst* C) {
e.function = C->getCalledFunction();
e.opcode = Expression::CALL;
- for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+ CallSite CS(C);
+ for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I)
e.varargs.push_back(lookup_or_add(*I));
@@ -447,14 +449,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
if (local_dep.isDef()) {
CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
- if (local_cdep->getNumOperands() != C->getNumOperands()) {
+ if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 1; i < C->getNumOperands(); ++i) {
- uint32_t c_vn = lookup_or_add(C->getOperand(i));
- uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
@@ -504,13 +506,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
return nextValueNumber++;
}
- if (cdep->getNumOperands() != C->getNumOperands()) {
+ if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 1; i < C->getNumOperands(); ++i) {
- uint32_t c_vn = lookup_or_add(C->getOperand(i));
- uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
@@ -1500,7 +1502,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1723,7 +1725,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
- NumPRELoad++;
+ ++NumPRELoad;
return true;
}
@@ -1784,7 +1786,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
MD->invalidateCachedPointerInfo(AvailVal);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1830,7 +1832,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
MD->invalidateCachedPointerInfo(StoredVal);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1860,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
MD->invalidateCachedPointerInfo(DepLI);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1871,7 +1873,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1882,7 +1884,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
}
@@ -2014,7 +2016,7 @@ bool GVN::runOnFunction(Function& F) {
BasicBlock *BB = FI;
++FI;
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
- if (removedBlock) NumGVNBlocks++;
+ if (removedBlock) ++NumGVNBlocks;
Changed |= removedBlock;
}
@@ -2126,27 +2128,28 @@ bool GVN::performPRE(Function &F) {
for (pred_iterator PI = pred_begin(CurrentBlock),
PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
// We're not interested in PRE where the block is its
// own predecessor, or in blocks with predecessors
// that are not reachable.
- if (*PI == CurrentBlock) {
+ if (P == CurrentBlock) {
NumWithout = 2;
break;
- } else if (!localAvail.count(*PI)) {
+ } else if (!localAvail.count(P)) {
NumWithout = 2;
break;
}
DenseMap<uint32_t, Value*>::iterator predV =
- localAvail[*PI]->table.find(ValNo);
- if (predV == localAvail[*PI]->table.end()) {
- PREPred = *PI;
- NumWithout++;
+ localAvail[P]->table.find(ValNo);
+ if (predV == localAvail[P]->table.end()) {
+ PREPred = P;
+ ++NumWithout;
} else if (predV->second == CurInst) {
NumWithout = 2;
} else {
- predMap[*PI] = predV->second;
- NumWith++;
+ predMap[P] = predV->second;
+ ++NumWith;
}
}
@@ -2201,7 +2204,7 @@ bool GVN::performPRE(Function &F) {
PREInstr->setName(CurInst->getName() + ".pre");
predMap[PREPred] = PREInstr;
VN.add(PREInstr, ValNo);
- NumGVNPRE++;
+ ++NumGVNPRE;
// Update the availability map to include the new instruction.
localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
@@ -2211,8 +2214,10 @@ bool GVN::performPRE(Function &F) {
CurInst->getName() + ".pre-phi",
CurrentBlock->begin());
for (pred_iterator PI = pred_begin(CurrentBlock),
- PE = pred_end(CurrentBlock); PI != PE; ++PI)
- Phi->addIncoming(predMap[*PI], *PI);
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ Phi->addIncoming(predMap[P], P);
+ }
VN.add(Phi, ValNo);
localAvail[CurrentBlock]->table[ValNo] = Phi;
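
The GVN hunks above number a call by its callee plus the value numbers of its arguments, bailing out when the argument counts differ. A reduced sketch of that numbering scheme under simplifying assumptions (pure calls only, no memory dependence; the types below are stand-ins, not the LLVM ones):

    #include <cassert>
    #include <map>
    #include <vector>

    typedef unsigned VN;

    struct CallKey {
      const void *Callee;
      std::vector<VN> ArgVNs;
      bool operator<(const CallKey &O) const {
        if (Callee != O.Callee) return Callee < O.Callee;
        return ArgVNs < O.ArgVNs;
      }
    };

    struct ValueTable {
      std::map<CallKey, VN> Table;
      VN NextValueNumber;
      ValueTable() : NextValueNumber(0) {}
      // Two calls get the same number iff the callee and every argument
      // value number match -- the per-argument loop in lookup_or_add_call.
      VN lookupOrAdd(const CallKey &K) {
        std::pair<std::map<CallKey, VN>::iterator, bool> P =
            Table.insert(std::make_pair(K, NextValueNumber));
        if (P.second) ++NextValueNumber;
        return P.first->second;
      }
    };

    int main() {
      ValueTable VT;
      int F = 0; // stands in for a Function*
      CallKey A; A.Callee = &F; A.ArgVNs.push_back(1); A.ArgVNs.push_back(2);
      CallKey B = A;
      assert(VT.lookupOrAdd(A) == VT.lookupOrAdd(B));
      return 0;
    }
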
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 36bea67..b5c9dd8 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -467,6 +467,17 @@ void IndVarSimplify::EliminateIVRemainders() {
}
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // If LoopSimplify form is not available, stay out of trouble. Some notes:
+ // - LSR currently only supports LoopSimplify-form loops. Indvars'
+ // canonicalization can be a pessimization without LSR to "clean up"
+ // afterwards.
+ // - We depend on having a preheader; in particular,
+ // Loop::getCanonicalInductionVariable only supports loops with preheaders,
+ // and we're in trouble if we can't find the induction variable even when
+ // we've manually inserted one.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
@@ -760,8 +771,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
bool UsedInLoop = false;
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
- BasicBlock *UseBB = cast<Instruction>(UI)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(UI)) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
unsigned i =
PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
UseBB = P->getIncomingBlock(i);
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index df05b71..edce14c 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,6 +18,7 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -288,14 +289,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
// Perhaps getConstantOnEdge should be smart enough to do this?
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
- Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
if (PredCst == 0 ||
(!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
continue;
- Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
}
return !Result.empty();
@@ -345,8 +347,19 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
- Result.push_back(RHSVals[i]);
- Result.back().first = InterestingVal;
+ // If we already inferred a value for this block on the LHS, don't
+ // re-add it.
+ bool HasValue = false;
+ for (unsigned r = 0, e = Result.size(); r != e; ++r)
+ if (Result[r].second == RHSVals[i].second) {
+ HasValue = true;
+ break;
+ }
+
+ if (!HasValue) {
+ Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
}
return !Result.empty();
}
@@ -409,20 +422,21 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
(!isa<Instruction>(Cmp->getOperand(0)) ||
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
-
+
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate
Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
- RHSCst, *PI, BB);
+ RHSCst, P, BB);
if (Res == LazyValueInfo::Unknown)
continue;
Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
}
-
+
return !Result.empty();
}
}
@@ -538,18 +552,22 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
if (PBI->isConditional() && PBI->getCondition() == Condition &&
- ProcessBranchOnDuplicateCond(*PI, BB))
+ ProcessBranchOnDuplicateCond(P, BB))
return true;
+ }
} else {
assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
- for (; PI != E; ++PI)
- if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator()))
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
if (PSI->getCondition() == Condition &&
- ProcessSwitchOnDuplicateCond(*PI, BB))
+ ProcessSwitchOnDuplicateCond(P, BB))
return true;
+ }
}
}
@@ -569,19 +587,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// If we have a comparison, loop over the predecessors to see if there is
// a condition with a lexically identical value.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI->isConditional() && *PI != BB) {
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
+ if (PBI->isConditional() && P != BB) {
if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
if (CI->getOperand(0) == CondCmp->getOperand(0) &&
CI->getOperand(1) == CondCmp->getOperand(1) &&
CI->getPredicate() == CondCmp->getPredicate()) {
// TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(*PI, BB))
+ if (ProcessBranchOnDuplicateCond(P, BB))
return true;
}
}
}
+ }
}
}
@@ -869,9 +889,15 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Add all the unavailable predecessors to the PredsToSplit list.
for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI)
- if (!AvailablePredSet.count(*PI))
- PredsToSplit.push_back(*PI);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // If the predecessor is an indirect goto, we can't split the edge.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return false;
+
+ if (!AvailablePredSet.count(P))
+ PredsToSplit.push_back(P);
+ }
// Split them out to their own block.
UnavailablePred =
@@ -903,11 +929,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// have multiple entries here.
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
++PI) {
+ BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(*PI, (Value*)0));
+ std::make_pair(P, (Value*)0));
- assert(I != AvailablePreds.end() && I->first == *PI &&
+ assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
PN->addIncoming(I->second, I->first);
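
Two behavioral fixes hide among the mechanical changes above: results inferred from the RHS of a logical operator are no longer added for a block that already has an LHS result, and edges from `indirectbr` terminators are never split. The de-duplication in isolation (a sketch; the pair layout is an assumption):

    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    typedef std::pair<const int *, const void *> PredValue; // (constant, block)

    // Append RV only if no existing result covers the same block -- the
    // HasValue scan added in ComputeValueKnownInPredecessors.
    void addUnique(std::vector<PredValue> &Result, const PredValue &RV) {
      for (std::size_t r = 0, e = Result.size(); r != e; ++r)
        if (Result[r].second == RV.second)
          return;
      Result.push_back(RV);
    }

    int main() {
      std::vector<PredValue> Result;
      int C1 = 1, C2 = 0;
      const void *Block = &C1; // any distinct address works here
      addUnique(Result, PredValue(&C1, Block));
      addUnique(Result, PredValue(&C2, Block)); // dropped: block already set
      assert(Result.size() == 1);
      return 0;
    }
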
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 48817ab..e4894e9 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -83,7 +83,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
- BI++;
+ ++BI;
}
// Make sure that no instructions in the block have potential side-effects.
@@ -176,7 +176,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
BasicBlock::iterator BI = exitBlock->begin();
while (PHINode* P = dyn_cast<PHINode>(BI)) {
P->replaceUsesOfWith(exitingBlock, preheader);
- BI++;
+ ++BI;
}
// Update the dominator tree and remove the instructions and blocks that will
@@ -226,7 +226,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
LPM.deleteLoopFromQueue(L);
Changed = true;
- NumDeleted++;
+ ++NumDeleted;
return Changed;
}
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 101ff5b..31058e5 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -649,7 +649,7 @@ bool LoopIndexSplit::updateLoopIterationSpace() {
}
}
}
- NumRestrictBounds++;
+ ++NumRestrictBounds;
return true;
}
@@ -958,11 +958,11 @@ bool LoopIndexSplit::splitLoop() {
continue;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
+ BI != BE; ++BI) {
Instruction *Inst = BI;
if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
- && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
+ && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
return false;
}
}
@@ -1016,13 +1016,13 @@ bool LoopIndexSplit::splitLoop() {
BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
// [*] Clone Loop
- DenseMap<const Value *, Value *> ValueMap;
- Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
+ ValueMap<const Value *, Value *> VMap;
+ Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
Loop *ALoop = L;
// [*] ALoop's exiting edge enters BLoop's header.
// ALoop's original exit block becomes BLoop's exit block.
- PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
+ PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
BasicBlock *A_ExitingBlock = ExitCondition->getParent();
BranchInst *A_ExitInsn =
dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
@@ -1047,7 +1047,7 @@ bool LoopIndexSplit::splitLoop() {
for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
+ PHINode *PNClone = cast<PHINode>(VMap[PN]);
InverseMap[PNClone] = PN;
} else
break;
@@ -1085,11 +1085,11 @@ bool LoopIndexSplit::splitLoop() {
// block. Remove incoming PHINode values from ALoop's exiting block.
// Add new incoming values from BLoop's incoming exiting value.
// Update BLoop exit block's dominator info..
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
+ BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
BI != BE; ++BI) {
if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
+ PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
B_ExitingBlock);
PN->removeIncomingValue(A_ExitingBlock);
} else
@@ -1131,7 +1131,7 @@ bool LoopIndexSplit::splitLoop() {
removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
//[*] Eliminate split condition's inactive branch in from BLoop.
- BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
+ BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
BasicBlock *B_InactiveBranch = NULL;
BasicBlock *B_ActiveBranch = NULL;
@@ -1146,9 +1146,9 @@ bool LoopIndexSplit::splitLoop() {
//[*] Move exit condition into split condition block to avoid
// executing dead loop iteration.
- ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
- Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
- ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);
+ ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
+ Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
+ ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
@@ -1159,7 +1159,7 @@ bool LoopIndexSplit::splitLoop() {
B_SplitCondition, B_IndVar, B_IndVarIncrement,
BLoop, EVOpNum);
- NumIndexSplit++;
+ ++NumIndexSplit;
return true;
}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 5004483..16c4a15 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -147,7 +147,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
continue; // PHI nodes don't count.
if (isa<DbgInfoIntrinsic>(OI))
continue; // Debug intrinsics don't count as size.
- Size++;
+ ++Size;
}
if (Size > MAX_HEADER_SIZE)
@@ -263,7 +263,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
preserveCanonicalLoopForm(LPM);
- NumRotated++;
+ ++NumRotated;
return true;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 86ea3eb..a250a88 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -392,12 +392,13 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
-/// isMulSExtable - Return true if the given add can be sign-extended
+/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value.
-static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
const Type *WideTy =
- IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
- return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
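
The widening-type fix above deserves a number: sign-extending a sum needs one extra bit, but a product of k signed n-bit operands can need n*k bits, which is why the new width is the bit width times the operand count. A tiny demonstration (illustrative arithmetic, not SCEV code):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Worst case for two signed 8-bit operands:
      int8_t A = -128, B = -128;
      int32_t P = int32_t(A) * int32_t(B); // 16384
      // 16384 needs 16 bits (8 * 2 operands); 9 bits (8 + 1) top out at 255.
      assert(P == 16384);
      assert(P <= INT16_MAX && P > 255);
      return 0;
    }
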
@@ -413,20 +414,28 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (LHS == RHS)
return SE.getConstant(LHS->getType(), 1);
- // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
- // folding.
- if (RHS->isAllOnesValue())
- return SE.getMulExpr(LHS, RHS);
+ // Handle a few RHS special cases.
+ const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+ if (RC) {
+ const APInt &RA = RC->getValue()->getValue();
+ // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+ // some folding.
+ if (RA.isAllOnesValue())
+ return SE.getMulExpr(LHS, RC);
+ // Handle x /s 1 as x.
+ if (RA == 1)
+ return LHS;
+ }
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
- const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (!RC)
return 0;
- if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
+ const APInt &LA = C->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ if (LA.srem(RA) != 0)
return 0;
- return SE.getConstant(C->getValue()->getValue()
- .sdiv(RC->getValue()->getValue()));
+ return SE.getConstant(LA.sdiv(RA));
}
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
@@ -440,6 +449,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (!Step) return 0;
return SE.getAddRecExpr(Start, Step, AR->getLoop());
}
+ return 0;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -455,10 +465,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
return SE.getAddExpr(Ops);
}
+ return 0;
}
// Check for a multiply operand that we can pull RHS out of.
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
@@ -475,6 +486,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
return Found ? SE.getMulExpr(Ops) : 0;
}
+ return 0;
+ }
// Otherwise we don't know.
return 0;
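
The two new RHS special cases in `getExactSDiv` above fold without consulting ScalarEvolution further. On plain integers the logic looks like this (a sketch; note the explicit INT64_MIN guard, mirroring the overflow check later in GenerateICmpZeroScales):

    #include <cassert>
    #include <cstdint>

    // Returns LHS /s RHS when the division is provably exact; sets Known
    // to false otherwise.
    int64_t exactSDiv(int64_t LHS, int64_t RHS, bool &Known) {
      Known = true;
      if (RHS == 1)
        return LHS;                        // x /s 1  ->  x
      if (RHS == -1 && LHS != INT64_MIN)
        return -LHS;                       // x /s -1 ->  x * -1
      if (RHS != 0 && RHS != -1 && LHS % RHS == 0)
        return LHS / RHS;                  // exact constant division
      Known = false;
      return 0;
    }

    int main() {
      bool K;
      assert(exactSDiv(42, 1, K) == 42 && K);
      assert(exactSDiv(42, -1, K) == -42 && K);
      exactSDiv(7, 2, K);
      assert(!K);                          // 7 /s 2 is not exact
      return 0;
    }
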
@@ -546,7 +559,7 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- if (II->getOperand(1) == OperandVal)
+ if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
}
@@ -568,7 +581,7 @@ static const Type *getAccessType(const Instruction *Inst) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- AccessTy = II->getOperand(1)->getType();
+ AccessTy = II->getArgOperand(0)->getType();
break;
}
}
@@ -976,6 +989,8 @@ public:
void dump() const;
};
+}
+
/// HasFormula - Test whether this use has a formula which has the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
@@ -1203,6 +1218,32 @@ static bool isAlwaysFoldable(const SCEV *S,
return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
}
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+ static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+ }
+
+ static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+ }
+
+ static unsigned
+ getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+ unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+ Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+ return Result;
+ }
+
+ static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+ const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+ return LHS == RHS;
+ }
+};
+
/// FormulaSorter - This class implements an ordering for formulae which sorts
/// them by their standalone cost.
class FormulaSorter {
@@ -1275,7 +1316,9 @@ class LSRInstance {
}
// Support for sharing of LSRUses between LSRFixups.
- typedef DenseMap<const SCEV *, size_t> UseMapTy;
+ typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+ size_t,
+ UseMapDenseMapInfo> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -1613,8 +1656,11 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
+ else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+ NewRHS = SU->getValue();
else
- llvm_unreachable("Max doesn't match expected pattern!");
+ // Max doesn't match expected pattern.
+ return Cond;
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
@@ -1805,6 +1851,8 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
NewMaxOffset = NewOffset;
}
// Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
NewAccessTy = Type::getVoidTy(AccessTy->getContext());
@@ -1833,7 +1881,7 @@ LSRInstance::getUse(const SCEV *&Expr,
}
std::pair<UseMapTy::iterator, bool> P =
- UseMap.insert(std::make_pair(Expr, 0));
+ UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;
@@ -1919,7 +1967,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+ Worklist.append(Add->op_begin(), Add->op_end());
}
} while (!Worklist.empty());
}
@@ -2086,7 +2134,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
- Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
+ Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
@@ -2095,8 +2143,12 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!Inserted.insert(U)) continue;
const Value *V = U->getValue();
- if (const Instruction *Inst = dyn_cast<Instruction>(V))
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ // Look for instructions defined outside the loop.
if (L->contains(Inst)) continue;
+ } else if (isa<UndefValue>(V))
+ // Undef doesn't have a live range, so it doesn't matter.
+ continue;
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const Instruction *UserInst = dyn_cast<Instruction>(*UI);
@@ -2155,20 +2207,23 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
+ SmallVectorImpl<const SCEV *> &UninterestingOps,
+ const Loop *L,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- CollectSubexprs(*I, C, Ops, SE);
+ CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (!AR->getStart()->isZero()) {
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
- AR->getLoop()), C, Ops, SE);
- CollectSubexprs(AR->getStart(), C, Ops, SE);
+ AR->getLoop()),
+ C, Ops, UninterestingOps, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2178,13 +2233,17 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, SE);
+ Ops, UninterestingOps, L, SE);
return;
}
}
- // Otherwise use the value itself.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ // Otherwise use the value itself. Loop-variant "unknown" values are
+ // uninteresting; we won't be able to do anything meaningful with them.
+ if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
+ UninterestingOps.push_back(S);
+ else
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2198,8 +2257,15 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
- SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, SE);
+ SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
+
+ // Add any uninteresting values as one register, as we won't be able to
+ // form any interesting reassociation opportunities with them. They'll
+ // just have to be added inside the loop no matter what we do.
+ if (!UninterestingAddOps.empty())
+ AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
@@ -2212,11 +2278,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
continue;
// Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps;
- for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
- KE = AddOps.end(); K != KE; ++K)
- if (K != J)
- InnerAddOps.push_back(*K);
+ SmallVector<const SCEV *, 8> InnerAddOps
+ ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append
+ (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
@@ -2350,13 +2415,12 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
- Formula F = Base;
// Check that the multiplication doesn't overflow.
- if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
+ if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
continue;
- F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
+ int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+ if (NewBaseOffs / Factor != Base.AM.BaseOffs)
continue;
// Check that multiplying with the use offset doesn't overflow.
@@ -2367,6 +2431,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
if (Offset / Factor != LU.MinOffset)
continue;
+ Formula F = Base;
+ F.AM.BaseOffs = NewBaseOffs;
+
// Check that this scale is legal.
if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
continue;
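
Sharing LSRUses across fixups now keys the map by the pair (expression, use kind), which requires a hash over the pair -- the job of `UseMapDenseMapInfo` above. A standard-library analogue (the key types below are stand-ins, not the LLVM ones):

    #include <cstddef>
    #include <functional>
    #include <unordered_map>
    #include <utility>

    enum KindType { Basic, Special, Address, ICmpZero };
    typedef std::pair<const void *, KindType> UseKey;

    struct UseKeyHash {
      std::size_t operator()(const UseKey &K) const {
        // XOR-combine the two sub-hashes, as getHashValue does above.
        return std::hash<const void *>()(K.first) ^
               std::hash<unsigned>()(unsigned(K.second));
      }
    };

    int main() {
      std::unordered_map<UseKey, std::size_t, UseKeyHash> UseMap;
      int S = 0; // stands in for a const SCEV *
      UseMap[UseKey(&S, Address)]  = 0;
      UseMap[UseKey(&S, ICmpZero)] = 1; // same expr, different kind
      return UseMap.size() == 2 ? 0 : 1;
    }
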
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index ae7bf40..0c900ff 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -445,7 +445,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// This is a very ad-hoc heuristic.
if (Metrics.NumInsts > Threshold ||
Metrics.NumBlocks * 5 > Threshold ||
- Metrics.NeverInline) {
+ Metrics.containsIndirectBr || Metrics.isRecursive) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
<< currentLoop->getBlocks().size() << "\n");
@@ -457,21 +457,21 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
}
// RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by ValueMap.
+// current values into those specified by VMap.
//
static inline void RemapInstruction(Instruction *I,
- DenseMap<const Value *, Value*> &ValueMap) {
+ ValueMap<const Value *, Value*> &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
- if (It != ValueMap.end()) Op = It->second;
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ if (It != VMap.end()) Op = It->second;
I->setOperand(op, Op);
}
}
/// CloneLoop - Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
LoopInfo *LI, LPPassManager *LPM) {
Loop *New = new Loop();
LPM->insertLoop(New, PL);
@@ -615,11 +615,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// the loop preheader and exit blocks), keeping track of the mapping between
// the instructions and blocks.
NewBlocks.reserve(LoopBlocks.size());
- DenseMap<const Value*, Value*> ValueMap;
+ ValueMap<const Value*, Value*> VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
- BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+ BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
NewBlocks.push_back(NewBB);
- ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+ VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
}
@@ -629,7 +629,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
NewBlocks[0], F->end());
// Now we create the new Loop object for the versioned loop.
- Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM);
+ Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
Loop *ParentLoop = L->getParentLoop();
if (ParentLoop) {
// Make sure to add the cloned preheader and exit blocks to the parent loop
@@ -638,7 +638,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
}
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]);
+ BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
// The new exit block should be in the same loop as the old one.
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
@@ -653,8 +653,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
PN = cast<PHINode>(I);
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
- if (It != ValueMap.end()) V = It->second;
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
}
@@ -663,7 +663,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, ValueMap);
+ RemapInstruction(I, VMap);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
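
The `DenseMap` to `ValueMap` switch above matters because cloned values can be RAUW'd or deleted while the map is live; `ValueMap` updates its keys automatically. The clone-then-remap pattern itself, reduced to a generic graph (illustrative, not the LLVM utilities; the sketch leaks its clones):

    #include <cassert>
    #include <cstddef>
    #include <map>
    #include <vector>

    struct Node { std::vector<Node *> Ops; };

    // First clone every node, recording old -> new; then rewrite operands
    // through the map, leaving out-of-region operands untouched. This is
    // the CloneBasicBlock + RemapInstruction sequence in miniature.
    void cloneRegion(const std::vector<Node *> &Blocks,
                     std::map<const Node *, Node *> &VMap) {
      for (std::size_t i = 0, e = Blocks.size(); i != e; ++i)
        VMap[Blocks[i]] = new Node(*Blocks[i]);
      for (std::size_t i = 0, e = Blocks.size(); i != e; ++i) {
        Node *NewBB = VMap[Blocks[i]];
        for (std::size_t op = 0, E = NewBB->Ops.size(); op != E; ++op) {
          std::map<const Node *, Node *>::iterator It =
              VMap.find(NewBB->Ops[op]);
          if (It != VMap.end()) NewBB->Ops[op] = It->second;
        }
      }
    }

    int main() {
      Node A, B; A.Ops.push_back(&B);
      std::vector<Node *> Blocks;
      Blocks.push_back(&A); Blocks.push_back(&B);
      std::map<const Node *, Node *> VMap;
      cloneRegion(Blocks, VMap);
      assert(VMap[&A]->Ops[0] == VMap[&B]); // operand remapped to the clone
      return 0;
    }
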
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3611b8e..0e566c5 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -632,7 +632,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Remove the memcpy
MD.removeInstruction(cpy);
cpy->eraseFromParent();
- NumMemCpyInstr++;
+ ++NumMemCpyInstr;
return true;
}
@@ -710,7 +710,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
if (MD.getDependency(C) == dep) {
MD.removeInstruction(M);
M->eraseFromParent();
- NumMemCpyInstr++;
+ ++NumMemCpyInstr;
return true;
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 5aca9cdc..98452f5 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -407,13 +407,14 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- if (!BinaryOperator::isNeg(*UI)) continue;
+ User *U = *UI;
+ if (!BinaryOperator::isNeg(U)) continue;
// We found one! Now we have to make sure that the definition dominates
// this use. We do this by moving it to the entry block (if it is a
// non-instruction value) or right after the definition. These negates will
// be zapped by reassociate later, so we don't need much finesse here.
- BinaryOperator *TheNeg = cast<BinaryOperator>(*UI);
+ BinaryOperator *TheNeg = cast<BinaryOperator>(U);
// Verify that the negate is in this function, V might be a constant expr.
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 5ca9ce3..dd445f6 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -926,7 +926,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
DeleteDeadInstructions();
AI->eraseFromParent();
- NumReplaced++;
+ ++NumReplaced;
}
/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
@@ -965,11 +965,11 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
isSafeGEP(GEPI, AI, GEPOffset, Info);
if (!Info.isUnsafe)
isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length)
isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 1, Info);
+ UI.getOperandNo() == CallInst::ArgOffset, Info);
else
MarkUnsafe(Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1272,6 +1272,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If there is an other pointer, we want to convert it to the same pointer
// type as AI has, so we can GEP through it safely.
if (OtherPtr) {
+ unsigned AddrSpace =
+ cast<PointerType>(OtherPtr->getType())->getAddressSpace();
// Remove bitcasts and all-zero GEPs from OtherPtr. This is an
// optimization, but it's also required to detect the corner case where
@@ -1279,20 +1281,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
// function is only called for mem intrinsics that access the whole
// aggregate, so non-zero GEPs are not an issue here.)
- while (1) {
- if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
- OtherPtr = BC->getOperand(0);
- continue;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
- // All zero GEPs are effectively bitcasts.
- if (GEP->hasAllZeroIndices()) {
- OtherPtr = GEP->getOperand(0);
- continue;
- }
- }
- break;
- }
+ OtherPtr = OtherPtr->stripPointerCasts();
+
// Copying the alloca to itself is a no-op: just delete it.
if (OtherPtr == AI || OtherPtr == NewElts[0]) {
// This code will run twice for a no-op memcpy -- once for each operand.
@@ -1304,15 +1294,13 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
return;
}
- if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
- if (BCE->getOpcode() == Instruction::BitCast)
- OtherPtr = BCE->getOperand(0);
-
// If the pointer is not the right type, insert a bitcast to the right
// type.
- if (OtherPtr->getType() != AI->getType())
- OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
- MI);
+ const Type *NewTy =
+ PointerType::get(AI->getType()->getElementType(), AddrSpace);
+
+ if (OtherPtr->getType() != NewTy)
+ OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
}
// Process each element of the aggregate.
@@ -1373,7 +1361,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If the stored element is zero (common case), just store a null
// constant.
Constant *StoreVal;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
if (CI->isZero()) {
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
@@ -1436,7 +1424,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
Value *Ops[] = {
SROADest ? EltPtr : OtherElt, // Dest ptr
SROADest ? OtherElt : EltPtr, // Src ptr
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
// Align
ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
MI->getVolatileCst()
@@ -1451,8 +1439,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
} else {
assert(isa<MemSetInst>(MI));
Value *Ops[] = {
- EltPtr, MI->getOperand(2), // Dest, Value,
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ EltPtr, MI->getArgOperand(1), // Dest, Value,
+ ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
Zero, // Align
ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
};
@@ -1655,7 +1643,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
}
- ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ // Don't create an 'or x, 0' on the first iteration.
+ if (!isa<Constant>(ResultVal) ||
+ !cast<Constant>(ResultVal)->isNullValue())
+ ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ else
+ ResultVal = SrcField;
}
// Handle tail padding by truncating the result
@@ -1794,7 +1787,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 1) return false;
+ if (UI.getOperandNo() != CallInst::ArgOffset) return false;
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 9744100..49d93a2 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -137,6 +137,9 @@ static bool MarkAliveBlocks(BasicBlock *BB,
// they should be changed to unreachable by passes that can't modify the
// CFG.
if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
Value *Ptr = SI->getOperand(1);
if (isa<UndefValue>(Ptr) ||
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 7414be7..b1c6191 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -66,6 +66,11 @@ public:
this->TD = TD;
if (CI->getCalledFunction())
Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
return CallOptimizer(CI->getCalledFunction(), CI, B);
}
};
@@ -92,6 +97,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
return true;
}
+/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality
+/// comparisons with With.
+static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// String and Memory LibCall Optimizations
//===----------------------------------------------------------------------===//
@@ -110,8 +129,8 @@ struct StrCatOpt : public LibCallOptimization {
return 0;
// Extract some information from the instruction
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -162,12 +181,12 @@ struct StrNCatOpt : public StrCatOpt {
return 0;
// Extract some information from the instruction
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
uint64_t Len;
// We don't do anything if length is not constant
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Len = LengthArg->getZExtValue();
else
return 0;
@@ -207,11 +226,11 @@ struct StrChrOpt : public LibCallOptimization {
FT->getParamType(0) != FT->getReturnType())
return 0;
- Value *SrcStr = CI->getOperand(1);
+ Value *SrcStr = CI->getArgOperand(0);
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (CharC == 0) {
// These optimizations require TargetData.
if (!TD) return 0;
@@ -220,7 +239,7 @@ struct StrChrOpt : public LibCallOptimization {
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
return 0;
- return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(TD->getIntPtrType(*Context), Len),
B, TD);
}
@@ -260,12 +279,12 @@ struct StrCmpOpt : public LibCallOptimization {
// Verify the "strcmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
- Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strcmp(x,x) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -308,19 +327,19 @@ struct StrNCmpOpt : public LibCallOptimization {
// Verify the "strncmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
- !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
!FT->getParamType(2)->isIntegerTy())
return 0;
- Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
return ConstantInt::get(CI->getType(), 0);
// Get the length argument if it is constant.
uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Length = LengthArg->getZExtValue();
else
return 0;
@@ -328,6 +347,9 @@ struct StrNCmpOpt : public LibCallOptimization {
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+
std::string Str1, Str2;
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
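With n == 1 neither call can read past the first byte, and both compare as unsigned char, so the two are interchangeable; memcmp is the cheaper one to expand. An illustrative C equivalence:

  #include <string.h>

  // strncmp(x, y, 1) inspects exactly one byte of each string, so it
  // folds to memcmp(x, y, 1), a single byte compare.
  static int first_byte_cmp(const char *x, const char *y) {
    return memcmp(x, y, 1);
  }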
@@ -365,7 +387,7 @@ struct StrCpyOpt : public LibCallOptimization {
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
- Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
@@ -381,7 +403,7 @@ struct StrCpyOpt : public LibCallOptimization {
if (OptChkCall)
EmitMemCpyChk(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getOperand(3), B, TD);
+ CI->getArgOperand(2), B, TD);
else
EmitMemCpy(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len),
@@ -402,9 +424,9 @@ struct StrNCpyOpt : public LibCallOptimization {
!FT->getParamType(2)->isIntegerTy())
return 0;
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
- Value *LenOp = CI->getOperand(3);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
@@ -452,7 +474,7 @@ struct StrLenOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy())
return 0;
- Value *Src = CI->getOperand(1);
+ Value *Src = CI->getArgOperand(0);
// Constant folding: strlen("xyz") -> 3
if (uint64_t Len = GetStringLength(Src))
@@ -477,7 +499,7 @@ struct StrToOpt : public LibCallOptimization {
!FT->getParamType(1)->isPointerTy())
return 0;
- Value *EndPtr = CI->getOperand(2);
+ Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
CI->setOnlyReadsMemory();
CI->addAttribute(1, Attribute::NoCapture);
@@ -500,17 +522,34 @@ struct StrStrOpt : public LibCallOptimization {
return 0;
// fold strstr(x, x) -> x.
- if (CI->getOperand(1) == CI->getOperand(2))
- return B.CreateBitCast(CI->getOperand(1), CI->getType());
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, TD);
+ for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+ UI != UE; ) {
+ ICmpInst *Old = cast<ICmpInst>(UI++);
+ Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()),
+ "cmp");
+ Old->replaceAllUsesWith(Cmp);
+ Old->eraseFromParent();
+ }
+ return CI;
+ }
// See if either input string is a constant string.
std::string SearchStr, ToFindStr;
- bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr);
- bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr);
+ bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
// fold strstr(x, "") -> x.
if (HasStr2 && ToFindStr.empty())
- return B.CreateBitCast(CI->getOperand(1), CI->getType());
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
@@ -520,14 +559,14 @@ struct StrStrOpt : public LibCallOptimization {
return Constant::getNullValue(CI->getType());
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = CastToCStr(CI->getOperand(1), B);
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
return B.CreateBitCast(Result, CI->getType());
}
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD),
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD),
CI->getType());
return 0;
}
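The new equality fold trades a full substring search for a bounded prefix comparison. A hedged before/after sketch at the C level:

  #include <string.h>

  // Before: scans the entire haystack even though only a prefix test is
  // wanted.
  static int is_prefix_before(const char *a, const char *b) {
    return strstr(a, b) == a;
  }

  // After: equivalent, but reads at most strlen(b) bytes of a.
  static int is_prefix_after(const char *a, const char *b) {
    return strncmp(a, b, strlen(b)) == 0;
  }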
@@ -545,13 +584,13 @@ struct MemCmpOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy(32))
return 0;
- Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
if (LHS == RHS) // memcmp(s,s,x) -> 0
return Constant::getNullValue(CI->getType());
// Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
if (!LenC) return 0;
uint64_t Len = LenC->getZExtValue();
@@ -598,9 +637,9 @@ struct MemCpyOpt : public LibCallOptimization {
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), 1, false, B, TD);
- return CI->getOperand(1);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ return CI->getArgOperand(0);
}
};
@@ -620,9 +659,9 @@ struct MemMoveOpt : public LibCallOptimization {
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- EmitMemMove(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), 1, false, B, TD);
- return CI->getOperand(1);
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ return CI->getArgOperand(0);
}
};
@@ -642,10 +681,10 @@ struct MemSetOpt : public LibCallOptimization {
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context),
false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD);
- return CI->getOperand(1);
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ return CI->getArgOperand(0);
}
};
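All three rewrites lower the library call to the matching LLVM intrinsic with alignment 1 and then stand in the destination pointer for the call's result, which is sound because each routine is defined to return its first argument. An illustrative C reminder of that contract:

  #include <string.h>

  // memcpy, memmove and memset all return their destination argument,
  // so every use of the call can be replaced by the first operand once
  // the intrinsic has been emitted.
  static void *copy_and_use(void *dst, const void *src, size_t n) {
    void *r = memcpy(dst, src, n);   // r is guaranteed to equal dst
    return r;
  }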
@@ -666,7 +705,7 @@ struct PowOpt : public LibCallOptimization {
!FT->getParamType(0)->isFloatingPointTy())
return 0;
- Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
return Op1C;
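The fold visible in this hunk relies on a pow identity guaranteed by C99 Annex F; a small illustrative sketch:

  #include <math.h>

  // pow(1.0, x) is 1.0 for every x, NaN included, so the whole call
  // folds to the constant.
  static double always_one(double x) {
    return pow(1.0, x);   // becomes simply 1.0
  }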
@@ -720,18 +759,18 @@ struct Exp2Opt : public LibCallOptimization {
!FT->getParamType(0)->isFloatingPointTy())
return 0;
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
LdExpArg = B.CreateSExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ Type::getInt32Ty(*Context), "tmp");
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
LdExpArg = B.CreateZExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ Type::getInt32Ty(*Context), "tmp");
}
if (LdExpArg) {
@@ -772,7 +811,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
return 0;
// If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
return 0;
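The shrink is safe because flooring a float-representable value yields a float-representable value, so no precision is gained by the double round trip. A hedged C sketch:

  #include <math.h>

  // Before: floor((double)f), widening, flooring, then narrowing again.
  // After: one single-precision operation with the same result.
  static float round_down(float f) {
    return floorf(f);
  }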
@@ -797,11 +836,11 @@ struct FFSOpt : public LibCallOptimization {
// Just make sure this takes a single integer argument and that the
// result is a 32-bit integer.
if (FT->getNumParams() != 1 ||
- !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getReturnType()->isIntegerTy(32) ||
!FT->getParamType(0)->isIntegerTy())
return 0;
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
// Constant fold.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
@@ -821,7 +860,7 @@ struct FFSOpt : public LibCallOptimization {
Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
return B.CreateSelect(Cond, V,
- ConstantInt::get(Type::getInt32Ty(*Context), 0));
+ ConstantInt::get(Type::getInt32Ty(*Context), 0));
}
};
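ffs returns one plus the index of the least significant set bit, or zero for a zero argument, which is exactly the cttz-plus-select sequence being built. A sketch using the GCC-style builtin (illustrative, not the emitted IR):

  // __builtin_ctz counts trailing zeros; the select guards x == 0,
  // where cttz's result is undefined in this LLVM era.
  static int my_ffs(int x) {
    return x ? __builtin_ctz((unsigned)x) + 1 : 0;
  }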
@@ -837,7 +876,7 @@ struct IsDigitOpt : public LibCallOptimization {
return 0;
// isdigit(c) -> (c-'0') <u 10
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
"isdigittmp");
Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
@@ -858,7 +897,7 @@ struct IsAsciiOpt : public LibCallOptimization {
return 0;
// isascii(c) -> c <u 128
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
"isascii");
return B.CreateZExt(Op, CI->getType());
@@ -877,7 +916,7 @@ struct AbsOpt : public LibCallOptimization {
return 0;
// abs(x) -> x >s -1 ? x : -x
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Value *Pos = B.CreateICmpSGT(Op,
Constant::getAllOnesValue(Op->getType()),
"ispos");
@@ -899,7 +938,7 @@ struct ToAsciiOpt : public LibCallOptimization {
return 0;
// toascii(c) -> c & 0x7f
- return B.CreateAnd(CI->getOperand(1),
+ return B.CreateAnd(CI->getArgOperand(0),
ConstantInt::get(CI->getType(), 0x7F));
}
};
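The integer folds in this group are all branch-free bit tricks; their C-level equivalents, sketched for illustration:

  // isdigit(c) -> (unsigned)(c - '0') < 10
  static int is_digit(int c) { return (unsigned)(c - '0') < 10u; }

  // isascii(c) -> (unsigned)c < 128
  static int is_ascii(int c) { return (unsigned)c < 128u; }

  // abs(x) -> x > -1 ? x : -x (INT_MIN stays INT_MIN, matching the
  // undefined behaviour abs already has there)
  static int my_abs(int x) { return x > -1 ? x : -x; }

  // toascii(c) -> c & 0x7f
  static int to_ascii(int c) { return c & 0x7f; }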
@@ -922,7 +961,7 @@ struct PrintFOpt : public LibCallOptimization {
// Check for a fixed format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
return 0;
// Empty format string -> noop.
@@ -954,20 +993,20 @@ struct PrintFOpt : public LibCallOptimization {
}
// Optimize specific format strings.
- // printf("%c", chr) --> putchar(*(i8*)dst)
- if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
- CI->getOperand(2)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getOperand(2), B, TD);
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
- CI->getOperand(2)->getType()->isPointerTy() &&
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy() &&
CI->use_empty()) {
- EmitPutS(CI->getOperand(2), B, TD);
+ EmitPutS(CI->getArgOperand(1), B, TD);
return CI;
}
return 0;
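Both printf folds fire only for a constant format string with a matching argument type; their effect at the source level (hedged sketch):

  #include <stdio.h>

  static void demo(int chr, const char *str) {
    putchar(chr);   // printf("%c", chr)   --> putchar(chr)
    puts(str);      // printf("%s\n", str) --> puts(str); legal only when
                    // the printf result is unused, as puts returns a
                    // different value.
  }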
@@ -988,11 +1027,11 @@ struct SPrintFOpt : public LibCallOptimization {
// Check for a fixed format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumOperands() == 3) {
+ if (CI->getNumArgOperands() == 2) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1003,7 +1042,7 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte.
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()+1), 1, false, B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
@@ -1011,16 +1050,17 @@ struct SPrintFOpt : public LibCallOptimization {
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
return 0;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
- Value *V = B.CreateTrunc(CI->getOperand(3),
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2),
Type::getInt8Ty(*Context), "char");
- Value *Ptr = CastToCStr(CI->getOperand(1), B);
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
"nul");
@@ -1034,13 +1074,13 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!CI->getOperand(3)->getType()->isPointerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getOperand(3), B, TD);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
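The "%s" case becomes a memcpy of the string plus its terminator, with the call's result being the unincremented length. An illustrative C expansion:

  #include <string.h>

  // sprintf(dst, "%s", str) --> memcpy(dst, str, strlen(str) + 1)
  static int copy_str(char *dst, const char *str) {
    size_t len = strlen(str);
    memcpy(dst, str, len + 1);   // + 1 copies the terminating nul
    return (int)len;             // sprintf does not count the nul
  }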
@@ -1064,8 +1104,8 @@ struct FWriteOpt : public LibCallOptimization {
return 0;
// Get the element size and count.
- ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
- ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
if (!SizeC || !CountC) return 0;
uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
@@ -1075,8 +1115,8 @@ struct FWriteOpt : public LibCallOptimization {
// If this is writing one byte, turn it into fputc.
if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
- EmitFPutC(Char, CI->getOperand(4), B, TD);
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
}
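A one-byte fwrite degenerates to fputc of the first character, with the result (the count of items written) folded to the constant 1. A hedged C sketch:

  #include <stdio.h>

  // fwrite(s, 1, 1, f) --> fputc(s[0], f)
  static void write_one(const char *s, FILE *f) {
    fputc(s[0], f);
  }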
@@ -1100,11 +1140,11 @@ struct FPutsOpt : public LibCallOptimization {
return 0;
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
- uint64_t Len = GetStringLength(CI->getOperand(1));
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
- EmitFWrite(CI->getOperand(1),
+ EmitFWrite(CI->getArgOperand(0),
ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getOperand(2), B, TD);
+ CI->getArgOperand(1), B, TD);
return CI; // Known to have no uses (see above).
}
};
@@ -1123,11 +1163,11 @@ struct FPrintFOpt : public LibCallOptimization {
// All the optimizations depend on the format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
- if (CI->getNumOperands() == 3) {
+ if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return 0; // We found a format specifier.
@@ -1135,31 +1175,32 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitFWrite(CI->getOperand(2),
+ EmitFWrite(CI->getArgOperand(1),
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()),
- CI->getOperand(1), B, TD);
+ CI->getArgOperand(0), B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
return 0;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
- // fprintf(F, "%c", chr) --> *(i8*)dst = chr
- if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
- EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD);
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
return ConstantInt::get(CI->getType(), 1);
}
if (FormatStr[1] == 's') {
- // fprintf(F, "%s", str) -> fputs(str, F)
- if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty())
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD);
+ EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
return CI;
}
return 0;
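The fprintf cases mirror the printf ones with the stream threaded through; an illustrative C-level sketch:

  #include <stdio.h>

  static void demo(FILE *f, int chr, const char *str) {
    fputc(chr, f);   // fprintf(f, "%c", chr) --> fputc(chr, f)
    fputs(str, f);   // fprintf(f, "%s", str) --> fputs(str, f); legal
                     // only when the fprintf result is unused.
  }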
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 2306a77..9208238 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -206,12 +206,13 @@ static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
// there is only one other pred, get it, otherwise we can't handle it.
PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
BasicBlock *DstOtherPred = 0;
- if (*PI == SrcBlock) {
+ BasicBlock *P = *PI;
+ if (P == SrcBlock) {
if (++PI == PE) return 0;
DstOtherPred = *PI;
if (++PI != PE) return 0;
} else {
- DstOtherPred = *PI;
+ DstOtherPred = P;
if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
}
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 5ad5de2..01c8e5d 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -16,9 +16,9 @@
// transformation from taking place, though currently the analysis cannot
// support moving any really useful instructions (only dead ones).
// 2. This pass transforms functions that are prevented from being tail
-// recursive by an associative expression to use an accumulator variable,
-// thus compiling the typical naive factorial or 'fib' implementation into
-// efficient code.
+// recursive by an associative and commutative expression to use an
+// accumulator variable, thus compiling the typical naive factorial or
+// 'fib' implementation into efficient code.
// 3. TRE is performed if the function returns void, if the return
// returns the result returned by the call, or if the function returns a
// run-time constant on all exits from the function. It is possible, though
@@ -60,6 +60,7 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
@@ -252,7 +253,7 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
// If we are passing this argument into call as the corresponding
// argument operand, then the argument is dynamically constant.
// Otherwise, we cannot transform this function safely.
- if (CI->getOperand(ArgNo+1) == Arg)
+ if (CI->getArgOperand(ArgNo) == Arg)
return true;
}
@@ -269,16 +270,16 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
}
// getCommonReturnValue - Check to see if the function containing the specified
-// return instruction and tail call consistently returns the same
-// runtime-constant value at all exit points. If so, return the returned value.
+// tail call consistently returns the same runtime-constant value at all exit
+// points except for IgnoreRI. If so, return the returned value.
//
-static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
- Function *F = TheRI->getParent()->getParent();
+static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
+ Function *F = CI->getParent()->getParent();
Value *ReturnedValue = 0;
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
- if (RI != TheRI) {
+ if (RI != IgnoreRI) {
Value *RetOp = RI->getOperand(0);
// We can only perform this transformation if the value returned is
@@ -301,9 +302,9 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
///
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
- if (!I->isAssociative()) return 0;
+ if (!I->isAssociative() || !I->isCommutative()) return 0;
assert(I->getNumOperands() == 2 &&
- "Associative operations should have 2 args!");
+ "Associative/commutative operations should have 2 args!");
// Exactly one operand should be the result of the call instruction...
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
@@ -368,11 +369,16 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
return false;
}
- // If we are introducing accumulator recursion to eliminate associative
- // operations after the call instruction, this variable contains the initial
- // value for the accumulator. If this value is set, we actually perform
- // accumulator recursion elimination instead of simple tail recursion
- // elimination.
+ // If we are introducing accumulator recursion to eliminate operations after
+ // the call instruction that are both associative and commutative, the initial
+ // value for the accumulator is placed in this variable. If this value is set
+ // then we actually perform accumulator recursion elimination instead of
+ // simple tail recursion elimination. If the operation is an LLVM instruction
+  // (e.g. "add") then it is recorded in AccumulatorRecursionInstr. If not, then
+ // we are handling the case when the return instruction returns a constant C
+  // which differs from the constant returned by the other return instructions
+ // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
+ // special case of accumulator recursion, the operation being "return C".
Value *AccumulatorRecursionEliminationInitVal = 0;
Instruction *AccumulatorRecursionInstr = 0;
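The canonical target of this machinery is the naive factorial: the multiply after the recursive call blocks plain tail recursion elimination, but since it is associative and commutative it can be carried in an accumulator instead. A hedged before/after sketch in C:

  // Before: not a tail call; the multiply consumes the call's result.
  static unsigned fact(unsigned n) {
    if (n == 0) return 1;
    return n * fact(n - 1);
  }

  // After, roughly what the pass produces: a loop whose accumulator is
  // seeded with the non-recursive return value (here 1).
  static unsigned fact_acc(unsigned n) {
    unsigned acc = 1;    // AccumulatorRecursionEliminationInitVal
    for (; n != 0; --n)
      acc *= n;          // the associative/commutative operation
    return acc;
  }

Commutativity matters because the accumulator combines the values in a different order than the recursion does, so associativity alone is not enough.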
@@ -383,9 +389,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
if (!CanMoveAboveCall(BBI, CI)) {
// If we can't move the instruction above the call, it might be because it
- // is an associative operation that could be tranformed using accumulator
- // recursion elimination. Check to see if this is the case, and if so,
- // remember the initial accumulator value for later.
+      // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
CanTransformAccumulatorRecursion(BBI, CI))) {
// Yes, this is accumulator recursion. Remember which instruction
@@ -403,8 +409,18 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
!isa<UndefValue>(Ret->getReturnValue()) &&
AccumulatorRecursionEliminationInitVal == 0 &&
- !getCommonReturnValue(Ret, CI))
- return false;
+ !getCommonReturnValue(0, CI)) {
+ // One case remains that we are able to handle: the current return
+ // instruction returns a constant, and all other return instructions
+ // return a different constant.
+ if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
+ return false; // Current return instruction does not return a constant.
+ // Check that all other return instructions return a common constant. If
+ // so, record it in AccumulatorRecursionEliminationInitVal.
+ AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
+ if (!AccumulatorRecursionEliminationInitVal)
+ return false;
+ }
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
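The newly handled shape: the return following the recursive call yields one constant and every other exit yields a different common constant, making the blocked "operation" simply "return C". A hedged C sketch of before and after:

  // Before: not a tail call, because the call's result is ignored and a
  // different constant is returned afterwards.
  static int f(unsigned n) {
    if (n == 0) return 0;   // the common constant at the other exits
    f(n - 1);
    return 1;               // the constant returned after the call
  }

  // After: the accumulator PHI is seeded with 0 from the entry block and
  // takes 1 on every loop-back edge, so f(0) == 0 and f(n > 0) == 1.
  static int f_acc(unsigned n) {
    int acc = 0;
    for (; n != 0; --n)
      acc = 1;
    return acc;
  }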
@@ -453,8 +469,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// Ok, now that we know we have a pseudo-entry block WITH all of the
// required PHI nodes, add entries into the PHI node for the actual
// parameters passed into the tail-recursive call.
- for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
- ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
// If we are introducing an accumulator variable to eliminate the recursion,
// do so now. Note that we _know_ that no subsequent tail recursion
@@ -464,8 +480,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
if (AccumulatorRecursionEliminationInitVal) {
Instruction *AccRecInstr = AccumulatorRecursionInstr;
// Start by inserting a new PHI node for the accumulator.
- PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr",
- OldEntry->begin());
+ PHINode *AccPN =
+ PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ "accumulator.tr", OldEntry->begin());
// Loop over all of the predecessors of the tail recursion block. For the
// real entry into the function we seed the PHI with the initial value,
@@ -475,20 +492,27 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// it will not show up as a predecessor.
for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
PI != PE; ++PI) {
- if (*PI == &F->getEntryBlock())
- AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI);
+ BasicBlock *P = *PI;
+ if (P == &F->getEntryBlock())
+ AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
else
- AccPN->addIncoming(AccPN, *PI);
+ AccPN->addIncoming(AccPN, P);
}
- // Add an incoming argument for the current block, which is computed by our
- // associative accumulator instruction.
- AccPN->addIncoming(AccRecInstr, BB);
-
- // Next, rewrite the accumulator recursion instruction so that it does not
- // use the result of the call anymore, instead, use the PHI node we just
- // inserted.
- AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ if (AccRecInstr) {
+ // Add an incoming argument for the current block, which is computed by
+ // our associative and commutative accumulator instruction.
+ AccPN->addIncoming(AccRecInstr, BB);
+
+ // Next, rewrite the accumulator recursion instruction so that it does not
+ // use the result of the call anymore, instead, use the PHI node we just
+ // inserted.
+ AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ } else {
+ // Add an incoming argument for the current block, which is just the
+ // constant returned by the current return instruction.
+ AccPN->addIncoming(Ret->getReturnValue(), BB);
+ }
// Finally, rewrite any return instructions in the program to return the PHI
// node instead of the "initval" that they do currently. This loop will