Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--   lib/Transforms/Scalar/ABCD.cpp                     |    6
-rw-r--r--   lib/Transforms/Scalar/ADCE.cpp                     |    2
-rw-r--r--   lib/Transforms/Scalar/CodeGenPrepare.cpp           |    8
-rw-r--r--   lib/Transforms/Scalar/DeadStoreElimination.cpp     |   59
-rw-r--r--   lib/Transforms/Scalar/GVN.cpp                      |   61
-rw-r--r--   lib/Transforms/Scalar/IndVarSimplify.cpp           |   16
-rw-r--r--   lib/Transforms/Scalar/JumpThreading.cpp            |   73
-rw-r--r--   lib/Transforms/Scalar/LoopDeletion.cpp             |    6
-rw-r--r--   lib/Transforms/Scalar/LoopIndexSplit.cpp           |   28
-rw-r--r--   lib/Transforms/Scalar/LoopRotation.cpp             |    4
-rw-r--r--   lib/Transforms/Scalar/LoopStrengthReduce.cpp       |  143
-rw-r--r--   lib/Transforms/Scalar/LoopUnswitch.cpp             |   28
-rw-r--r--   lib/Transforms/Scalar/MemCpyOptimizer.cpp          |    4
-rw-r--r--   lib/Transforms/Scalar/Reassociate.cpp              |    5
-rw-r--r--   lib/Transforms/Scalar/ScalarReplAggregates.cpp     |   53
-rw-r--r--   lib/Transforms/Scalar/SimplifyCFGPass.cpp          |    3
-rw-r--r--   lib/Transforms/Scalar/SimplifyLibCalls.cpp         |  213
-rw-r--r--   lib/Transforms/Scalar/TailDuplication.cpp          |    5
-rw-r--r--   lib/Transforms/Scalar/TailRecursionElimination.cpp |   96
19 files changed, 495 insertions, 318 deletions
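A large share of the hunks below replace `CI->getOperand(i)` with `CI->getArgOperand(i - 1)` (and `getNumOperands()` with `getNumArgOperands()`). A CallInst keeps its callee in the same operand list as its arguments, so raw operand indices depend on that layout; `CallInst::ArgOffset` names the offset and the argument accessors hide it. The standalone sketch below models the idea with a toy class — it is an illustration, not LLVM's actual CallInst:

```cpp
#include <cassert>
#include <vector>

// Toy model of the operand layout behind the getOperand(i) ->
// getArgOperand(i) migration in this commit. The real llvm::CallInst has
// the same shape but lives in llvm/Instructions.h.
struct ModelCallInst {
  // One slot is reserved for the callee (this is what CallInst::ArgOffset
  // abstracts); the arguments follow it.
  static const unsigned ArgOffset = 1;
  std::vector<int> Operands;

  int getOperand(unsigned i) const { return Operands[i]; }
  // Argument accessors hide the layout, so passes no longer hard-code the
  // "+1" that calls like getOperand(SizeCIOp) used to rely on.
  int getArgOperand(unsigned i) const { return Operands[ArgOffset + i]; }
  unsigned getNumArgOperands() const { return Operands.size() - ArgOffset; }
};

int main() {
  ModelCallInst CI;
  CI.Operands = {/*callee*/ -1, /*arg0*/ 10, /*arg1*/ 20};
  // Old-style and new-style indexing agree only while the layout is fixed:
  assert(CI.getOperand(1) == CI.getArgOperand(0));
  assert(CI.getNumArgOperands() == 2);
  return 0;
}
```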
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index 6135992..dcf14a6 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -230,7 +230,7 @@ class ABCD : public FunctionPass {
     DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
     DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
     for (; begin != end; ++begin) {
-      begin->second.clear();
+      begin->second.clear();
     }
     map.clear();
   }
@@ -396,8 +396,8 @@ class ABCD : public FunctionPass {
   /// this case the method returns true, otherwise false. It also obtains the
   /// Instruction and ConstantInt from the BinaryOperator and returns it.
   bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
-                                Instruction **I2, ConstantInt **C1,
-                                ConstantInt **C2);
+                                Instruction **I2, ConstantInt **C1,
+                                ConstantInt **C2);

   /// This method creates a constraint between a Sigma and an Instruction.
   /// These constraints are created as soon as we find a comparator that uses a
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 5a49841..2d19467 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -83,7 +83,7 @@ bool ADCE::runOnFunction(Function& F) {
   for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
        E = worklist.end(); I != E; ++I) {
-    NumRemoved++;
+    ++NumRemoved;
     (*I)->eraseFromParent();
   }
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 93e9bfb..272066c 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -548,7 +548,8 @@ protected:
     CI->eraseFromParent();
   }
   bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
-    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp)))
+    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
+                                                    - CallInst::ArgOffset)))
       return SizeCI->isAllOnesValue();
     return false;
   }
@@ -559,7 +560,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
   // Lower all uses of llvm.objectsize.*
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
-    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
     const Type *ReturnTy = CI->getType();
     Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
     CI->replaceAllUsesWith(RetVal);
@@ -759,8 +760,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
   }

   // Compute the constraint code and ConstraintType to use.
-  TLI->ComputeConstraintToUse(OpInfo, SDValue(),
-                           OpInfo.ConstraintType == TargetLowering::C_Memory);
+  TLI->ComputeConstraintToUse(OpInfo, SDValue());

   if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
       OpInfo.isIndirect) {
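The CodeGenPrepare hunk lowers `llvm.objectsize.*` calls whose answer cannot be computed: the intrinsic's second argument selects minimum-vs-maximum semantics, and the unknown answer is 0 or -1 respectively. A tiny standalone model of just that fold (the helper name is hypothetical, not an LLVM API):

```cpp
#include <cstdint>
#include <iostream>

// Models the constant chosen by "Min ? 0 : -1ULL" in the hunk above:
// objectsize reports the minimum guaranteed size (unknown -> 0) or the
// maximum possible size (unknown -> all-ones).
uint64_t objectSizeWhenUnknown(bool Min) {
  return Min ? 0 : ~0ULL;
}

int main() {
  std::cout << objectSizeWhenUnknown(true)  << "\n"; // 0
  std::cout << objectSizeWhenUnknown(false) << "\n"; // 18446744073709551615
  return 0;
}
```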
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 09c01d3..e047e4f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -56,7 +56,8 @@ namespace {
     }

     bool runOnBasicBlock(BasicBlock &BB);
-    bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep);
+    bool handleFreeWithNonTrivialDependency(const CallInst *F,
+                                            MemDepResult Dep);
     bool handleEndBlock(BasicBlock &BB);
     bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
                               BasicBlock::iterator &BBI,
@@ -73,7 +74,6 @@ namespace {
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<MemoryDependenceAnalysis>();
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<MemoryDependenceAnalysis>();
     }
@@ -123,14 +123,15 @@ static Value *getPointerOperand(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getPointerOperand();
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
-    return MI->getOperand(1);
-
-  switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+    return MI->getArgOperand(0);
+
+  IntrinsicInst *II = cast<IntrinsicInst>(I);
+  switch (II->getIntrinsicID()) {
   default: assert(false && "Unexpected intrinsic!");
   case Intrinsic::init_trampoline:
-    return I->getOperand(1);
+    return II->getArgOperand(0);
   case Intrinsic::lifetime_end:
-    return I->getOperand(2);
+    return II->getArgOperand(1);
   }
 }
@@ -147,12 +148,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
     Len = MI->getLength();
   } else {
-    switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+    IntrinsicInst *II = cast<IntrinsicInst>(I);
+    switch (II->getIntrinsicID()) {
     default: assert(false && "Unexpected intrinsic!");
     case Intrinsic::init_trampoline:
       return -1u;
     case Intrinsic::lifetime_end:
-      Len = I->getOperand(1);
+      Len = II->getArgOperand(0);
       break;
     }
   }
@@ -201,8 +203,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
       if (InstDep.isNonLocal()) continue;

       // Handle frees whose dependencies are non-trivial.
-      if (isFreeCall(Inst)) {
-        MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep);
+      if (const CallInst *F = isFreeCall(Inst)) {
+        MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
         continue;
       }
@@ -218,7 +220,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
           isElidable(DepStore)) {
         // Delete the store and now-dead instructions that feed it.
         DeleteDeadInstruction(DepStore);
-        NumFastStores++;
+        ++NumFastStores;
         MadeChange = true;

         // DeleteDeadInstruction can delete the current instruction in loop
@@ -249,7 +251,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
           BBI = BB.begin();
         else if (BBI != BB.begin())  // Revisit this instruction if possible.
           --BBI;
-        NumFastStores++;
+        ++NumFastStores;
         MadeChange = true;
         continue;
       }
@@ -270,7 +272,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
         BBI = BB.begin();
       else if (BBI != BB.begin())  // Revisit this instruction if possible.
         --BBI;
-      NumFastStores++;
+      ++NumFastStores;
       MadeChange = true;
       continue;
     }
@@ -287,7 +289,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {

 /// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
 /// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
+bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
+                                             MemDepResult Dep) {
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

   Instruction *Dependency = Dep.getInst();
@@ -297,13 +300,13 @@ bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
   Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();

   // Check for aliasing.
-  if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
+  if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
          AliasAnalysis::MustAlias)
     return false;

   // DCE instructions only used to calculate that store
   DeleteDeadInstruction(Dependency);
-  NumFastStores++;
+  ++NumFastStores;
   return true;
 }
@@ -349,9 +352,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
       if (deadPointers.count(pointerOperand)) {
         // DCE instructions only used to calculate that store.
         Instruction *Dead = BBI;
-        BBI++;
+        ++BBI;
         DeleteDeadInstruction(Dead, &deadPointers);
-        NumFastStores++;
+        ++NumFastStores;
         MadeChange = true;
         continue;
       }
@@ -371,9 +374,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
       // However, if this load is unused and not volatile, we can go ahead and
      // remove it, and not have to worry about it making our pointer undead!
       if (L->use_empty() && !L->isVolatile()) {
-        BBI++;
+        ++BBI;
         DeleteDeadInstruction(L, &deadPointers);
-        NumFastOther++;
+        ++NumFastOther;
         MadeChange = true;
         continue;
       }
@@ -391,9 +394,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {

       // Dead alloca's can be DCE'd when we reach them
       if (A->use_empty()) {
-        BBI++;
+        ++BBI;
         DeleteDeadInstruction(A, &deadPointers);
-        NumFastOther++;
+        ++NumFastOther;
         MadeChange = true;
       }
@@ -426,9 +429,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
                                       getPointerSize(*I));

         if (A == AliasAnalysis::ModRef)
-          modRef++;
+          ++modRef;
         else
-          other++;
+          ++other;

         if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
           dead.push_back(*I);
@@ -442,9 +445,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     } else if (isInstructionTriviallyDead(BBI)) {
       // For any non-memory-affecting non-terminators, DCE them as we reach them
       Instruction *Inst = BBI;
-      BBI++;
+      ++BBI;
       DeleteDeadInstruction(Inst, &deadPointers);
-      NumFastOther++;
+      ++NumFastOther;
       MadeChange = true;
       continue;
     }
@@ -497,7 +500,7 @@ bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
       // Remove it!
       ++BBI;
       DeleteDeadInstruction(S, &deadPointers);
-      NumFastStores++;
+      ++NumFastStores;
       MadeChange = true;

       continue;
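The DeadStoreElimination changes ride on an API change: `isFreeCall` now hands back the matched call (null otherwise) rather than a bare bool, so the caller can bind the typed `CallInst` and read the freed pointer via `getArgOperand(0)`. A standalone analogue of that shape, with toy types standing in for the LLVM ones:

```cpp
#include <string>

// Sketch of returning the matched object instead of bool: callers can bind
// and test in one step, and get typed access afterwards. Toy types, not the
// LLVM declaration of isFreeCall.
struct Call { std::string Callee; std::string Arg0; };

const Call *isFreeCall(const Call *C) {
  return (C && C->Callee == "free") ? C : nullptr;
}

void visit(const Call *Inst) {
  if (const Call *F = isFreeCall(Inst)) {
    // Typed access to the freed pointer, cf. F->getArgOperand(0) above.
    (void)F->Arg0;
  }
}

int main() { Call C{"free", "p"}; visit(&C); return 0; }
```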
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index ca8ab49..88b6776 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/PHITransAddr.h"
@@ -271,7 +272,8 @@ Expression ValueTable::create_expression(CallInst* C) {
   e.function = C->getCalledFunction();
   e.opcode = Expression::CALL;

-  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+  CallSite CS(C);
+  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
        I != E; ++I)
     e.varargs.push_back(lookup_or_add(*I));
@@ -447,14 +449,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
     if (local_dep.isDef()) {
       CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

-      if (local_cdep->getNumOperands() != C->getNumOperands()) {
+      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
       }

-      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-        uint32_t c_vn = lookup_or_add(C->getOperand(i));
-        uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
         if (c_vn != cd_vn) {
           valueNumbering[C] = nextValueNumber;
           return nextValueNumber++;
@@ -504,13 +506,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
       return nextValueNumber++;
     }

-    if (cdep->getNumOperands() != C->getNumOperands()) {
+    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
       valueNumbering[C] = nextValueNumber;
       return nextValueNumber++;
     }
-    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-      uint32_t c_vn = lookup_or_add(C->getOperand(i));
-      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
       if (c_vn != cd_vn) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
@@ -1500,7 +1502,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       MD->invalidateCachedPointerInfo(V);
     VN.erase(LI);
     toErase.push_back(LI);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
@@ -1723,7 +1725,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     MD->invalidateCachedPointerInfo(V);
   VN.erase(LI);
   toErase.push_back(LI);
-  NumPRELoad++;
+  ++NumPRELoad;
   return true;
 }
@@ -1784,7 +1786,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
         MD->invalidateCachedPointerInfo(AvailVal);
       VN.erase(L);
       toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
       return true;
     }
@@ -1830,7 +1832,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
         MD->invalidateCachedPointerInfo(StoredVal);
       VN.erase(L);
       toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
       return true;
     }
@@ -1860,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
         MD->invalidateCachedPointerInfo(DepLI);
       VN.erase(L);
       toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
       return true;
     }
@@ -1871,7 +1873,7 @@ bool GVN::processLoad(LoadInst *L,
                                  SmallVectorImpl<Instruction*> &toErase) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
@@ -1882,7 +1884,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
       L->replaceAllUsesWith(UndefValue::get(L->getType()));
       VN.erase(L);
       toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
       return true;
     }
   }
@@ -2014,7 +2016,7 @@ bool GVN::runOnFunction(Function& F) {
     BasicBlock *BB = FI;
     ++FI;
     bool removedBlock = MergeBlockIntoPredecessor(BB, this);
-    if (removedBlock) NumGVNBlocks++;
+    if (removedBlock) ++NumGVNBlocks;

     Changed |= removedBlock;
   }
@@ -2126,27 +2128,28 @@ bool GVN::performPRE(Function &F) {
       for (pred_iterator PI = pred_begin(CurrentBlock),
            PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        BasicBlock *P = *PI;
         // We're not interested in PRE where the block is its
         // own predecessor, or in blocks with predecessors
         // that are not reachable.
-        if (*PI == CurrentBlock) {
+        if (P == CurrentBlock) {
           NumWithout = 2;
           break;
-        } else if (!localAvail.count(*PI)) {
+        } else if (!localAvail.count(P)) {
           NumWithout = 2;
           break;
         }

         DenseMap<uint32_t, Value*>::iterator predV =
-          localAvail[*PI]->table.find(ValNo);
-        if (predV == localAvail[*PI]->table.end()) {
-          PREPred = *PI;
-          NumWithout++;
+          localAvail[P]->table.find(ValNo);
+        if (predV == localAvail[P]->table.end()) {
+          PREPred = P;
+          ++NumWithout;
         } else if (predV->second == CurInst) {
           NumWithout = 2;
         } else {
-          predMap[*PI] = predV->second;
-          NumWith++;
+          predMap[P] = predV->second;
+          ++NumWith;
         }
       }
@@ -2201,7 +2204,7 @@ bool GVN::performPRE(Function &F) {
     PREInstr->setName(CurInst->getName() + ".pre");
     predMap[PREPred] = PREInstr;
     VN.add(PREInstr, ValNo);
-    NumGVNPRE++;
+    ++NumGVNPRE;

     // Update the availability map to include the new instruction.
     localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
@@ -2211,8 +2214,10 @@ bool GVN::performPRE(Function &F) {
                                    CurInst->getName() + ".pre-phi",
                                    CurrentBlock->begin());
     for (pred_iterator PI = pred_begin(CurrentBlock),
-         PE = pred_end(CurrentBlock); PI != PE; ++PI)
-      Phi->addIncoming(predMap[*PI], *PI);
+         PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+      BasicBlock *P = *PI;
+      Phi->addIncoming(predMap[P], P);
+    }

     VN.add(Phi, ValNo);
     localAvail[CurrentBlock]->table[ValNo] = Phi;
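In the GVN hunks above, two calls receive the same value number only when they invoke the same function and each pair of arguments has equal value numbers — the loops now walk `getArgOperand(0..N)` instead of raw operands starting at 1. A toy table capturing just that rule; the real pass additionally consults MemoryDependenceAnalysis before reusing a number:

```cpp
#include <cstdint>
#include <map>
#include <string>
#include <vector>

// Toy illustration of what lookup_or_add_call checks before reusing a value
// number: same callee plus pairwise-equal argument value numbers.
struct ToyValueTable {
  std::map<std::string, uint32_t> Numbers;
  uint32_t Next = 0;

  uint32_t lookupOrAdd(const std::string &Callee,
                       const std::vector<uint32_t> &ArgVNs) {
    std::string Key = Callee;
    for (uint32_t VN : ArgVNs)
      Key += "," + std::to_string(VN);
    std::map<std::string, uint32_t>::iterator It = Numbers.find(Key);
    if (It != Numbers.end())
      return It->second;        // redundant call: reuse the number
    return Numbers[Key] = Next++; // otherwise mint a fresh one
  }
};

int main() {
  ToyValueTable VT;
  uint32_t A = VT.lookupOrAdd("f", {1, 2});
  uint32_t B = VT.lookupOrAdd("f", {1, 2});
  return (A == B) ? 0 : 1; // same expression -> same value number
}
```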
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 36bea67..b5c9dd8 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -467,6 +467,17 @@ void IndVarSimplify::EliminateIVRemainders() {
 }

 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+  // If LoopSimplify form is not available, stay out of trouble. Some notes:
+  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
+  //    canonicalization can be a pessimization without LSR to "clean up"
+  //    afterwards.
+  //  - We depend on having a preheader; in particular,
+  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
+  //    and we're in trouble if we can't find the induction variable even when
+  //    we've manually inserted one.
+  if (!L->isLoopSimplifyForm())
+    return false;
+
   IU = &getAnalysis<IVUsers>();
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
@@ -760,8 +771,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
     bool UsedInLoop = false;
     for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
          UI != UE; ++UI) {
-      BasicBlock *UseBB = cast<Instruction>(UI)->getParent();
-      if (PHINode *P = dyn_cast<PHINode>(UI)) {
+      User *U = *UI;
+      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+      if (PHINode *P = dyn_cast<PHINode>(U)) {
         unsigned i =
           PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
         UseBB = P->getIncomingBlock(i);
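The IndVarSimplify hunk above adds an early bail-out when the loop is not in LoopSimplify form. A minimal standalone sketch of the pattern, assuming a toy loop type (the real predicate also checks details such as a single backedge):

```cpp
// Sketch of the early-out added above: run the rewrite only when the loop
// is in LoopSimplify form. Toy stand-in for llvm::Loop.
struct ToyLoop {
  bool HasPreheader;
  bool HasDedicatedExits;
  // LoopSimplify form guarantees a preheader, which (as the new comment
  // explains) getCanonicalInductionVariable relies on.
  bool isLoopSimplifyForm() const {
    return HasPreheader && HasDedicatedExits;
  }
};

bool runOnLoop(const ToyLoop &L) {
  if (!L.isLoopSimplifyForm())
    return false; // stay out of trouble, exactly as the hunk does
  // ... canonicalize induction variables here ...
  return true;
}

int main() { return runOnLoop(ToyLoop{true, true}) ? 0 : 1; }
```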
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index df05b71..edce14c 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -288,14 +289,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
     // Perhaps getConstantOnEdge should be smart enough to do this?
     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *P = *PI;
       // If the value is known by LazyValueInfo to be a constant in a
       // predecessor, use that information to try to thread this block.
-      Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+      Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
       if (PredCst == 0 ||
           (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
         continue;

-      Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+      Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
     }

     return !Result.empty();
@@ -345,8 +347,19 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
     }
     for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
       if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
-        Result.push_back(RHSVals[i]);
-        Result.back().first = InterestingVal;
+        // If we already inferred a value for this block on the LHS, don't
+        // re-add it.
+        bool HasValue = false;
+        for (unsigned r = 0, e = Result.size(); r != e; ++r)
+          if (Result[r].second == RHSVals[i].second) {
+            HasValue = true;
+            break;
+          }
+
+        if (!HasValue) {
+          Result.push_back(RHSVals[i]);
+          Result.back().first = InterestingVal;
+        }
       }
     return !Result.empty();
   }
@@ -409,20 +422,21 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
       (!isa<Instruction>(Cmp->getOperand(0)) ||
        cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
     Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
-
+
     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *P = *PI;
       // If the value is known by LazyValueInfo to be a constant in a
       // predecessor, use that information to try to thread this block.
       LazyValueInfo::Tristate
         Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
-                                      RHSCst, *PI, BB);
+                                      RHSCst, P, BB);
       if (Res == LazyValueInfo::Unknown)
         continue;

       Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
-      Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+      Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
     }
-
+
     return !Result.empty();
   }
 }
@@ -538,18 +552,22 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
       (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
     pred_iterator PI = pred_begin(BB), E = pred_end(BB);
     if (isa<BranchInst>(BB->getTerminator())) {
-      for (; PI != E; ++PI)
-        if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+      for (; PI != E; ++PI) {
+        BasicBlock *P = *PI;
+        if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
           if (PBI->isConditional() && PBI->getCondition() == Condition &&
-              ProcessBranchOnDuplicateCond(*PI, BB))
+              ProcessBranchOnDuplicateCond(P, BB))
             return true;
+      }
     } else {
       assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
-      for (; PI != E; ++PI)
-        if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator()))
+      for (; PI != E; ++PI) {
+        BasicBlock *P = *PI;
+        if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
           if (PSI->getCondition() == Condition &&
-              ProcessSwitchOnDuplicateCond(*PI, BB))
+              ProcessSwitchOnDuplicateCond(P, BB))
             return true;
+      }
     }
   }
@@ -569,19 +587,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
     // If we have a comparison, loop over the predecessors to see if there is
     // a condition with a lexically identical value.
     pred_iterator PI = pred_begin(BB), E = pred_end(BB);
-    for (; PI != E; ++PI)
-      if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
-        if (PBI->isConditional() && *PI != BB) {
+    for (; PI != E; ++PI) {
+      BasicBlock *P = *PI;
+      if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
+        if (PBI->isConditional() && P != BB) {
          if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
            if (CI->getOperand(0) == CondCmp->getOperand(0) &&
                CI->getOperand(1) == CondCmp->getOperand(1) &&
                CI->getPredicate() == CondCmp->getPredicate()) {
              // TODO: Could handle things like (x != 4) --> (x == 17)
-              if (ProcessBranchOnDuplicateCond(*PI, BB))
+              if (ProcessBranchOnDuplicateCond(P, BB))
                return true;
            }
          }
        }
+    }
  }
@@ -869,9 +889,15 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   // Add all the unavailable predecessors to the PredsToSplit list.
   for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
-       PI != PE; ++PI)
-    if (!AvailablePredSet.count(*PI))
-      PredsToSplit.push_back(*PI);
+       PI != PE; ++PI) {
+    BasicBlock *P = *PI;
+    // If the predecessor is an indirect goto, we can't split the edge.
+    if (isa<IndirectBrInst>(P->getTerminator()))
+      return false;
+
+    if (!AvailablePredSet.count(P))
+      PredsToSplit.push_back(P);
+  }

   // Split them out to their own block.
   UnavailablePred =
@@ -903,11 +929,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   // have multiple entries here.
   for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
        ++PI) {
+    BasicBlock *P = *PI;
     AvailablePredsTy::iterator I =
       std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
-                       std::make_pair(*PI, (Value*)0));
+                       std::make_pair(P, (Value*)0));

-    assert(I != AvailablePreds.end() && I->first == *PI &&
+    assert(I != AvailablePreds.end() && I->first == P &&
           "Didn't find entry for predecessor!");

     PN->addIncoming(I->second, I->first);
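Two defensive fixes appear in the JumpThreading hunks above: predecessors already assigned a value by the LHS of an OR are not re-added for the RHS, and edges from indirectbr terminators are refused because they cannot be split. The de-duplication loop is easy to check in isolation, with ints standing in for ConstantInt*/BasicBlock*:

```cpp
#include <utility>
#include <vector>

// Standalone sketch of the de-duplication added above: before recording a
// (value, predecessor) pair from the RHS of an OR, skip predecessors the
// LHS already produced, so the result list never names a block twice.
typedef std::pair<int, int> ValuePred; // (known value, predecessor id)

void addUnique(std::vector<ValuePred> &Result, ValuePred VP) {
  for (size_t r = 0, e = Result.size(); r != e; ++r)
    if (Result[r].second == VP.second)
      return; // this predecessor already has an inferred value
  Result.push_back(VP);
}

int main() {
  std::vector<ValuePred> Result;
  addUnique(Result, ValuePred(1, 7));
  addUnique(Result, ValuePred(1, 7)); // duplicate predecessor: ignored
  return (Result.size() == 1) ? 0 : 1;
}
```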
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 48817ab..e4894e9 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -83,7 +83,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
       if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
         return false;

-      BI++;
+      ++BI;
     }

   // Make sure that no instructions in the block have potential side-effects.
@@ -176,7 +176,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   BasicBlock::iterator BI = exitBlock->begin();
   while (PHINode* P = dyn_cast<PHINode>(BI)) {
     P->replaceUsesOfWith(exitingBlock, preheader);
-    BI++;
+    ++BI;
   }

   // Update the dominator tree and remove the instructions and blocks that will
@@ -226,7 +226,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   LPM.deleteLoopFromQueue(L);

   Changed = true;
-  NumDeleted++;
+  ++NumDeleted;

   return Changed;
 }
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 101ff5b..31058e5 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -649,7 +649,7 @@ bool LoopIndexSplit::updateLoopIterationSpace() {
       }
     }
   }
-  NumRestrictBounds++;
+  ++NumRestrictBounds;
   return true;
 }
@@ -958,11 +958,11 @@ bool LoopIndexSplit::splitLoop() {
         continue;

       for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
-          BI != BE; ++BI) {
+           BI != BE; ++BI) {
         Instruction *Inst = BI;

         if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
-            && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
+             && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
           return false;
       }
     }
@@ -1016,13 +1016,13 @@ bool LoopIndexSplit::splitLoop() {
     BSV = getMax(BSV, IVStartValue, Sign, PHTerm);

   // [*] Clone Loop
-  DenseMap<const Value *, Value *> ValueMap;
-  Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
+  ValueMap<const Value *, Value *> VMap;
+  Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
   Loop *ALoop = L;

   // [*] ALoop's exiting edge enters BLoop's header.
   //     ALoop's original exit block becomes BLoop's exit block.
-  PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
+  PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
   BasicBlock *A_ExitingBlock = ExitCondition->getParent();
   BranchInst *A_ExitInsn =
     dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
@@ -1047,7 +1047,7 @@ bool LoopIndexSplit::splitLoop() {
   for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
        BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
     if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
+      PHINode *PNClone = cast<PHINode>(VMap[PN]);
       InverseMap[PNClone] = PN;
     } else
       break;
@@ -1085,11 +1085,11 @@ bool LoopIndexSplit::splitLoop() {
   //    block. Remove incoming PHINode values from ALoop's exiting block.
   //    Add new incoming values from BLoop's incoming exiting value.
   //    Update BLoop exit block's dominator info..
-  BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
+  BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
   for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
        BI != BE; ++BI) {
     if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
+      PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
                       B_ExitingBlock);
       PN->removeIncomingValue(A_ExitingBlock);
     } else
@@ -1131,7 +1131,7 @@ bool LoopIndexSplit::splitLoop() {
   removeBlocks(A_InactiveBranch, L, A_ActiveBranch);

   //[*] Eliminate split condition's inactive branch in from BLoop.
-  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
+  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
   BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
   BasicBlock *B_InactiveBranch = NULL;
   BasicBlock *B_ActiveBranch = NULL;
@@ -1146,9 +1146,9 @@ bool LoopIndexSplit::splitLoop() {

   //[*] Move exit condition into split condition block to avoid
   //    executing dead loop iteration.
-  ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
-  Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
-  ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);
+  ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
+  Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
+  ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);

   moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
                     cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
@@ -1159,7 +1159,7 @@ bool LoopIndexSplit::splitLoop() {
                     B_SplitCondition, B_IndVar, B_IndVarIncrement,
                     BLoop, EVOpNum);

-  NumIndexSplit++;
+  ++NumIndexSplit;
   return true;
 }
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 5004483..16c4a15 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -147,7 +147,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
       continue;   // PHI nodes don't count.
     if (isa<DbgInfoIntrinsic>(OI))
       continue;  // Debug intrinsics don't count as size.
-    Size++;
+    ++Size;
   }

   if (Size > MAX_HEADER_SIZE)
@@ -263,7 +263,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {

   preserveCanonicalLoopForm(LPM);

-  NumRotated++;
+  ++NumRotated;
   return true;
 }
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 86ea3eb..a250a88 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -392,12 +392,13 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
   return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
 }

-/// isMulSExtable - Return true if the given add can be sign-extended
+/// isMulSExtable - Return true if the given mul can be sign-extended
 /// without changing its value.
-static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
   const Type *WideTy =
-    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
-  return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
 }

 /// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
@@ -413,20 +414,28 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
   if (LHS == RHS)
     return SE.getConstant(LHS->getType(), 1);

-  // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
-  // folding.
-  if (RHS->isAllOnesValue())
-    return SE.getMulExpr(LHS, RHS);
+  // Handle a few RHS special cases.
+  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+  if (RC) {
+    const APInt &RA = RC->getValue()->getValue();
+    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+    // some folding.
+    if (RA.isAllOnesValue())
+      return SE.getMulExpr(LHS, RC);
+    // Handle x /s 1 as x.
+    if (RA == 1)
+      return LHS;
+  }

   // Check for a division of a constant by a constant.
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
-    const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
     if (!RC)
       return 0;
-    if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
+    const APInt &LA = C->getValue()->getValue();
+    const APInt &RA = RC->getValue()->getValue();
+    if (LA.srem(RA) != 0)
       return 0;
-    return SE.getConstant(C->getValue()->getValue()
-               .sdiv(RC->getValue()->getValue()));
+    return SE.getConstant(LA.sdiv(RA));
   }

   // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
@@ -440,6 +449,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
       if (!Step) return 0;
       return SE.getAddRecExpr(Start, Step, AR->getLoop());
     }
+    return 0;
   }

   // Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -455,10 +465,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
       }
       return SE.getAddExpr(Ops);
     }
+    return 0;
   }

   // Check for a multiply operand that we can pull RHS out of.
-  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
     if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
       SmallVector<const SCEV *, 4> Ops;
       bool Found = false;
@@ -475,6 +486,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
       }
       return Found ? SE.getMulExpr(Ops) : 0;
     }
+    return 0;
+  }

   // Otherwise we don't know.
   return 0;
@@ -546,7 +559,7 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
       case Intrinsic::x86_sse2_storeu_pd:
       case Intrinsic::x86_sse2_storeu_dq:
       case Intrinsic::x86_sse2_storel_dq:
-        if (II->getOperand(1) == OperandVal)
+        if (II->getArgOperand(0) == OperandVal)
           isAddress = true;
         break;
     }
@@ -568,7 +581,7 @@ static const Type *getAccessType(const Instruction *Inst) {
     case Intrinsic::x86_sse2_storeu_pd:
     case Intrinsic::x86_sse2_storeu_dq:
     case Intrinsic::x86_sse2_storel_dq:
-      AccessTy = II->getOperand(1)->getType();
+      AccessTy = II->getArgOperand(0)->getType();
       break;
     }
   }
@@ -976,6 +989,8 @@ public:
   void dump() const;
 };

+}
+
 /// HasFormula - Test whether this use as a formula which has the same
 /// registers as the given formula.
 bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
@@ -1203,6 +1218,32 @@ static bool isAlwaysFoldable(const SCEV *S,
   return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
 }

+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+  static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+    return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+  }
+
+  static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+    return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+  }
+
+  static unsigned
+  getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+    unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+    Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+    return Result;
+  }
+
+  static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+                      const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+    return LHS == RHS;
+  }
+};
+
 /// FormulaSorter - This class implements an ordering for formulae which sorts
 /// the by their standalone cost.
 class FormulaSorter {
@@ -1275,7 +1316,9 @@ class LSRInstance {
   }

   // Support for sharing of LSRUses between LSRFixups.
-  typedef DenseMap<const SCEV *, size_t> UseMapTy;
+  typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+                   size_t,
+                   UseMapDenseMapInfo> UseMapTy;
   UseMapTy UseMap;

   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -1613,8 +1656,11 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
     NewRHS = Sel->getOperand(1);
   else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
     NewRHS = Sel->getOperand(2);
+  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+    NewRHS = SU->getValue();
   else
-    llvm_unreachable("Max doesn't match expected pattern!");
+    // Max doesn't match expected pattern.
+    return Cond;

   // Determine the new comparison opcode. It may be signed or unsigned,
   // and the original comparison may be either equality or inequality.
@@ -1805,6 +1851,8 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
     NewMaxOffset = NewOffset;
   }
   // Check for a mismatched access type, and fall back conservatively as needed.
+  // TODO: Be less conservative when the type is similar and can use the same
+  // addressing modes.
   if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
     NewAccessTy = Type::getVoidTy(AccessTy->getContext());

@@ -1833,7 +1881,7 @@ LSRInstance::getUse(const SCEV *&Expr,
   }

   std::pair<UseMapTy::iterator, bool> P =
-    UseMap.insert(std::make_pair(Expr, 0));
+    UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
   if (!P.second) {
     // A use already existed with this base.
     size_t LUIdx = P.first->second;
@@ -1919,7 +1967,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
         Strides.insert(AR->getStepRecurrence(SE));
         Worklist.push_back(AR->getStart());
       } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
-        Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+        Worklist.append(Add->op_begin(), Add->op_end());
       }
     } while (!Worklist.empty());
   }
@@ -2086,7 +2134,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
       const SCEV *S = Worklist.pop_back_val();

       if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
-        Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
+        Worklist.append(N->op_begin(), N->op_end());
       else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
         Worklist.push_back(C->getOperand());
       else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
@@ -2095,8 +2143,12 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
       } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
         if (!Inserted.insert(U)) continue;
         const Value *V = U->getValue();
-        if (const Instruction *Inst = dyn_cast<Instruction>(V))
+        if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+          // Look for instructions defined outside the loop.
           if (L->contains(Inst)) continue;
+        } else if (isa<UndefValue>(V))
+          // Undef doesn't have a live range, so it doesn't matter.
+          continue;
         for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
              UI != UE; ++UI) {
           const Instruction *UserInst = dyn_cast<Instruction>(*UI);
@@ -2155,20 +2207,23 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
 /// separate registers. If C is non-null, multiply each subexpression by C.
 static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                             SmallVectorImpl<const SCEV *> &Ops,
+                            SmallVectorImpl<const SCEV *> &UninterestingOps,
+                            const Loop *L,
                             ScalarEvolution &SE) {
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
     // Break out add operands.
     for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
          I != E; ++I)
-      CollectSubexprs(*I, C, Ops, SE);
+      CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
     return;
   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
     // Split a non-zero base out of an addrec.
     if (!AR->getStart()->isZero()) {
       CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                        AR->getStepRecurrence(SE),
-                                       AR->getLoop()), C, Ops, SE);
-      CollectSubexprs(AR->getStart(), C, Ops, SE);
+                                       AR->getLoop()),
+                      C, Ops, UninterestingOps, L, SE);
+      CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
       return;
     }
   } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2178,13 +2233,17 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
           dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
       CollectSubexprs(Mul->getOperand(1),
                       C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
-                      Ops, SE);
+                      Ops, UninterestingOps, L, SE);
       return;
     }
   }

-  // Otherwise use the value itself.
-  Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+  // Otherwise use the value itself. Loop-variant "unknown" values are
+  // uninteresting; we won't be able to do anything meaningful with them.
+  if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
+    UninterestingOps.push_back(S);
+  else
+    Ops.push_back(C ? SE.getMulExpr(C, S) : S);
 }

 /// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2198,8 +2257,15 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
     const SCEV *BaseReg = Base.BaseRegs[i];

-    SmallVector<const SCEV *, 8> AddOps;
-    CollectSubexprs(BaseReg, 0, AddOps, SE);
+    SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
+    CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
+
+    // Add any uninteresting values as one register, as we won't be able to
+    // form any interesting reassociation opportunities with them. They'll
+    // just have to be added inside the loop no matter what we do.
+    if (!UninterestingAddOps.empty())
+      AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+
     if (AddOps.size() == 1) continue;

     for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
@@ -2212,11 +2278,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
         continue;

       // Collect all operands except *J.
-      SmallVector<const SCEV *, 8> InnerAddOps;
-      for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
-           KE = AddOps.end(); K != KE; ++K)
-        if (K != J)
-          InnerAddOps.push_back(*K);
+      SmallVector<const SCEV *, 8> InnerAddOps
+        ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+      InnerAddOps.append
+        (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());

       // Don't leave just a constant behind in a register if the constant could
       // be folded into an immediate field.
@@ -2350,13 +2415,12 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
   for (SmallSetVector<int64_t, 8>::const_iterator
        I = Factors.begin(), E = Factors.end(); I != E; ++I) {
     int64_t Factor = *I;
-    Formula F = Base;

     // Check that the multiplication doesn't overflow.
-    if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
+    if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
       continue;
-    F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
-    if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
+    int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+    if (NewBaseOffs / Factor != Base.AM.BaseOffs)
       continue;

     // Check that multiplying with the use offset doesn't overflow.
@@ -2367,6 +2431,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
     if (Offset / Factor != LU.MinOffset)
       continue;

+    Formula F = Base;
+    F.AM.BaseOffs = NewBaseOffs;
+
     // Check that this scale is legal.
     if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
       continue;
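Two of the LoopStrengthReduce fixes above are easy to check by hand. First, `isMulSExtable` now widens to `size * numOperands` bits: a product of k n-bit values can need up to n*k bits, so the old "+1" (which is only enough for an add) under-estimated. Second, `getExactSDiv` grows RHS special cases whose arithmetic the standalone sketch below spells out:

```cpp
#include <cstdint>

// Worked sketch of the RHS special cases added to getExactSDiv above:
// x /s 1 is x, x /s -1 is -x, and two constants divide exactly only when
// the signed remainder is zero. Returns false when no exact result exists.
bool exactSDiv(int64_t LHS, int64_t RHS, int64_t &Result) {
  if (RHS == 1)  { Result = LHS;  return true; } // x /s 1  -> x
  if (RHS == -1) { Result = -LHS; return true; } // x /s -1 -> x * -1
  if (RHS == 0 || LHS % RHS != 0)
    return false; // not an exact division: give up
  Result = LHS / RHS;
  return true;
}

int main() {
  int64_t R;
  bool Exact   = exactSDiv(12, 4, R) && R == 3;
  bool Inexact = !exactSDiv(12, 5, R);
  return (Exact && Inexact) ? 0 : 1;
}
```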
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index ae7bf40..0c900ff 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -445,7 +445,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
   // This is a very ad-hoc heuristic.
   if (Metrics.NumInsts > Threshold ||
       Metrics.NumBlocks * 5 > Threshold ||
-      Metrics.NeverInline) {
+      Metrics.containsIndirectBr || Metrics.isRecursive) {
     DEBUG(dbgs() << "NOT unswitching loop %"
           << currentLoop->getHeader()->getName() << ", cost too high: "
           << currentLoop->getBlocks().size() << "\n");
@@ -457,21 +457,21 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
 }

 // RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by ValueMap.
+// current values into those specified by VMap.
 //
 static inline void RemapInstruction(Instruction *I,
-                                    DenseMap<const Value *, Value*> &ValueMap) {
+                                    ValueMap<const Value *, Value*> &VMap) {
   for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
     Value *Op = I->getOperand(op);
-    DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
-    if (It != ValueMap.end()) Op = It->second;
+    ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+    if (It != VMap.end()) Op = It->second;
     I->setOperand(op, Op);
   }
 }

 /// CloneLoop - Recursively clone the specified loop and all of its children,
 /// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
                        LoopInfo *LI, LPPassManager *LPM) {
   Loop *New = new Loop();
   LPM->insertLoop(New, PL);
@@ -615,11 +615,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
   // the loop preheader and exit blocks), keeping track of the mapping between
   // the instructions and blocks.
   NewBlocks.reserve(LoopBlocks.size());
-  DenseMap<const Value*, Value*> ValueMap;
+  ValueMap<const Value*, Value*> VMap;
   for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
-    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
     NewBlocks.push_back(NewBB);
-    ValueMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
+    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
     LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
   }

@@ -629,7 +629,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
                                 NewBlocks[0], F->end());

   // Now we create the new Loop object for the versioned loop.
-  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM);
+  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
   Loop *ParentLoop = L->getParentLoop();
   if (ParentLoop) {
     // Make sure to add the cloned preheader and exit blocks to the parent loop
@@ -638,7 +638,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
   }

   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
-    BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]);
+    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
     // The new exit block should be in the same loop as the old one.
     if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
       ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
@@ -653,8 +653,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
     for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
       PN = cast<PHINode>(I);
       Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
-      DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
-      if (It != ValueMap.end()) V = It->second;
+      ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+      if (It != VMap.end()) V = It->second;
       PN->addIncoming(V, NewExit);
     }
   }
@@ -663,7 +663,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
   for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
     for (BasicBlock::iterator I = NewBlocks[i]->begin(),
            E = NewBlocks[i]->end(); I != E; ++I)
-      RemapInstruction(I, ValueMap);
+      RemapInstruction(I, VMap);

   // Rewrite the original preheader to select between versions of the loop.
   BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
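The LoopUnswitch (and LoopIndexSplit) hunks above swap the clone map from `DenseMap` to `llvm::ValueMap`, whose entries follow keys through RAUW and deletion instead of going stale. The remapping walk itself is unchanged; here is its logic in standalone form, with ints standing in for Values:

```cpp
#include <map>
#include <vector>

// Standalone sketch of RemapInstruction above: rewrite each operand that
// has an entry in the clone map, leave the rest alone. (The commit's real
// change is only the container type.)
struct ToyInst { std::vector<int> Ops; };

void remapInstruction(ToyInst &I, const std::map<int, int> &VMap) {
  for (size_t op = 0, e = I.Ops.size(); op != e; ++op) {
    std::map<int, int>::const_iterator It = VMap.find(I.Ops[op]);
    if (It != VMap.end())
      I.Ops[op] = It->second; // operand was cloned: use the clone
  }
}

int main() {
  ToyInst I{{1, 2, 3}};
  std::map<int, int> VMap;
  VMap[2] = 20; // value 2 has a cloned counterpart
  remapInstruction(I, VMap);
  return (I.Ops[1] == 20) ? 0 : 1;
}
```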
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3611b8e..0e566c5 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -632,7 +632,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   // Remove the memcpy
   MD.removeInstruction(cpy);
   cpy->eraseFromParent();
-  NumMemCpyInstr++;
+  ++NumMemCpyInstr;

   return true;
 }
@@ -710,7 +710,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   if (MD.getDependency(C) == dep) {
     MD.removeInstruction(M);
     M->eraseFromParent();
-    NumMemCpyInstr++;
+    ++NumMemCpyInstr;
     return true;
   }
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 5aca9cdc..98452f5 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -407,13 +407,14 @@ static Value *NegateValue(Value *V, Instruction *BI) {
   // Okay, we need to materialize a negated version of V with an instruction.
   // Scan the use lists of V to see if we have one already.
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-    if (!BinaryOperator::isNeg(*UI)) continue;
+    User *U = *UI;
+    if (!BinaryOperator::isNeg(U)) continue;

     // We found one!  Now we have to make sure that the definition dominates
     // this use.  We do this by moving it to the entry block (if it is a
     // non-instruction value) or right after the definition.  These negates will
     // be zapped by reassociate later, so we don't need much finesse here.
-    BinaryOperator *TheNeg = cast<BinaryOperator>(*UI);
+    BinaryOperator *TheNeg = cast<BinaryOperator>(U);

     // Verify that the negate is in this function, V might be a constant expr.
     if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 5ca9ce3..dd445f6 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -926,7 +926,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
   DeleteDeadInstructions();
   AI->eraseFromParent();
-  NumReplaced++;
+  ++NumReplaced;
 }

 /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
@@ -965,11 +965,11 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
       isSafeGEP(GEPI, AI, GEPOffset, Info);
       if (!Info.isUnsafe)
         isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
-    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
       ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
       if (Length)
         isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
-                        UI.getOperandNo() == 1, Info);
+                        UI.getOperandNo() == CallInst::ArgOffset, Info);
       else
         MarkUnsafe(Info);
     } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1272,6 +1272,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
   // If there is an other pointer, we want to convert it to the same pointer
   // type as AI has, so we can GEP through it safely.
   if (OtherPtr) {
+    unsigned AddrSpace =
+      cast<PointerType>(OtherPtr->getType())->getAddressSpace();
+
     // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
     // optimization, but it's also required to detect the corner case where
@@ -1279,20 +1281,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
     // OtherPtr may be a bitcast or GEP that currently being rewritten. (This
     // function is only called for mem intrinsics that access the whole
     // aggregate, so non-zero GEPs are not an issue here.)
-    while (1) {
-      if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
-        OtherPtr = BC->getOperand(0);
-        continue;
-      }
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
-        // All zero GEPs are effectively bitcasts.
-        if (GEP->hasAllZeroIndices()) {
-          OtherPtr = GEP->getOperand(0);
-          continue;
-        }
-      }
-      break;
-    }
+    OtherPtr = OtherPtr->stripPointerCasts();
+
     // Copying the alloca to itself is a no-op: just delete it.
     if (OtherPtr == AI || OtherPtr == NewElts[0]) {
       // This code will run twice for a no-op memcpy -- once for each operand.
@@ -1304,15 +1294,13 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       return;
     }

-    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
-      if (BCE->getOpcode() == Instruction::BitCast)
-        OtherPtr = BCE->getOperand(0);
-
     // If the pointer is not the right type, insert a bitcast to the right
     // type.
-    if (OtherPtr->getType() != AI->getType())
-      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
-                                 MI);
+    const Type *NewTy =
+      PointerType::get(AI->getType()->getElementType(), AddrSpace);
+
+    if (OtherPtr->getType() != NewTy)
+      OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
   }

   // Process each element of the aggregate.
@@ -1373,7 +1361,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
         // If the stored element is zero (common case), just store a null
         // constant.
         Constant *StoreVal;
-        if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
           if (CI->isZero()) {
             StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
           } else {
@@ -1436,7 +1424,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       Value *Ops[] = {
         SROADest ? EltPtr : OtherElt,  // Dest ptr
         SROADest ? OtherElt : EltPtr,  // Src ptr
-        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
         // Align
         ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
         MI->getVolatileCst()
@@ -1451,8 +1439,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
     } else {
       assert(isa<MemSetInst>(MI));
       Value *Ops[] = {
-        EltPtr, MI->getOperand(2),  // Dest, Value,
-        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+        EltPtr, MI->getArgOperand(1),  // Dest, Value,
+        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
         Zero,  // Align
         ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
       };
@@ -1655,7 +1643,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
       SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
     }

-    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+    // Don't create an 'or x, 0' on the first iteration.
+    if (!isa<Constant>(ResultVal) ||
+        !cast<Constant>(ResultVal)->isNullValue())
+      ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+    else
+      ResultVal = SrcField;
   }

   // Handle tail padding by truncating the result
@@ -1794,7 +1787,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
       if (isOffset) return false;

       // If the memintrinsic isn't using the alloca as the dest, reject it.
-      if (UI.getOperandNo() != 1) return false;
+      if (UI.getOperandNo() != CallInst::ArgOffset) return false;

       // If the source of the memcpy/move is not a constant global, reject it.
       if (!PointsToConstantGlobal(MI->getSource()))
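The SROA hunk above folds a hand-rolled peeling loop into `Value::stripPointerCasts`, which looks through bitcasts and all-zero GEPs to the underlying object (and the rewrite now also preserves the original pointer's address space when re-casting). A standalone model of the peeling logic, with a tagged node in place of `llvm::Value`:

```cpp
// Sketch of what Value::stripPointerCasts does for the hunk above: peel
// bitcasts and all-zero GEPs until a base object is reached. Toy tagged
// node, not the LLVM class.
struct Node {
  enum Kind { Base, BitCast, ZeroGEP } K;
  Node *Operand;
};

const Node *stripPointerCasts(const Node *V) {
  while (true) {
    if (V->K == Node::BitCast) { V = V->Operand; continue; }
    // All-zero GEPs are effectively bitcasts, as the old loop noted.
    if (V->K == Node::ZeroGEP) { V = V->Operand; continue; }
    return V; // reached the underlying object
  }
}

int main() {
  Node B{Node::Base, nullptr};
  Node G{Node::ZeroGEP, &B};
  Node C{Node::BitCast, &G};
  return (stripPointerCasts(&C) == &B) ? 0 : 1;
}
```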
uint64_t Len = GetStringLength(Src); @@ -162,12 +181,12 @@ struct StrNCatOpt : public StrCatOpt { return 0; // Extract some information from the instruction - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); uint64_t Len; // We don't do anything if length is not constant - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3))) + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else return 0; @@ -207,11 +226,11 @@ struct StrChrOpt : public LibCallOptimization { FT->getParamType(0) != FT->getReturnType()) return 0; - Value *SrcStr = CI->getOperand(1); + Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. - ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2)); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); if (CharC == 0) { // These optimizations require TargetData. if (!TD) return 0; @@ -220,7 +239,7 @@ struct StrChrOpt : public LibCallOptimization { if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; - return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(TD->getIntPtrType(*Context), Len), B, TD); } @@ -260,12 +279,12 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -308,19 +327,19 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !FT->getParamType(2)->isIntegerTy()) return 0; - Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); // Get the length argument if it is constant. 
uint64_t Length; - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3))) + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Length = LengthArg->getZExtValue(); else return 0; @@ -328,6 +347,9 @@ struct StrNCmpOpt : public LibCallOptimization { if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); @@ -365,7 +387,7 @@ struct StrCpyOpt : public LibCallOptimization { FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; @@ -381,7 +403,7 @@ struct StrCpyOpt : public LibCallOptimization { if (OptChkCall) EmitMemCpyChk(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getOperand(3), B, TD); + CI->getArgOperand(2), B, TD); else EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), @@ -402,9 +424,9 @@ struct StrNCpyOpt : public LibCallOptimization { !FT->getParamType(2)->isIntegerTy()) return 0; - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); - Value *LenOp = CI->getOperand(3); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); @@ -452,7 +474,7 @@ struct StrLenOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy()) return 0; - Value *Src = CI->getOperand(1); + Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 if (uint64_t Len = GetStringLength(Src)) @@ -477,7 +499,7 @@ struct StrToOpt : public LibCallOptimization { !FT->getParamType(1)->isPointerTy()) return 0; - Value *EndPtr = CI->getOperand(2); + Value *EndPtr = CI->getArgOperand(1); if (isa<ConstantPointerNull>(EndPtr)) { CI->setOnlyReadsMemory(); CI->addAttribute(1, Attribute::NoCapture); @@ -500,17 +522,34 @@ struct StrStrOpt : public LibCallOptimization { return 0; // fold strstr(x, x) -> x. - if (CI->getOperand(1) == CI->getOperand(2)) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD); + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + Old->replaceAllUsesWith(Cmp); + Old->eraseFromParent(); + } + return CI; + } // See if either input string is a constant string. std::string SearchStr, ToFindStr; - bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); - bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); + bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr); // fold strstr(x, "") -> x. 
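At the source level, the strstr fold added above recognizes that strstr(a, b) == a is a prefix test, so there is no need to search the whole haystack. A hedged C++ illustration of the before and after (function names invented for the example):

    #include <string.h>

    // Before: strstr may walk all of 'a' even though only a match
    // at position 0 matters for the comparison.
    int starts_with(const char *a, const char *b) {
      return strstr(a, b) == a;
    }

    // After the fold, the call site behaves like:
    int starts_with_folded(const char *a, const char *b) {
      return strncmp(a, b, strlen(b)) == 0;
    }

The two agree on the empty-needle edge case as well: strstr(a, "") returns a, and strncmp with length 0 returns 0.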
if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // If both strings are known, constant fold it. if (HasStr1 && HasStr2) { @@ -520,14 +559,14 @@ struct StrStrOpt : public LibCallOptimization { return Constant::getNullValue(CI->getType()); // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = CastToCStr(CI->getOperand(1), B); + Value *Result = CastToCStr(CI->getArgOperand(0), B); Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); return B.CreateBitCast(Result, CI->getType()); } // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD), + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD), CI->getType()); return 0; } @@ -545,13 +584,13 @@ struct MemCmpOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy(32)) return 0; - Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); if (LHS == RHS) // memcmp(s,s,x) -> 0 return Constant::getNullValue(CI->getType()); // Make sure we have a constant length. - ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3)); + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); if (!LenC) return 0; uint64_t Len = LenC->getZExtValue(); @@ -598,9 +637,9 @@ struct MemCpyOpt : public LibCallOptimization { return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, false, B, TD); - return CI->getOperand(1); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); + return CI->getArgOperand(0); } }; @@ -620,9 +659,9 @@ struct MemMoveOpt : public LibCallOptimization { return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - EmitMemMove(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, false, B, TD); - return CI->getOperand(1); + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); + return CI->getArgOperand(0); } }; @@ -642,10 +681,10 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), + Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); - return CI->getOperand(1); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + return CI->getArgOperand(0); } }; @@ -666,7 +705,7 @@ struct PowOpt : public LibCallOptimization { !FT->getParamType(0)->isFloatingPointTy()) return 0; - Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 return Op1C; @@ -720,18 +759,18 @@ struct Exp2Opt : public LibCallOptimization { !FT->getParamType(0)->isFloatingPointTy()) return 0; - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) LdExpArg = 
B.CreateSExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ Type::getInt32Ty(*Context), "tmp");
 } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
 if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
 LdExpArg = B.CreateZExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ Type::getInt32Ty(*Context), "tmp");
 }
 if (LdExpArg) {
@@ -772,7 +811,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
 return 0;
 // If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
 if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
 return 0;
@@ -797,11 +836,11 @@ struct FFSOpt : public LibCallOptimization {
 // Just make sure this has one integer argument and that the result
 // type is i32.
 if (FT->getNumParams() != 1 ||
-     !FT->getReturnType()->isIntegerTy(32) ||
+     !FT->getReturnType()->isIntegerTy(32) ||
 !FT->getParamType(0)->isIntegerTy())
 return 0;
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
 // Constant fold.
 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
@@ -821,7 +860,7 @@ struct FFSOpt : public LibCallOptimization {
 Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
 return B.CreateSelect(Cond, V,
-                          ConstantInt::get(Type::getInt32Ty(*Context), 0));
+                       ConstantInt::get(Type::getInt32Ty(*Context), 0));
 }
};
@@ -837,7 +876,7 @@ struct IsDigitOpt : public LibCallOptimization {
 return 0;
 // isdigit(c) -> (c-'0') <u 10
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
 Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
 "isdigittmp");
 Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
@@ -858,7 +897,7 @@ struct IsAsciiOpt : public LibCallOptimization {
 return 0;
 // isascii(c) -> c <u 128
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
 Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
 "isascii");
 return B.CreateZExt(Op, CI->getType());
@@ -877,7 +916,7 @@ struct AbsOpt : public LibCallOptimization {
 return 0;
 // abs(x) -> x >s -1 ? x : -x
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
 Value *Pos = B.CreateICmpSGT(Op,
 Constant::getAllOnesValue(Op->getType()),
 "ispos");
@@ -899,7 +938,7 @@ struct ToAsciiOpt : public LibCallOptimization {
 return 0;
 // toascii(c) -> c & 0x7f
- return B.CreateAnd(CI->getOperand(1),
+ return B.CreateAnd(CI->getArgOperand(0),
 ConstantInt::get(CI->getType(),0x7F));
 }
};
@@ -922,7 +961,7 @@ struct PrintFOpt : public LibCallOptimization {
 // Check for a fixed format string.
 std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
 return 0;
 // Empty format string -> noop.
@@ -954,20 +993,20 @@ struct PrintFOpt : public LibCallOptimization {
 }
 // Optimize specific format strings.
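The character-classification and abs folds a few hunks above each reduce a libcall to one or two integer operations. Written out as C++ for clarity (a sketch of what the emitted IR computes, not the pass's actual output):

    #include <assert.h>

    // isdigit(c) -> (unsigned)(c - '0') < 10, branch-free.
    int isdigit_folded(int c)  { return (unsigned)(c - '0') < 10u; }

    // isascii(c) -> (unsigned)c < 128.
    int isascii_folded(int c)  { return (unsigned)c < 128u; }

    // abs(x) -> x > -1 ? x : -x  (the compare-and-select form above).
    int abs_folded(int x)      { return x > -1 ? x : -x; }

    int main() {
      assert(isdigit_folded('7') && !isdigit_folded('a'));
      assert(isascii_folded('A') && !isascii_folded(200));
      assert(abs_folded(-5) == 5);
      return 0;
    }

The unsigned compare in isdigit_folded handles values below '0' for free: the subtraction wraps to a large unsigned number, which fails the < 10 test.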
- // printf("%c", chr) --> putchar(*(i8*)dst) - if (FormatStr == "%c" && CI->getNumOperands() > 2 && - CI->getOperand(2)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getOperand(2), B, TD); + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) - if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && - CI->getOperand(2)->getType()->isPointerTy() && + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy() && CI->use_empty()) { - EmitPutS(CI->getOperand(2), B, TD); + EmitPutS(CI->getArgOperand(1), B, TD); return CI; } return 0; @@ -988,11 +1027,11 @@ struct SPrintFOpt : public LibCallOptimization { // Check for a fixed format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; // If we just have a format string (nothing else crazy) transform it. - if (CI->getNumOperands() == 3) { + if (CI->getNumArgOperands() == 2) { // Make sure there's no % in the constant array. We could try to handle // %% -> % in the future if we cared. for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) @@ -1003,7 +1042,7 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte. ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); @@ -1011,16 +1050,17 @@ struct SPrintFOpt : public LibCallOptimization { // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) return 0; // Decode the second character of the format string. 
if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - Value *V = B.CreateTrunc(CI->getOperand(3), + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), Type::getInt8Ty(*Context), "char"); - Value *Ptr = CastToCStr(CI->getOperand(1), B); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), "nul"); @@ -1034,13 +1074,13 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getOperand(3)->getType()->isPointerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getOperand(3), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1064,8 +1104,8 @@ struct FWriteOpt : public LibCallOptimization { return 0; // Get the element size and count. - ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2)); - ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3)); + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); if (!SizeC || !CountC) return 0; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); @@ -1075,8 +1115,8 @@ struct FWriteOpt : public LibCallOptimization { // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); - EmitFPutC(Char, CI->getOperand(4), B, TD); + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + EmitFPutC(Char, CI->getArgOperand(3), B, TD); return ConstantInt::get(CI->getType(), 1); } @@ -1100,11 +1140,11 @@ struct FPutsOpt : public LibCallOptimization { return 0; // fputs(s,F) --> fwrite(s,1,strlen(s),F) - uint64_t Len = GetStringLength(CI->getOperand(1)); + uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getOperand(1), + EmitFWrite(CI->getArgOperand(0), ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getOperand(2), B, TD); + CI->getArgOperand(1), B, TD); return CI; // Known to have no uses (see above). } }; @@ -1123,11 +1163,11 @@ struct FPrintFOpt : public LibCallOptimization { // All the optimizations depend on the format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) - if (CI->getNumOperands() == 3) { + if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return 0; // We found a format specifier. @@ -1135,31 +1175,32 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. 
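The fwrite and fputs folds above go in opposite directions: an fwrite of exactly one byte becomes fputc, while an fputs whose string length is known at compile time becomes fwrite. A rough source-level sketch (the demo function is invented for the example):

    #include <stdio.h>

    // Source-level view of the stdio folds:
    //   fwrite(s, 1, 1, f) -> fputc(s[0], f)              (exactly one byte)
    //   fputs(s, f)        -> fwrite(s, 1, strlen(s), f)  (known length)
    void demo(const char *s, FILE *f) {
      fputc(s[0], f);           // was: fwrite(s, 1, 1, f);
      fwrite("abc", 1, 3, f);   // was: fputs("abc", f); length 3 is known
    }

Both folds need the size and count to be ConstantInts, which is why the code bails out when either dyn_cast fails.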
if (!TD) return 0; - EmitFWrite(CI->getOperand(2), + EmitFWrite(CI->getArgOperand(1), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), - CI->getOperand(1), B, TD); + CI->getArgOperand(0), B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) return 0; // Decode the second character of the format string. if (FormatStr[1] == 'c') { - // fprintf(F, "%c", chr) --> *(i8*)dst = chr - if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD); + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); return ConstantInt::get(CI->getType(), 1); } if (FormatStr[1] == 's') { - // fprintf(F, "%s", str) -> fputs(str, F) - if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty()) + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD); + EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); return CI; } return 0; diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 2306a77..9208238 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -206,12 +206,13 @@ static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, // there is only one other pred, get it, otherwise we can't handle it. PI = pred_begin(DstBlock); PE = pred_end(DstBlock); BasicBlock *DstOtherPred = 0; - if (*PI == SrcBlock) { + BasicBlock *P = *PI; + if (P == SrcBlock) { if (++PI == PE) return 0; DstOtherPred = *PI; if (++PI != PE) return 0; } else { - DstOtherPred = *PI; + DstOtherPred = P; if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5ad5de2..01c8e5d 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -16,9 +16,9 @@ // transformation from taking place, though currently the analysis cannot // support moving any really useful instructions (only dead ones). // 2. This pass transforms functions that are prevented from being tail -// recursive by an associative expression to use an accumulator variable, -// thus compiling the typical naive factorial or 'fib' implementation into -// efficient code. +// recursive by an associative and commutative expression to use an +// accumulator variable, thus compiling the typical naive factorial or +// 'fib' implementation into efficient code. // 3. TRE is performed if the function returns void, if the return // returns the result returned by the call, or if the function returns a // run-time constant on all exits from the function. 
It is possible, though @@ -60,6 +60,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" @@ -252,7 +253,7 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { // If we are passing this argument into call as the corresponding // argument operand, then the argument is dynamically constant. // Otherwise, we cannot transform this function safely. - if (CI->getOperand(ArgNo+1) == Arg) + if (CI->getArgOperand(ArgNo) == Arg) return true; } @@ -269,16 +270,16 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { } // getCommonReturnValue - Check to see if the function containing the specified -// return instruction and tail call consistently returns the same -// runtime-constant value at all exit points. If so, return the returned value. +// tail call consistently returns the same runtime-constant value at all exit +// points except for IgnoreRI. If so, return the returned value. // -static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { - Function *F = TheRI->getParent()->getParent(); +static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { + Function *F = CI->getParent()->getParent(); Value *ReturnedValue = 0; for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) - if (RI != TheRI) { + if (RI != IgnoreRI) { Value *RetOp = RI->getOperand(0); // We can only perform this transformation if the value returned is @@ -301,9 +302,9 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { /// Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { - if (!I->isAssociative()) return 0; + if (!I->isAssociative() || !I->isCommutative()) return 0; assert(I->getNumOperands() == 2 && - "Associative operations should have 2 args!"); + "Associative/commutative operations should have 2 args!"); // Exactly one operand should be the result of the call instruction... if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || @@ -368,11 +369,16 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, return false; } - // If we are introducing accumulator recursion to eliminate associative - // operations after the call instruction, this variable contains the initial - // value for the accumulator. If this value is set, we actually perform - // accumulator recursion elimination instead of simple tail recursion - // elimination. + // If we are introducing accumulator recursion to eliminate operations after + // the call instruction that are both associative and commutative, the initial + // value for the accumulator is placed in this variable. If this value is set + // then we actually perform accumulator recursion elimination instead of + // simple tail recursion elimination. If the operation is an LLVM instruction + // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then + // we are handling the case when the return instruction returns a constant C + // which is different to the constant returned by other return instructions + // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a + // special case of accumulator recursion, the operation being "return C". 
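The accumulator transformation that comment describes is easiest to see on the classic factorial. A hedged sketch of the effect, hand-written in C++ here; the pass itself operates on LLVM IR, not source:

    // Before: the multiply after the call blocks plain tail recursion.
    unsigned fact(unsigned n) {
      if (n <= 1) return 1;
      return n * fact(n - 1);   // associative and commutative op after the call
    }

    // After accumulator recursion elimination, morally equivalent to:
    unsigned fact_tre(unsigned n) {
      unsigned acc = 1;         // AccumulatorRecursionEliminationInitVal
      while (n > 1) {
        acc *= n;               // the accumulator PHI absorbs each multiply
        --n;
      }
      return acc;
    }

The loop applies the multiplies in the opposite order from the recursion, which is exactly why the pass now insists on commutativity as well as associativity.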
Value *AccumulatorRecursionEliminationInitVal = 0;
 Instruction *AccumulatorRecursionInstr = 0;
@@ -383,9 +389,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
 for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
 if (!CanMoveAboveCall(BBI, CI)) {
 // If we can't move the instruction above the call, it might be because it
- // is an associative operation that could be transformed using accumulator
- // recursion elimination. Check to see if this is the case, and if so,
- // remember the initial accumulator value for later.
+ // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
 if ((AccumulatorRecursionEliminationInitVal =
 CanTransformAccumulatorRecursion(BBI, CI))) {
 // Yes, this is accumulator recursion. Remember which instruction
@@ -403,8 +409,18 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
 if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
 !isa<UndefValue>(Ret->getReturnValue()) &&
 AccumulatorRecursionEliminationInitVal == 0 &&
- !getCommonReturnValue(Ret, CI))
- return false;
+ !getCommonReturnValue(0, CI)) {
+ // One case remains that we are able to handle: the current return
+ // instruction returns a constant, and all other return instructions
+ // return a different constant.
+ if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
+ return false; // Current return instruction does not return a constant.
+ // Check that all other return instructions return a common constant. If
+ // so, record it in AccumulatorRecursionEliminationInitVal.
+ AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
+ if (!AccumulatorRecursionEliminationInitVal)
+ return false;
+ }
 // OK! We can transform this tail call. If this is the first one found,
 // create the new entry block, allowing us to branch back to the old entry.
@@ -453,8 +469,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
 // Ok, now that we know we have a pseudo-entry block WITH all of the
 // required PHI nodes, add entries into the PHI node for the actual
 // parameters passed into the tail-recursive call.
- for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
- ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
 // If we are introducing an accumulator variable to eliminate the recursion,
 // do so now. Note that we _know_ that no subsequent tail recursion
@@ -464,8 +480,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
 if (AccumulatorRecursionEliminationInitVal) {
 Instruction *AccRecInstr = AccumulatorRecursionInstr;
 // Start by inserting a new PHI node for the accumulator.
- PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr",
- OldEntry->begin());
+ PHINode *AccPN =
+ PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ "accumulator.tr", OldEntry->begin());
 // Loop over all of the predecessors of the tail recursion block. For the
 // real entry into the function we seed the PHI with the initial value,
@@ -475,20 +492,27 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
 // it will not show up as a predecessor.
for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
 PI != PE; ++PI) {
- if (*PI == &F->getEntryBlock())
- AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI);
+ BasicBlock *P = *PI;
+ if (P == &F->getEntryBlock())
+ AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
 else
- AccPN->addIncoming(AccPN, *PI);
+ AccPN->addIncoming(AccPN, P);
 }
- // Add an incoming argument for the current block, which is computed by our
- // associative accumulator instruction.
- AccPN->addIncoming(AccRecInstr, BB);
-
- // Next, rewrite the accumulator recursion instruction so that it does not
- // use the result of the call anymore, instead, use the PHI node we just
- // inserted.
- AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ if (AccRecInstr) {
+ // Add an incoming argument for the current block, which is computed by
+ // our associative and commutative accumulator instruction.
+ AccPN->addIncoming(AccRecInstr, BB);
+
+ // Next, rewrite the accumulator recursion instruction so that it does not
+ // use the result of the call anymore, instead, use the PHI node we just
+ // inserted.
+ AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ } else {
+ // Add an incoming argument for the current block, which is just the
+ // constant returned by the current return instruction.
+ AccPN->addIncoming(Ret->getReturnValue(), BB);
+ }
 // Finally, rewrite any return instructions in the program to return the PHI
 // node instead of the "initval" that they do currently. This loop will