| author | rdivacky <rdivacky@FreeBSD.org> | 2009-11-18 14:58:34 +0000 |
|---|---|---|
| committer | rdivacky <rdivacky@FreeBSD.org> | 2009-11-18 14:58:34 +0000 |
| commit | d2e985fd323c167e20f77b045a1d99ad166e65db (patch) | |
| tree | 6a111e552c75afc66228e3d8f19b6731e4013f10 /lib/Transforms/Scalar | |
| parent | ded64d5d348ce8d8c5aa42cf63f6de9dd84b7e89 (diff) | |
Update LLVM to r89205.
Diffstat (limited to 'lib/Transforms/Scalar')
23 files changed, 1716 insertions, 1439 deletions
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp index c8541d7..e58fa63 100644 --- a/lib/Transforms/Scalar/ABCD.cpp +++ b/lib/Transforms/Scalar/ABCD.cpp @@ -412,7 +412,9 @@ class ABCD : public FunctionPass { /// If PN_op1 and PN_o2 are different from NULL, create a constraint /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace /// with the respective V_op#, if V_op# is a ConstantInt. - void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, APInt value); + void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, + ConstantInt *V_op1, ConstantInt *V_op2, + APInt value); /// Returns the sigma representing the Instruction I in BasicBlock BB. /// Returns NULL in case there is no sigma for this Instruction in this @@ -735,25 +737,27 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { APInt Zero = APInt::getNullValue(width); CmpInst::Predicate Pred = ICI->getPredicate(); + ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1); + ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2); switch (Pred) { case CmpInst::ICMP_SGT: // signed greater than - createConstraintSigSig(SIG_op2_t, SIG_op1_t, MinusOne); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, Zero); + createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne); + createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero); break; case CmpInst::ICMP_SGE: // signed greater or equal - createConstraintSigSig(SIG_op2_t, SIG_op1_t, Zero); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, MinusOne); + createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero); + createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne); break; case CmpInst::ICMP_SLT: // signed less than - createConstraintSigSig(SIG_op1_t, SIG_op2_t, MinusOne); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, Zero); + createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne); + createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero); break; case CmpInst::ICMP_SLE: // signed less or equal - createConstraintSigSig(SIG_op1_t, SIG_op2_t, Zero); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, MinusOne); + createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero); + createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne); break; default: @@ -772,6 +776,10 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { /// b->a and c->a with weight 0 in the lower bound graph, and the edges /// a->b and a->c with weight 0 in the upper bound graph. void ABCD::createConstraintPHINode(PHINode *PN) { + // FIXME: We really want to disallow sigma nodes, but I don't know the best + // way to detect the other than this. 
+ if (PN->getNumOperands() == 2) return; + int32_t width = cast<IntegerType>(PN->getType())->getBitWidth(); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); @@ -796,13 +804,11 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth(); inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true); inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false); - created.insert(*SIG_op_t); } if (*SIG_op_f) { int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth(); inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true); inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false); - created.insert(*SIG_op_f); } } @@ -810,10 +816,17 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace /// with the respective V_op#, if V_op# is a ConstantInt. void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, + ConstantInt *V_op1, ConstantInt *V_op2, APInt value) { if (SIG_op1 && SIG_op2) { inequality_graph.addEdge(SIG_op2, SIG_op1, value, true); inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false); + } else if (SIG_op1 && V_op2) { + inequality_graph.addEdge(V_op2, SIG_op1, value, true); + inequality_graph.addEdge(SIG_op1, V_op2, -value, false); + } else if (SIG_op2 && V_op1) { + inequality_graph.addEdge(SIG_op2, V_op1, value, true); + inequality_graph.addEdge(V_op1, SIG_op2, -value, false); } } @@ -1036,7 +1049,7 @@ void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const { /// Prints the body of the dot file void ABCD::InequalityGraph::printBody(raw_ostream &OS) const { - DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator begin = + DenseMap<Value *, SmallPtrSet<Edge *, 16> >::const_iterator begin = graph.begin(), end = graph.end(); for (; begin != end ; ++begin) { diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index e048518..5a92399 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -3,7 +3,6 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp BasicBlockPlacement.cpp CodeGenPrepare.cpp - CondPropagate.cpp ConstantProp.cpp DCE.cpp DeadStoreElimination.cpp diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp deleted file mode 100644 index 8a6c556..0000000 --- a/lib/Transforms/Scalar/CondPropagate.cpp +++ /dev/null @@ -1,289 +0,0 @@ -//===-- CondPropagate.cpp - Propagate Conditional Expressions -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass propagates information about conditional expressions through the -// program, allowing it to eliminate conditional branches in some cases. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "condprop" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" -using namespace llvm; - -STATISTIC(NumBrThread, "Number of CFG edges threaded through branches"); -STATISTIC(NumSwThread, "Number of CFG edges threaded through switches"); - -namespace { - struct CondProp : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - CondProp() : FunctionPass(&ID) {} - - virtual bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredID(BreakCriticalEdgesID); - //AU.addRequired<DominanceFrontier>(); - } - - private: - bool MadeChange; - SmallVector<BasicBlock *, 4> DeadBlocks; - void SimplifyBlock(BasicBlock *BB); - void SimplifyPredecessors(BranchInst *BI); - void SimplifyPredecessors(SwitchInst *SI); - void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB); - bool RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI); - }; -} - -char CondProp::ID = 0; -static RegisterPass<CondProp> X("condprop", "Conditional Propagation"); - -FunctionPass *llvm::createCondPropagationPass() { - return new CondProp(); -} - -bool CondProp::runOnFunction(Function &F) { - bool EverMadeChange = false; - DeadBlocks.clear(); - - // While we are simplifying blocks, keep iterating. - do { - MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E;) - SimplifyBlock(BB++); - EverMadeChange = EverMadeChange || MadeChange; - } while (MadeChange); - - if (EverMadeChange) { - while (!DeadBlocks.empty()) { - BasicBlock *BB = DeadBlocks.back(); DeadBlocks.pop_back(); - DeleteDeadBlock(BB); - } - } - return EverMadeChange; -} - -void CondProp::SimplifyBlock(BasicBlock *BB) { - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - // If this is a conditional branch based on a phi node that is defined in - // this block, see if we can simplify predecessors of this block. - if (BI->isConditional() && isa<PHINode>(BI->getCondition()) && - cast<PHINode>(BI->getCondition())->getParent() == BB) - SimplifyPredecessors(BI); - - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { - if (isa<PHINode>(SI->getCondition()) && - cast<PHINode>(SI->getCondition())->getParent() == BB) - SimplifyPredecessors(SI); - } - - // If possible, simplify the terminator of this block. - if (ConstantFoldTerminator(BB)) - MadeChange = true; - - // If this block ends with an unconditional branch and the only successor has - // only this block as a predecessor, merge the two blocks together. - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) - if (BI->isUnconditional() && BI->getSuccessor(0)->getSinglePredecessor() && - BB != BI->getSuccessor(0)) { - BasicBlock *Succ = BI->getSuccessor(0); - - // If Succ has any PHI nodes, they are all single-entry PHI's. Eliminate - // them. - FoldSingleEntryPHINodes(Succ); - - // Remove BI. - BI->eraseFromParent(); - - // Move over all of the instructions. - BB->getInstList().splice(BB->end(), Succ->getInstList()); - - // Any phi nodes that had entries for Succ now have entries from BB. 
- Succ->replaceAllUsesWith(BB); - - // Succ is now dead, but we cannot delete it without potentially - // invalidating iterators elsewhere. Just insert an unreachable - // instruction in it and delete this block later on. - new UnreachableInst(BB->getContext(), Succ); - DeadBlocks.push_back(Succ); - MadeChange = true; - } -} - -// SimplifyPredecessors(branches) - We know that BI is a conditional branch -// based on a PHI node defined in this block. If the phi node contains constant -// operands, then the blocks corresponding to those operands can be modified to -// jump directly to the destination instead of going through this block. -void CondProp::SimplifyPredecessors(BranchInst *BI) { - // TODO: We currently only handle the most trival case, where the PHI node has - // one use (the branch), and is the only instruction besides the branch and dbg - // intrinsics in the block. - PHINode *PN = cast<PHINode>(BI->getCondition()); - - if (PN->getNumIncomingValues() == 1) { - // Eliminate single-entry PHI nodes. - FoldSingleEntryPHINodes(PN->getParent()); - return; - } - - - if (!PN->hasOneUse()) return; - - BasicBlock *BB = BI->getParent(); - if (&*BB->begin() != PN) - return; - BasicBlock::iterator BBI = BB->begin(); - BasicBlock::iterator BBE = BB->end(); - while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */; - if (&*BBI != BI) - return; - - // Ok, we have this really simple case, walk the PHI operands, looking for - // constants. Walk from the end to remove operands from the end when - // possible, and to avoid invalidating "i". - for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) { - Value *InVal = PN->getIncomingValue(i-1); - if (!RevectorBlockTo(PN->getIncomingBlock(i-1), InVal, BI)) - continue; - - ++NumBrThread; - - // If there were two predecessors before this simplification, or if the - // PHI node contained all the same value except for the one we just - // substituted, the PHI node may be deleted. Don't iterate through it the - // last time. - if (BI->getCondition() != PN) return; - } -} - -// SimplifyPredecessors(switch) - We know that SI is switch based on a PHI node -// defined in this block. If the phi node contains constant operands, then the -// blocks corresponding to those operands can be modified to jump directly to -// the destination instead of going through this block. -void CondProp::SimplifyPredecessors(SwitchInst *SI) { - // TODO: We currently only handle the most trival case, where the PHI node has - // one use (the branch), and is the only instruction besides the branch and - // dbg intrinsics in the block. - PHINode *PN = cast<PHINode>(SI->getCondition()); - if (!PN->hasOneUse()) return; - - BasicBlock *BB = SI->getParent(); - if (&*BB->begin() != PN) - return; - BasicBlock::iterator BBI = BB->begin(); - BasicBlock::iterator BBE = BB->end(); - while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */; - if (&*BBI != SI) - return; - - // Ok, we have this really simple case, walk the PHI operands, looking for - // constants. Walk from the end to remove operands from the end when - // possible, and to avoid invalidating "i". - for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) - if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) { - BasicBlock *PredBB = PN->getIncomingBlock(i-1); - if (isa<BranchInst>(PredBB->getTerminator())) { - // If we have a constant, forward the edge from its current to its - // ultimate destination. 
- unsigned DestCase = SI->findCaseValue(CI); - RevectorBlockTo(PredBB, SI->getSuccessor(DestCase)); - ++NumSwThread; - - // If there were two predecessors before this simplification, or if the - // PHI node contained all the same value except for the one we just - // substituted, the PHI node may be deleted. Don't iterate through it the - // last time. - if (SI->getCondition() != PN) return; - } - } -} - - -// RevectorBlockTo - Revector the unconditional branch at the end of FromBB to -// the ToBB block, which is one of the successors of its current successor. -void CondProp::RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB) { - BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator()); - assert(FromBr->isUnconditional() && "FromBB should end with uncond br!"); - - // Get the old block we are threading through. - BasicBlock *OldSucc = FromBr->getSuccessor(0); - - // OldSucc had multiple successors. If ToBB has multiple predecessors, then - // the edge between them would be critical, which we already took care of. - // If ToBB has single operand PHI node then take care of it here. - FoldSingleEntryPHINodes(ToBB); - - // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. - OldSucc->removePredecessor(FromBB); - - // Change FromBr to branch to the new destination. - FromBr->setSuccessor(0, ToBB); - - MadeChange = true; -} - -bool CondProp::RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI){ - BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator()); - if (!FromBr->isUnconditional()) - return false; - - // Get the old block we are threading through. - BasicBlock *OldSucc = FromBr->getSuccessor(0); - - // If the condition is a constant, simply revector the unconditional branch at - // the end of FromBB to one of the successors of its current successor. - if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) { - BasicBlock *ToBB = BI->getSuccessor(CB->isZero()); - - // OldSucc had multiple successors. If ToBB has multiple predecessors, then - // the edge between them would be critical, which we already took care of. - // If ToBB has single operand PHI node then take care of it here. - FoldSingleEntryPHINodes(ToBB); - - // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. - OldSucc->removePredecessor(FromBB); - - // Change FromBr to branch to the new destination. - FromBr->setSuccessor(0, ToBB); - } else { - BasicBlock *Succ0 = BI->getSuccessor(0); - // Do not perform transform if the new destination has PHI nodes. The - // transform will add new preds to the PHI's. - if (isa<PHINode>(Succ0->begin())) - return false; - - BasicBlock *Succ1 = BI->getSuccessor(1); - if (isa<PHINode>(Succ1->begin())) - return false; - - // Insert the new conditional branch. - BranchInst::Create(Succ0, Succ1, Cond, FromBr); - - FoldSingleEntryPHINodes(Succ0); - FoldSingleEntryPHINodes(Succ1); - - // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. - OldSucc->removePredecessor(FromBB); - - // Delete the old branch. - FromBr->eraseFromParent(); - } - - MadeChange = true; - return true; -} diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 4fee327..ea20813 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -66,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.erase(WorkList.begin()); // Get an element from the worklist... if (!I->use_empty()) // Don't muck with dead instructions... 
- if (Constant *C = ConstantFoldInstruction(I, F.getContext())) { + if (Constant *C = ConstantFoldInstruction(I)) { // Add all of the users of this instruction to the worklist, they might // be constant propagatable now... for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 90436f4..b0988b5 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -78,19 +78,96 @@ static RegisterPass<DSE> X("dse", "Dead Store Elimination"); FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } -/// isValueAtLeastAsBigAs - Return true if V1 is greater than or equal to the -/// stored size of V2. This returns false if we don't know. +/// doesClobberMemory - Does this instruction clobber (write without reading) +/// some memory? +static bool doesClobberMemory(Instruction *I) { + if (isa<StoreInst>(I)) + return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: + case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true; + } + } + return false; +} + +/// isElidable - If the value of this instruction and the memory it writes to is +/// unused, may we delete this instrtction? +static bool isElidable(Instruction *I) { + assert(doesClobberMemory(I)); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + return II->getIntrinsicID() != Intrinsic::lifetime_end; + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return !SI->isVolatile(); + return true; +} + +/// getPointerOperand - Return the pointer that is being clobbered. +static Value *getPointerOperand(Instruction *I) { + assert(doesClobberMemory(I)); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerOperand(); + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) + return MI->getOperand(1); + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: + assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return II->getOperand(1); + case Intrinsic::lifetime_end: + return II->getOperand(2); + } +} + +/// getStoreSize - Return the length in bytes of the write by the clobbering +/// instruction. If variable or unknown, returns -1. +static unsigned getStoreSize(Instruction *I, const TargetData *TD) { + assert(doesClobberMemory(I)); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (!TD) return -1u; + return TD->getTypeStoreSize(SI->getOperand(0)->getType()); + } + + Value *Len; + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + Len = MI->getLength(); + } else { + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: + assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return -1u; + case Intrinsic::lifetime_end: + Len = II->getOperand(1); + break; + } + } + if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len)) + if (!LenCI->isAllOnesValue()) + return LenCI->getZExtValue(); + return -1u; +} + +/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is +/// greater than or equal to the store in I2. This returns false if we don't +/// know. 
/// -static bool isValueAtLeastAsBigAs(Value *V1, Value *V2, const TargetData *TD) { - const Type *V1Ty = V1->getType(), *V2Ty = V2->getType(); +static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2, + const TargetData *TD) { + const Type *I1Ty = getPointerOperand(I1)->getType(); + const Type *I2Ty = getPointerOperand(I2)->getType(); // Exactly the same type, must have exactly the same size. - if (V1Ty == V2Ty) return true; + if (I1Ty == I2Ty) return true; - // If we don't have target data, we don't know. - if (TD == 0) return false; + int I1Size = getStoreSize(I1, TD); + int I2Size = getStoreSize(I2, TD); - return TD->getTypeStoreSize(V1Ty) >= TD->getTypeStoreSize(V2Ty); + return I1Size != -1 && I2Size != -1 && I1Size >= I2Size; } bool DSE::runOnBasicBlock(BasicBlock &BB) { @@ -104,14 +181,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // If we find a store or a free, get its memory dependence. - if (!isa<StoreInst>(Inst) && !isFreeCall(Inst)) + if (!doesClobberMemory(Inst) && !isFreeCall(Inst)) continue; - // Don't molest volatile stores or do queries that will return "clobber". - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - if (SI->isVolatile()) - continue; - MemDepResult InstDep = MD.getDependency(Inst); // Ignore non-local stores. @@ -124,16 +196,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { continue; } - StoreInst *SI = cast<StoreInst>(Inst); - // If not a definite must-alias dependency, ignore it. if (!InstDep.isDef()) continue; // If this is a store-store dependence, then the previous store is dead so // long as this store is at least as big as it. - if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst())) - if (isValueAtLeastAsBigAs(SI->getOperand(0), DepStore->getOperand(0),TD)){ + if (doesClobberMemory(InstDep.getInst())) { + Instruction *DepStore = InstDep.getInst(); + if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) && + isElidable(DepStore)) { // Delete the store and now-dead instructions that feed it. DeleteDeadInstruction(DepStore); NumFastStores++; @@ -146,37 +218,43 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { --BBI; continue; } + } + + if (!isElidable(Inst)) + continue; // If we're storing the same value back to a pointer that we just // loaded from, then the store can be removed. - if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { - if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - SI->getOperand(0) == DepLoad) { - // DeleteDeadInstruction can delete the current instruction. Save BBI - // in case we need it. - WeakVH NextInst(BBI); - - DeleteDeadInstruction(SI); - - if (NextInst == 0) // Next instruction deleted. - BBI = BB.begin(); - else if (BBI != BB.begin()) // Revisit this instruction if possible. - --BBI; - NumFastStores++; - MadeChange = true; - continue; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { + if (SI->getPointerOperand() == DepLoad->getPointerOperand() && + SI->getOperand(0) == DepLoad) { + // DeleteDeadInstruction can delete the current instruction. Save BBI + // in case we need it. + WeakVH NextInst(BBI); + + DeleteDeadInstruction(SI); + + if (NextInst == 0) // Next instruction deleted. + BBI = BB.begin(); + else if (BBI != BB.begin()) // Revisit this instruction if possible. + --BBI; + NumFastStores++; + MadeChange = true; + continue; + } } } // If this is a lifetime end marker, we can throw away the store. 
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(InstDep.getInst())) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) { if (II->getIntrinsicID() == Intrinsic::lifetime_end) { // Delete the store and now-dead instructions that feed it. // DeleteDeadInstruction can delete the current instruction. Save BBI // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI); + DeleteDeadInstruction(Inst); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -202,11 +280,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - StoreInst *Dependency = dyn_cast_or_null<StoreInst>(Dep.getInst()); - if (!Dependency || Dependency->isVolatile()) + Instruction *Dependency = Dep.getInst(); + if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency)) return false; - Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject(); + Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject(); // Check for aliasing. if (AA.alias(F->getOperand(1), 1, DepPointer, 1) != @@ -251,39 +329,28 @@ bool DSE::handleEndBlock(BasicBlock &BB) { --BBI; // If we find a store whose pointer is dead. - if (StoreInst* S = dyn_cast<StoreInst>(BBI)) { - if (!S->isVolatile()) { + if (doesClobberMemory(BBI)) { + if (isElidable(BBI)) { // See through pointer-to-pointer bitcasts - Value* pointerOperand = S->getPointerOperand()->getUnderlyingObject(); + Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject(); // Alloca'd pointers or byval arguments (which are functionally like // alloca's) are valid candidates for removal. if (deadPointers.count(pointerOperand)) { // DCE instructions only used to calculate that store. + Instruction *Dead = BBI; BBI++; - DeleteDeadInstruction(S, &deadPointers); + DeleteDeadInstruction(Dead, &deadPointers); NumFastStores++; MadeChange = true; + continue; } } - continue; - } - - // We can also remove memcpy's to local variables at the end of a function. - if (MemCpyInst *M = dyn_cast<MemCpyInst>(BBI)) { - Value *dest = M->getDest()->getUnderlyingObject(); - - if (deadPointers.count(dest)) { - BBI++; - DeleteDeadInstruction(M, &deadPointers); - NumFastOther++; - MadeChange = true; + // Because a memcpy or memmove is also a load, we can't skip it if we + // didn't remove it. + if (!isa<MemTransferInst>(BBI)) continue; - } - - // Because a memcpy is also a load, we can't skip it if we didn't remove - // it. 
} Value* killPointer = 0; @@ -304,11 +371,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) { killPointer = L->getPointerOperand(); } else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) { killPointer = V->getOperand(0); - } else if (isa<MemCpyInst>(BBI) && - isa<ConstantInt>(cast<MemCpyInst>(BBI)->getLength())) { - killPointer = cast<MemCpyInst>(BBI)->getSource(); + } else if (isa<MemTransferInst>(BBI) && + isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) { + killPointer = cast<MemTransferInst>(BBI)->getSource(); killPointerSize = cast<ConstantInt>( - cast<MemCpyInst>(BBI)->getLength())->getZExtValue(); + cast<MemTransferInst>(BBI)->getLength())->getZExtValue(); } else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) { deadPointers.erase(A); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 0e3f750..a8f39c1 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -443,6 +443,11 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { valueNumbering[C] = e; return e; } + if (!MD) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } MemDepResult local_dep = MD->getDependency(C); @@ -624,7 +629,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) { /// lookup - Returns the value number of the specified value. Fails if /// the value has not yet been numbered. uint32_t ValueTable::lookup(Value *V) const { - DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); + DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); assert(VI != valueNumbering.end() && "Value not numbered?"); return VI->second; } @@ -644,7 +649,7 @@ void ValueTable::erase(Value *V) { /// verifyRemoved - Verify that the value is removed from all internal data /// structures. void ValueTable::verifyRemoved(const Value *V) const { - for (DenseMap<Value*, uint32_t>::iterator + for (DenseMap<Value*, uint32_t>::const_iterator I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { assert(I->first != V && "Inst still occurs in value numbering map!"); } @@ -669,10 +674,12 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - GVN(bool nopre = false) : FunctionPass(&ID), NoPRE(nopre) { } + explicit GVN(bool nopre = false, bool noloads = false) + : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { } private: bool NoPRE; + bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; @@ -682,7 +689,8 @@ namespace { // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); - AU.addRequired<MemoryDependenceAnalysis>(); + if (!NoLoads) + AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<DominatorTree>(); @@ -711,7 +719,9 @@ namespace { } // createGVNPass - The public interface to this file... -FunctionPass *llvm::createGVNPass(bool NoPRE) { return new GVN(NoPRE); } +FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) { + return new GVN(NoPRE, NoLoads); +} static RegisterPass<GVN> X("gvn", "Global Value Numbering"); @@ -1476,6 +1486,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. 
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { + if (!MD) + return false; + if (L->isVolatile()) return false; @@ -1686,7 +1699,7 @@ bool GVN::processInstruction(Instruction *I, if (constVal) { p->replaceAllUsesWith(constVal); - if (isa<PointerType>(constVal->getType())) + if (MD && isa<PointerType>(constVal->getType())) MD->invalidateCachedPointerInfo(constVal); VN.erase(p); @@ -1707,7 +1720,7 @@ bool GVN::processInstruction(Instruction *I, // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); - if (isa<PointerType>(repl->getType())) + if (MD && isa<PointerType>(repl->getType())) MD->invalidateCachedPointerInfo(repl); toErase.push_back(I); return true; @@ -1721,7 +1734,8 @@ bool GVN::processInstruction(Instruction *I, /// runOnFunction - This is the main transformation entry point for a function. bool GVN::runOnFunction(Function& F) { - MD = &getAnalysis<MemoryDependenceAnalysis>(); + if (!NoLoads) + MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); @@ -1793,7 +1807,7 @@ bool GVN::processBlock(BasicBlock *BB) { for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) { DEBUG(errs() << "GVN removed: " << **I << '\n'); - MD->removeInstruction(*I); + if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } @@ -1946,12 +1960,12 @@ bool GVN::performPRE(Function &F) { localAvail[CurrentBlock]->table[ValNo] = Phi; CurInst->replaceAllUsesWith(Phi); - if (isa<PointerType>(Phi->getType())) + if (MD && isa<PointerType>(Phi->getType())) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n'); - MD->removeInstruction(CurInst); + if (MD) MD->removeInstruction(CurInst); CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); Changed = true; @@ -2011,12 +2025,12 @@ void GVN::verifyRemoved(const Instruction *Inst) const { // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. 
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator + for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { const ValueNumberScope *VNS = I->second; while (VNS) { - for (DenseMap<uint32_t, Value*>::iterator + for (DenseMap<uint32_t, Value*>::const_iterator II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { assert(II->second != Inst && "Inst still in value numbering scope!"); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index b0bc70c..2912421 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -536,8 +536,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { BasicBlock *ExitBlock = L->getExitBlock(); if (!ExitBlock) return; - Instruction *InsertPt = ExitBlock->getFirstNonPHI(); BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) return; + + Instruction *InsertPt = ExitBlock->getFirstNonPHI(); BasicBlock::iterator I = Preheader->getTerminator(); while (I != Preheader->begin()) { --I; diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 7e75cfb..1c48366 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -42,6 +42,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" @@ -283,6 +284,8 @@ namespace { Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); Instruction *visitCallInst(CallInst &CI); Instruction *visitInvokeInst(InvokeInst &II); + + Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); @@ -380,10 +383,6 @@ namespace { /// commutative operators. bool SimplifyCommutative(BinaryOperator &I); - /// SimplifyCompare - This reorders the operands of a CmpInst to get them in - /// most-complex to least-complex order. - bool SimplifyCompare(CmpInst &I); - /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, @@ -478,6 +477,34 @@ static const Type *getPromotedType(const Type *Ty) { return Ty; } +/// ShouldChangeType - Return true if it is desirable to convert a computation +/// from 'From' to 'To'. We don't want to convert from a legal to an illegal +/// type for example, or from a smaller to a larger illegal type. +static bool ShouldChangeType(const Type *From, const Type *To, + const TargetData *TD) { + assert(isa<IntegerType>(From) && isa<IntegerType>(To)); + + // If we don't have TD, we don't know if the source/dest are legal. + if (!TD) return false; + + unsigned FromWidth = From->getPrimitiveSizeInBits(); + unsigned ToWidth = To->getPrimitiveSizeInBits(); + bool FromLegal = TD->isLegalInteger(FromWidth); + bool ToLegal = TD->isLegalInteger(ToWidth); + + // If this is a legal integer from type, and the result would be an illegal + // type, don't do the transformation. + if (FromLegal && !ToLegal) + return false; + + // Otherwise, if both are illegal, do not increase the size of the result. We + // do allow things like i160 -> i64, but not i64 -> i160. 
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth) + return false; + + return true; +} + /// getBitCastOperand - If the specified operand is a CastInst, a constant /// expression bitcast, or a GetElementPtrInst with all zero indices, return the /// operand value, otherwise return null. @@ -584,17 +611,6 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { return Changed; } -/// SimplifyCompare - For a CmpInst this function just orders the operands -/// so that theyare listed from right (least complex) to left (most complex). -/// This puts constants before unary operators before binary operators. -bool InstCombiner::SimplifyCompare(CmpInst &I) { - if (getComplexity(I.getOperand(0)) >= getComplexity(I.getOperand(1))) - return false; - I.swapOperands(); - // Compare instructions are not associative so there's nothing else we can do. - return true; -} - // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction // if the LHS is a constant zero (which is the 'negate' form). // @@ -4304,25 +4320,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) // X & undef -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // and X, X = X - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Op1); + if (Value *V = SimplifyAndInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; - if (isa<VectorType>(I.getType())) { - if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { - if (CP->isAllOnesValue()) // X & <-1,-1> -> X - return ReplaceInstUsesWith(I, I.getOperand(0)); - } else if (isa<ConstantAggregateZero>(Op1)) { - return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0> - } - } + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); @@ -4443,42 +4449,29 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return NV; } - Value *Op0NotVal = dyn_castNotVal(Op0); - Value *Op1NotVal = dyn_castNotVal(Op1); - - if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // (~A & ~B) == (~(A | B)) - De Morgan's Law - if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) { - Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - return BinaryOperator::CreateNot(Or); - } - + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(Or); + } + { Value *A = 0, *B = 0, *C = 0, *D = 0; - if (match(Op0, m_Or(m_Value(A), m_Value(B)))) { - if (A == Op1 || B == Op1) // (A | ?) 
& A --> A - return ReplaceInstUsesWith(I, Op1); - - // (A|B) & ~(A&B) -> A^B - if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - } + // (A|B) & ~(A&B) -> A^B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); - if (match(Op1, m_Or(m_Value(A), m_Value(B)))) { - if (A == Op0 || B == Op0) // A & (A | ?) --> A - return ReplaceInstUsesWith(I, Op0); - - // ~(A&B) & (A|B) -> A^B - if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - } + // ~(A&B) & (A|B) -> A^B + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_Value(B)))) { @@ -5010,27 +5003,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) // X | undef -> -1 - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - // or X, X = X - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Op0); - + if (Value *V = SimplifyOrInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; - if (isa<VectorType>(I.getType())) { - if (isa<ConstantAggregateZero>(Op1)) { - return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X - } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { - if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> - return ReplaceInstUsesWith(I, I.getOperand(1)); - } - } - // or X, -1 == -1 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { ConstantInt *C1 = 0; Value *X = 0; // (X & C1) | C2 --> (X | C2) & (C1|C2) @@ -5063,13 +5044,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *A = 0, *B = 0; ConstantInt *C1 = 0, *C2 = 0; - if (match(Op0, m_And(m_Value(A), m_Value(B)))) - if (A == Op1 || B == Op1) // (A & ?) | A --> A - return ReplaceInstUsesWith(I, Op1); - if (match(Op1, m_And(m_Value(A), m_Value(B)))) - if (A == Op0 || B == Op0) // A | (A & ?) --> A - return ReplaceInstUsesWith(I, Op0); - // (A | B) | C and A | (B | C) -> bswap if possible. // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. if (match(Op0, m_Or(m_Value(), m_Value())) || @@ -5203,23 +5177,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Ret) return Ret; } - if ((A = dyn_castNotVal(Op0))) { // ~A | Op1 - if (A == Op1) // ~A | A == -1 - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - } else { - A = 0; - } - // Note, A is still live here! 
- if ((B = dyn_castNotVal(Op1))) { // Op0 | ~B - if (Op0 == B) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - // (~A | ~B) == (~(A & B)) - De Morgan's Law - if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) { - Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan"); - return BinaryOperator::CreateNot(And); - } - } + // (~A | ~B) == (~(A & B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(And); + } // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) { @@ -5942,28 +5907,25 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, } Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { - bool Changed = SimplifyCompare(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + bool Changed = false; + + /// Orders the operands of the compare so that they are listed from most + /// complex to least complex. This puts constants before unary operators, + /// before binary operators. + if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + I.swapOperands(); + Changed = true; + } - // Fold trivial predicates. - if (I.getPredicate() == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0)); - if (I.getPredicate() == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1)); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Simplify 'fcmp pred X, X' if (Op0 == Op1) { switch (I.getPredicate()) { default: llvm_unreachable("Unknown predicate!"); - case FCmpInst::FCMP_UEQ: // True if unordered or equal - case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal - case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1)); - case FCmpInst::FCMP_OGT: // True if ordered and greater than - case FCmpInst::FCMP_OLT: // True if ordered and less than - case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0)); - case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) case FCmpInst::FCMP_ULT: // True if unordered or less than case FCmpInst::FCMP_UGT: // True if unordered or greater than @@ -5984,23 +5946,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } } - if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast<Constant>(Op1)) { - // If the constant is a nan, see if we can fold the comparison based on it. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->getValueAPF().isNaN()) { - if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and... - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - assert(FCmpInst::isUnordered(I.getPredicate()) && - "Comparison must be either ordered or unordered!"); - // True if unordered. 
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - } - } - if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) switch (LHSI->getOpcode()) { case Instruction::PHI: @@ -6047,26 +5994,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { - bool Changed = SimplifyCompare(I); + bool Changed = false; + + /// Orders the operands of the compare so that they are listed from most + /// complex to least complex. This puts constants before unary operators, + /// before binary operators. + if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + I.swapOperands(); + Changed = true; + } + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - const Type *Ty = Op0->getType(); - - // icmp X, X - if (Op0 == Op1) - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), - I.isTrueWhenEqual())); - - if (isa<UndefValue>(Op1)) // X icmp undef -> undef - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value - // addresses never equal each other! We already know that Op0 != Op1. - if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) || - isa<ConstantPointerNull>(Op0)) && - (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) || - isa<ConstantPointerNull>(Op1))) - return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + const Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations if (Ty == Type::getInt1Ty(*Context)) { @@ -6131,27 +6074,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If we have an icmp le or icmp ge instruction, turn it into the // appropriate icmp lt or icmp gt instruction. This allows us to rely on - // them being folded in the code below. + // them being folded in the code below. The SimplifyICmpInst code has + // already handled the edge cases for us, so we just assert on them. 
switch (I.getPredicate()) { default: break; case ICmpInst::ICMP_ULE: - if (CI->isMaxValue(false)) // A <=u MAX -> TRUE - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI)); case ICmpInst::ICMP_SLE: - if (CI->isMaxValue(true)) // A <=s MAX -> TRUE - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI)); case ICmpInst::ICMP_UGE: - if (CI->isMinValue(false)) // A >=u MIN -> TRUE - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_UGT, Op0, SubOne(CI)); case ICmpInst::ICMP_SGE: - if (CI->isMinValue(true)) // A >=s MIN -> TRUE - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI)); } @@ -8083,8 +8023,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) - return ConstantExpr::getIntegerCast(C, Ty, - isSigned /*Sext or ZExt*/); + return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // Otherwise, it must be an instruction. Instruction *I = cast<Instruction>(V); @@ -8117,8 +8056,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, return I->getOperand(0); // Otherwise, must be the same type of cast, so just reinsert a new one. - Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0), - Ty); + Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),Ty); break; case Instruction::Select: { Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); @@ -8167,9 +8105,15 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return NV; // If we are casting a PHI then fold the cast into the PHI - if (isa<PHINode>(Src)) - if (Instruction *NV = FoldOpIntoPhi(CI)) - return NV; + if (isa<PHINode>(Src)) { + // We don't do this if this would create a PHI node with an illegal type if + // it is currently legal. + if (!isa<IntegerType>(Src->getType()) || + !isa<IntegerType>(CI.getType()) || + ShouldChangeType(CI.getType(), Src->getType(), TD)) + if (Instruction *NV = FoldOpIntoPhi(CI)) + return NV; + } return 0; } @@ -8289,23 +8233,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { return commonCastTransforms(CI); } -/// isSafeIntegerType - Return true if this is a basic integer type, not a crazy -/// type like i42. We don't want to introduce operations on random non-legal -/// integer types where they don't already exist in the code. In the future, -/// we should consider making this based off target-data, so that 32-bit targets -/// won't get i64 operations etc. -static bool isSafeIntegerType(const Type *Ty) { - switch (Ty->getPrimitiveSizeInBits()) { - case 8: - case 16: - case 32: - case 64: - return true; - default: - return false; - } -} - /// commonIntCastTransforms - This function implements the common transforms /// for trunc, zext, and sext. 
Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { @@ -8334,8 +8261,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - if ((isSafeIntegerType(DestTy->getScalarType()) || - !isSafeIntegerType(SrcI->getType()->getScalarType())) && + if ((isa<VectorType>(DestTy) || + ShouldChangeType(SrcI->getType(), DestTy, TD)) && CanEvaluateInDifferentType(SrcI, DestTy, CI.getOpcode(), NumCastsRemoved)) { // If this cast is a truncate, evaluting in a different type always @@ -8356,6 +8283,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { break; case Instruction::ZExt: { DoXForm = NumCastsRemoved >= 1; + if (!DoXForm && 0) { // If it's unnecessary to issue an AND to clear the high bits, it's // always profitable to do this xform. @@ -8522,7 +8450,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return BinaryOperator::CreateLShr(V1, V2); } } - + return 0; } @@ -10880,9 +10808,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { } -// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" -// operator and they all are only used by the PHI, PHI together their -// inputs, and do the operation once, to the result of the PHI. + +/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" +/// operator and they all are only used by the PHI, PHI together their +/// inputs, and do the operation once, to the result of the PHI. Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); @@ -10900,6 +10829,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { if (isa<CastInst>(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); + + // Be careful about transforming integer PHIs. We don't want to pessimize + // the code by turning an i32 into an i1293. + if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) { + if (!ShouldChangeType(PN.getType(), CastSrcTy, TD)) + return 0; + } } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. @@ -11012,6 +10948,222 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, } +namespace { +struct PHIUsageRecord { + unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) + unsigned Shift; // The amount shifted. + Instruction *Inst; // The trunc instruction. + + PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) + : PHIId(pn), Shift(Sh), Inst(User) {} + + bool operator<(const PHIUsageRecord &RHS) const { + if (PHIId < RHS.PHIId) return true; + if (PHIId > RHS.PHIId) return false; + if (Shift < RHS.Shift) return true; + if (Shift > RHS.Shift) return false; + return Inst->getType()->getPrimitiveSizeInBits() < + RHS.Inst->getType()->getPrimitiveSizeInBits(); + } +}; + +struct LoweredPHIRecord { + PHINode *PN; // The PHI that was lowered. + unsigned Shift; // The amount shifted. + unsigned Width; // The width extracted. + + LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) + : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} + + // Ctor form used by DenseMap. 
+ LoweredPHIRecord(PHINode *pn, unsigned Sh) + : PN(pn), Shift(Sh), Width(0) {} +}; +} + +namespace llvm { + template<> + struct DenseMapInfo<LoweredPHIRecord> { + static inline LoweredPHIRecord getEmptyKey() { + return LoweredPHIRecord(0, 0); + } + static inline LoweredPHIRecord getTombstoneKey() { + return LoweredPHIRecord(0, 1); + } + static unsigned getHashValue(const LoweredPHIRecord &Val) { + return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ + (Val.Width>>3); + } + static bool isEqual(const LoweredPHIRecord &LHS, + const LoweredPHIRecord &RHS) { + return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && + LHS.Width == RHS.Width; + } + static bool isPod() { return true; } + }; +} + + +/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an +/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If +/// so, we split the PHI into the various pieces being extracted. This sort of +/// thing is introduced when SROA promotes an aggregate to large integer values. +/// +/// TODO: The user of the trunc may be an bitcast to float/double/vector or an +/// inttoptr. We should produce new PHIs in the right type. +/// +Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { + // PHIUsers - Keep track of all of the truncated values extracted from a set + // of PHIs, along with their offset. These are the things we want to rewrite. + SmallVector<PHIUsageRecord, 16> PHIUsers; + + // PHIs are often mutually cyclic, so we keep track of a whole set of PHI + // nodes which are extracted from. PHIsToSlice is a set we use to avoid + // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to + // check the uses of (to ensure they are all extracts). + SmallVector<PHINode*, 8> PHIsToSlice; + SmallPtrSet<PHINode*, 8> PHIsInspected; + + PHIsToSlice.push_back(&FirstPhi); + PHIsInspected.insert(&FirstPhi); + + for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { + PHINode *PN = PHIsToSlice[PHIId]; + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // If the user is a PHI, inspect its uses recursively. + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (PHIsInspected.insert(UserPN)) + PHIsToSlice.push_back(UserPN); + continue; + } + + // Truncates are always ok. + if (isa<TruncInst>(User)) { + PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); + continue; + } + + // Otherwise it must be a lshr which can only be used by one trunc. + if (User->getOpcode() != Instruction::LShr || + !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || + !isa<ConstantInt>(User->getOperand(1))) + return 0; + + unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); + PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); + } + } + + // If we have no users, they must be all self uses, just nuke the PHI. + if (PHIUsers.empty()) + return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); + + // If this phi node is transformable, create new PHIs for all the pieces + // extracted out of it. First, sort the users by their offset and size. + array_pod_sort(PHIUsers.begin(), PHIUsers.end()); + + DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; + ); + + // PredValues - This is a temporary used when rewriting PHI nodes. 
It is + // hoisted out here to avoid construction/destruction thrashing. + DenseMap<BasicBlock*, Value*> PredValues; + + // ExtractedVals - Each new PHI we introduce is saved here so we don't + // introduce redundant PHIs. + DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; + + for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { + unsigned PHIId = PHIUsers[UserI].PHIId; + PHINode *PN = PHIsToSlice[PHIId]; + unsigned Offset = PHIUsers[UserI].Shift; + const Type *Ty = PHIUsers[UserI].Inst->getType(); + + PHINode *EltPHI; + + // If we've already lowered a user like this, reuse the previously lowered + // value. + if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { + + // Otherwise, Create the new PHI node for this user. + EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); + assert(EltPHI->getType() != PN->getType() && + "Truncate didn't shrink phi?"); + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + Value *&PredVal = PredValues[Pred]; + + // If we already have a value for this predecessor, reuse it. + if (PredVal) { + EltPHI->addIncoming(PredVal, Pred); + continue; + } + + // Handle the PHI self-reuse case. + Value *InVal = PN->getIncomingValue(i); + if (InVal == PN) { + PredVal = EltPHI; + EltPHI->addIncoming(PredVal, Pred); + continue; + } else if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { + // If the incoming value was a PHI, and if it was one of the PHIs we + // already rewrote it, just use the lowered value. + if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { + PredVal = Res; + EltPHI->addIncoming(PredVal, Pred); + continue; + } + } + + // Otherwise, do an extract in the predecessor. + Builder->SetInsertPoint(Pred, Pred->getTerminator()); + Value *Res = InVal; + if (Offset) + Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), + Offset), "extract"); + Res = Builder->CreateTrunc(Res, Ty, "extract.t"); + PredVal = Res; + EltPHI->addIncoming(Res, Pred); + + // If the incoming value was a PHI, and if it was one of the PHIs we are + // rewriting, we will ultimately delete the code we inserted. This + // means we need to revisit that PHI to make sure we extract out the + // needed piece. + if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i))) + if (PHIsInspected.count(OldInVal)) { + unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), + OldInVal)-PHIsToSlice.begin(); + PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, + cast<Instruction>(Res))); + ++UserE; + } + } + PredValues.clear(); + + DEBUG(errs() << " Made element PHI for offset " << Offset << ": " + << *EltPHI << '\n'); + ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; + } + + // Replace the use of this piece with the PHI node. + ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); + } + + // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) + // with undefs. + Value *Undef = UndefValue::get(FirstPhi.getType()); + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + ReplaceInstUsesWith(*PHIsToSlice[i], Undef); + return ReplaceInstUsesWith(FirstPhi, Undef); +} + // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { @@ -11117,6 +11269,15 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { } } + // If this is an integer PHI and we know that it has an illegal type, see if + // it is only used by trunc or trunc(lshr) operations. 
If so, we split the + // PHI into the various pieces being extracted. This sort of thing is + // introduced when SROA promotes an aggregate to a single large integer type. + if (isa<IntegerType>(PN.getType()) && TD && + !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) + if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) + return Res; + return 0; } @@ -12210,6 +12371,47 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return ExtractValueInst::Create(IV->getInsertedValueOperand(), exti, exte); } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { + // We're extracting from an intrinsic, see if we're the only user, which + // allows us to simplify multiple result intrinsics to simpler things that + // just get one value.. + if (II->hasOneUse()) { + // Check if we're grabbing the overflow bit or the result of a 'with + // overflow' intrinsic. If it's the latter we can remove the intrinsic + // and replace it with a traditional binary instruction. + switch (II->getIntrinsicID()) { + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + II->replaceAllUsesWith(UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateAdd(LHS, RHS); + } + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + II->replaceAllUsesWith(UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateSub(LHS, RHS); + } + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + II->replaceAllUsesWith(UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateMul(LHS, RHS); + } + break; + default: + break; + } + } + } // Can't simplify extracts from other values. Note that nested extracts are // already simplified implicitely by the above (extract ( extract (insert) ) // will be translated into extract ( insert ( extract ) ) first and then just @@ -12715,29 +12917,33 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (isa<UndefValue>(RHS)) { std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI); - std::vector<unsigned> NewMask; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) - if (Mask[i] >= 2*e) - NewMask.push_back(2*e); - else - NewMask.push_back(LHSMask[Mask[i]]); + if (LHSMask.size() == Mask.size()) { + std::vector<unsigned> NewMask; + for (unsigned i = 0, e = Mask.size(); i != e; ++i) + if (Mask[i] >= 2*e) + NewMask.push_back(2*e); + else + NewMask.push_back(LHSMask[Mask[i]]); - // If the result mask is equal to the src shuffle or this shuffle mask, do - // the replacement. - if (NewMask == LHSMask || NewMask == Mask) { - unsigned LHSInNElts = - cast<VectorType>(LHSSVI->getOperand(0)->getType())->getNumElements(); - std::vector<Constant*> Elts; - for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); - } else { - Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i])); + // If the result mask is equal to the src shuffle or this + // shuffle mask, do the replacement. 
+ if (NewMask == LHSMask || NewMask == Mask) { + unsigned LHSInNElts = + cast<VectorType>(LHSSVI->getOperand(0)->getType())-> + getNumElements(); + std::vector<Constant*> Elts; + for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { + if (NewMask[i] >= LHSInNElts*2) { + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); + } else { + Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), + NewMask[i])); + } } + return new ShuffleVectorInst(LHSSVI->getOperand(0), + LHSSVI->getOperand(1), + ConstantVector::get(Elts)); } - return new ShuffleVectorInst(LHSSVI->getOperand(0), - LHSSVI->getOperand(1), - ConstantVector::get(Elts)); } } } @@ -12824,7 +13030,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { + if (Constant *C = ConstantFoldInstruction(Inst, TD)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); @@ -12846,8 +13052,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, if (!FoldedConstants.insert(CE)) continue; - Constant *NewC = - ConstantFoldConstantExpression(CE, BB->getContext(), TD); + Constant *NewC = ConstantFoldConstantExpression(CE, TD); if (NewC && NewC != CE) { *i = NewC; MadeIRChange = true; @@ -12954,7 +13159,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && isa<Constant>(I->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) { + if (Constant *C = ConstantFoldInstruction(I, TD)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. @@ -13065,7 +13270,7 @@ bool InstCombiner::runOnFunction(Function &F) { /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. IRBuilder<true, TargetFolder, InstCombineIRInserter> - TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()), + TheBuilder(F.getContext(), TargetFolder(TD), InstCombineIRInserter(Worklist)); Builder = &TheBuilder; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 10c9ec6..5864113 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -16,7 +16,8 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -40,6 +41,12 @@ Threshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden); +// Turn on use of LazyValueInfo. +static cl::opt<bool> +EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden); + + + namespace { /// This pass performs 'jump threading', which looks at blocks that have /// multiple predecessors and multiple successors. 
If one or more of the @@ -59,6 +66,7 @@ namespace { /// class JumpThreading : public FunctionPass { TargetData *TD; + LazyValueInfo *LVI; #ifdef NDEBUG SmallPtrSet<BasicBlock*, 16> LoopHeaders; #else @@ -69,20 +77,31 @@ namespace { JumpThreading() : FunctionPass(&ID) {} bool runOnFunction(Function &F); - void FindLoopHeaders(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + if (EnableLVI) + AU.addRequired<LazyValueInfo>(); + } + + void FindLoopHeaders(Function &F); bool ProcessBlock(BasicBlock *BB); - bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB); + bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs, + BasicBlock *SuccBB); bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, BasicBlock *PredBB); - - BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val); + + typedef SmallVectorImpl<std::pair<ConstantInt*, + BasicBlock*> > PredValueInfo; + + bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, + PredValueInfo &Result); + bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB); + + bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessJumpOnPHI(PHINode *PN); - bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd); - bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB); bool SimplifyPartiallyRedundantLoad(LoadInst *LI); }; @@ -100,6 +119,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } bool JumpThreading::runOnFunction(Function &F) { DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n"); TD = getAnalysisIfAvailable<TargetData>(); + LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0; FindLoopHeaders(F); @@ -109,6 +129,7 @@ bool JumpThreading::runOnFunction(Function &F) { bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { BasicBlock *BB = I; + // Thread all of the branches we can over this block. while (ProcessBlock(BB)) Changed = true; @@ -123,6 +144,29 @@ bool JumpThreading::runOnFunction(Function &F) { LoopHeaders.erase(BB); DeleteDeadBlock(BB); Changed = true; + } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + // Can't thread an unconditional jump, but if the block is "almost + // empty", we can replace uses of it with uses of the successor and make + // this dead. + if (BI->isUnconditional() && + BB != &BB->getParent()->getEntryBlock()) { + BasicBlock::iterator BBI = BB->getFirstNonPHI(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + // If the terminator is the only non-phi instruction, try to nuke it. + if (BBI->isTerminator()) { + // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the + // block, we have to make sure it isn't in the LoopHeaders set. We + // reinsert afterward in the rare case when the block isn't deleted. + bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); + + if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) + Changed = true; + else if (ErasedFromLoopHeaders) + LoopHeaders.insert(BB); + } + } } } AnotherIteration = Changed; @@ -139,6 +183,10 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { /// Ignore PHI nodes, these will be flattened when duplication happens. BasicBlock::const_iterator I = BB->getFirstNonPHI(); + // FIXME: THREADING will delete values that are just used to compute the + // branch, so they shouldn't count against the duplication cost. 
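
For readers not steeped in this pass: jump threading clones a block into predecessors that already determine its branch, so each such predecessor jumps straight to the known successor. A minimal source-level sketch of the effect, using made-up names (cond, foo, bar) rather than anything from this patch; it also shows why the values feeding the threaded branch go dead, which is what the FIXME above refers to:

extern void foo();
extern void bar();

void example(bool cond) {
  int x;
  if (cond) x = 1; else x = 0;   // each predecessor of the merge block pins down x
  if (x)    foo(); else bar();   // threadable: after duplication, the 'cond'
                                 // branch reaches foo()/bar() directly and the
                                 // re-test of x (and x itself) becomes dead
}
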
+ + // Sum up the cost of each instruction until we get to the terminator. Don't // include the terminator because the copy won't include it. unsigned Size = 0; @@ -173,8 +221,6 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { return Size; } - - /// FindLoopHeaders - We do not want jump threading to turn proper loop /// structures into irreducible loops. Doing this breaks up the loop nesting /// hierarchy and pessimizes later transformations. To prevent this from @@ -198,29 +244,181 @@ void JumpThreading::FindLoopHeaders(Function &F) { LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); } - -/// FactorCommonPHIPreds - If there are multiple preds with the same incoming -/// value for the PHI, factor them together so we get one block to thread for -/// the whole group. -/// This is important for things like "phi i1 [true, true, false, true, x]" -/// where we only need to clone the block for the true blocks once. +/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see +/// if we can infer that the value is a known ConstantInt in any of our +/// predecessors. If so, return the known list of value and pred BB in the +/// result vector. If a value is known to be undef, it is returned as null. +/// +/// This returns true if there were any known values. /// -BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) { - SmallVector<BasicBlock*, 16> CommonPreds; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Val) - CommonPreds.push_back(PN->getIncomingBlock(i)); - - if (CommonPreds.size() == 1) - return CommonPreds[0]; +bool JumpThreading:: +ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ + // If V is a constantint, then it is known in all predecessors. + if (isa<ConstantInt>(V) || isa<UndefValue>(V)) { + ConstantInt *CI = dyn_cast<ConstantInt>(V); - DEBUG(errs() << " Factoring out " << CommonPreds.size() - << " common predecessors.\n"); - return SplitBlockPredecessors(PN->getParent(), - &CommonPreds[0], CommonPreds.size(), - ".thr_comm", this); -} + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(CI, *PI)); + return true; + } + + // If V is a non-instruction value, or an instruction in a different block, + // then it can't be derived from a PHI. + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0 || I->getParent() != BB) { + + // Okay, if this is a live-in value, see if it has a known value at the end + // of any of our predecessors. + // + // FIXME: This should be an edge property, not a block end property. + /// TODO: Per PR2563, we could infer value range information about a + /// predecessor based on its terminator. + // + if (LVI) { + // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if + // "I" is a non-local compare-with-a-constant instruction. This would be + // able to handle value inequalities better, for example if the compare is + // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. + // Perhaps getConstantOnEdge should be smart enough to do this? + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. 
+ Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB); + if (PredCst == 0 || + (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst))) + continue; + + Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI)); + } + + return !Result.empty(); + } + + return false; + } + + /// If I is a PHI node, then we know the incoming values for any constants. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *InVal = PN->getIncomingValue(i); + if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) { + ConstantInt *CI = dyn_cast<ConstantInt>(InVal); + Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i))); + } + } + return !Result.empty(); + } + + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals; + + // Handle some boolean conditions. + if (I->getType()->getPrimitiveSizeInBits() == 1) { + // X | true -> true + // X & false -> false + if (I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::And) { + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); + ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals); + + if (LHSVals.empty() && RHSVals.empty()) + return false; + + ConstantInt *InterestingVal; + if (I->getOpcode() == Instruction::Or) + InterestingVal = ConstantInt::getTrue(I->getContext()); + else + InterestingVal = ConstantInt::getFalse(I->getContext()); + + // Scan for the sentinel. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) + if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) + Result.push_back(LHSVals[i]); + for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) + if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) + Result.push_back(RHSVals[i]); + return !Result.empty(); + } + + // Handle the NOT form of XOR. + if (I->getOpcode() == Instruction::Xor && + isa<ConstantInt>(I->getOperand(1)) && + cast<ConstantInt>(I->getOperand(1))->isOne()) { + ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result); + if (Result.empty()) + return false; + + // Invert the known values. + for (unsigned i = 0, e = Result.size(); i != e; ++i) + if (Result[i].first) + Result[i].first = + cast<ConstantInt>(ConstantExpr::getNot(Result[i].first)); + return true; + } + } + // Handle compare with phi operand, where the PHI is defined in this block. + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { + PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0)); + if (PN && PN->getParent() == BB) { + // We can do this simplification if any comparisons fold to true or false. + // See if any do. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = PN->getIncomingBlock(i); + Value *LHS = PN->getIncomingValue(i); + Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); + + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD); + if (Res == 0) { + if (!LVI || !isa<Constant>(RHS)) + continue; + + LazyValueInfo::Tristate + ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS, + cast<Constant>(RHS), PredBB, BB); + if (ResT == LazyValueInfo::Unknown) + continue; + Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT); + } + + if (isa<UndefValue>(Res)) + Result.push_back(std::make_pair((ConstantInt*)0, PredBB)); + else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res)) + Result.push_back(std::make_pair(CI, PredBB)); + } + + return !Result.empty(); + } + + + // If comparing a live-in value against a constant, see if we know the + // live-in value on any predecessors. 
+ if (LVI && isa<Constant>(Cmp->getOperand(1)) && + Cmp->getType()->isInteger() && // Not vector compare. + (!isa<Instruction>(Cmp->getOperand(0)) || + cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { + Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + LazyValueInfo::Tristate + Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + RHSCst, *PI, BB); + if (Res == LazyValueInfo::Unknown) + continue; + + Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI)); + } + + return !Result.empty(); + } + } + return false; +} + + /// GetBestDestForBranchOnUndef - If we determine that the specified block ends /// in an undefined jump, decide which block is best to revector to. @@ -251,7 +449,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // successor, merge the blocks. This encourages recursive jump threading // because now the condition in this block can be threaded through // predecessors of our predecessor block. - if (BasicBlock *SinglePred = BB->getSinglePredecessor()) + if (BasicBlock *SinglePred = BB->getSinglePredecessor()) { if (SinglePred->getTerminator()->getNumSuccessors() == 1 && SinglePred != BB) { // If SinglePred was a loop header, BB becomes one. @@ -267,10 +465,10 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { BB->moveBefore(&BB->getParent()->getEntryBlock()); return true; } - - // See if this block ends with a branch or switch. If so, see if the - // condition is a phi node. If so, and if an entry of the phi node is a - // constant, we can thread the block. + } + + // Look to see if the terminator is a branch of switch, if not we can't thread + // it. Value *Condition; if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { // Can't thread an unconditional jump. @@ -301,7 +499,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { TerminatorInst *BBTerm = BB->getTerminator(); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { if (i == BestSucc) continue; - BBTerm->getSuccessor(i)->removePredecessor(BB); + RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD); } DEBUG(errs() << " In block '" << BB->getName() @@ -318,7 +516,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // br COND, BBX, BBY // BBX: // br COND, BBZ, BBW - if (!Condition->hasOneUse() && // Multiple uses. + if (!LVI && + !Condition->hasOneUse() && // Multiple uses. (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition. pred_iterator PI = pred_begin(BB), E = pred_end(BB); if (isa<BranchInst>(BB->getTerminator())) { @@ -338,52 +537,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } // All the rest of our checks depend on the condition being an instruction. - if (CondInst == 0) + if (CondInst == 0) { + // FIXME: Unify this with code below. + if (LVI && ProcessThreadableEdges(Condition, BB)) + return true; return false; + } + // See if this is a phi node in the current block. if (PHINode *PN = dyn_cast<PHINode>(CondInst)) if (PN->getParent() == BB) return ProcessJumpOnPHI(PN); - // If this is a conditional branch whose condition is and/or of a phi, try to - // simplify it. 
- if ((CondInst->getOpcode() == Instruction::And || - CondInst->getOpcode() == Instruction::Or) && - isa<BranchInst>(BB->getTerminator()) && - ProcessBranchOnLogical(CondInst, BB, - CondInst->getOpcode() == Instruction::And)) - return true; - if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { - if (isa<PHINode>(CondCmp->getOperand(0))) { - // If we have "br (phi != 42)" and the phi node has any constant values - // as operands, we can thread through this block. - // - // If we have "br (cmp phi, x)" and the phi node contains x such that the - // comparison uniquely identifies the branch target, we can thread - // through this block. - - if (ProcessBranchOnCompare(CondCmp, BB)) - return true; - } - - // If we have a comparison, loop over the predecessors to see if there is - // a condition with the same value. - pred_iterator PI = pred_begin(BB), E = pred_end(BB); - for (; PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) - if (PBI->isConditional() && *PI != BB) { - if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { - if (CI->getOperand(0) == CondCmp->getOperand(0) && - CI->getOperand(1) == CondCmp->getOperand(1) && - CI->getPredicate() == CondCmp->getPredicate()) { - // TODO: Could handle things like (x != 4) --> (x == 17) - if (ProcessBranchOnDuplicateCond(*PI, BB)) - return true; + if (!LVI && + (!isa<PHINode>(CondCmp->getOperand(0)) || + cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) { + // If we have a comparison, loop over the predecessors to see if there is + // a condition with a lexically identical value. + pred_iterator PI = pred_begin(BB), E = pred_end(BB); + for (; PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI->isConditional() && *PI != BB) { + if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { + if (CI->getOperand(0) == CondCmp->getOperand(0) && + CI->getOperand(1) == CondCmp->getOperand(1) && + CI->getPredicate() == CondCmp->getPredicate()) { + // TODO: Could handle things like (x != 4) --> (x == 17) + if (ProcessBranchOnDuplicateCond(*PI, BB)) + return true; + } } } - } + } } // Check for some cases that are worth simplifying. Right now we want to look @@ -398,10 +585,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { if (isa<Constant>(CondCmp->getOperand(1))) SimplifyValue = CondCmp->getOperand(0); + // TODO: There are other places where load PRE would be profitable, such as + // more complex comparisons. if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue)) if (SimplifyPartiallyRedundantLoad(LI)) return true; + + // Handle a variety of cases where we are branching on something derived from + // a PHI node in the current block. If we can prove that any predecessors + // compute a predictable value based on a PHI node, thread those predecessors. + // + if (ProcessThreadableEdges(CondInst, BB)) + return true; + + // TODO: If we have: "br (X > 0)" and we have a predecessor where we know // "(X == 4)" thread through this block. @@ -459,8 +657,11 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, // Next, figure out which successor we are threading to. BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir); + SmallVector<BasicBlock*, 2> Preds; + Preds.push_back(PredBB); + // Ok, try to thread it! 
- return ThreadEdge(BB, PredBB, SuccBB); + return ThreadEdge(BB, Preds, SuccBB); } /// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that @@ -553,7 +754,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { Value *LoadedPtr = LI->getOperand(0); // If the loaded operand is defined in the LoadBB, it can't be available. - // FIXME: Could do PHI translation, that would be fun :) + // TODO: Could do simple PHI translation, that would be fun :) if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr)) if (PtrOp->getParent() == LoadBB) return false; @@ -562,8 +763,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // the entry to its block. BasicBlock::iterator BBIt = LI; - if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB, - BBIt, 6)) { + if (Value *AvailableVal = + FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) { // If the value if the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n"; @@ -646,7 +847,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Split them out to their own block. UnavailablePred = SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), - "thread-split", this); + "thread-pre-split", this); } // If the value isn't available in all predecessors, then there will be @@ -655,7 +856,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (UnavailablePred) { assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && "Can't handle critical edge here!"); - Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", + Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false, + LI->getAlignment(), UnavailablePred->getTerminator()); AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } @@ -690,55 +892,183 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { return true; } - -/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in -/// the current block. See if there are any simplifications we can do based on -/// inputs to the phi node. -/// -bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { - BasicBlock *BB = PN->getParent(); +/// FindMostPopularDest - The specified list contains multiple possible +/// threadable destinations. Pick the one that occurs the most frequently in +/// the list. +static BasicBlock * +FindMostPopularDest(BasicBlock *BB, + const SmallVectorImpl<std::pair<BasicBlock*, + BasicBlock*> > &PredToDestList) { + assert(!PredToDestList.empty()); + + // Determine popularity. If there are multiple possible destinations, we + // explicitly choose to ignore 'undef' destinations. We prefer to thread + // blocks with known and real destinations to threading undef. We'll handle + // them later if interesting. + DenseMap<BasicBlock*, unsigned> DestPopularity; + for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) + if (PredToDestList[i].second) + DestPopularity[PredToDestList[i].second]++; + + // Find the most popular dest. + DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); + BasicBlock *MostPopularDest = DPI->first; + unsigned Popularity = DPI->second; + SmallVector<BasicBlock*, 4> SamePopularity; + + for (++DPI; DPI != DestPopularity.end(); ++DPI) { + // If the popularity of this entry isn't higher than the popularity we've + // seen so far, ignore it. + if (DPI->second < Popularity) + ; // ignore. 
+ else if (DPI->second == Popularity) { + // If it is the same as what we've seen so far, keep track of it. + SamePopularity.push_back(DPI->first); + } else { + // If it is more popular, remember it. + SamePopularity.clear(); + MostPopularDest = DPI->first; + Popularity = DPI->second; + } + } - // See if the phi node has any constant integer or undef values. If so, we - // can determine where the corresponding predecessor will branch. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *PredVal = PN->getIncomingValue(i); - - // Check to see if this input is a constant integer. If so, the direction - // of the branch is predictable. - if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) { - // Merge any common predecessors that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI); + // Okay, now we know the most popular destination. If there is more than + // destination, we need to determine one. This is arbitrary, but we need + // to make a deterministic decision. Pick the first one that appears in the + // successor list. + if (!SamePopularity.empty()) { + SamePopularity.push_back(MostPopularDest); + TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0; ; ++i) { + assert(i != TI->getNumSuccessors() && "Didn't find any successor!"); - BasicBlock *SuccBB; - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) - SuccBB = BI->getSuccessor(CI->isZero()); - else { - SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); - SuccBB = SI->getSuccessor(SI->findCaseValue(CI)); - } + if (std::find(SamePopularity.begin(), SamePopularity.end(), + TI->getSuccessor(i)) == SamePopularity.end()) + continue; - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB); + MostPopularDest = TI->getSuccessor(i); + break; } + } + + // Okay, we have finally picked the most popular destination. + return MostPopularDest; +} + +bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { + // If threading this would thread across a loop header, don't even try to + // thread the edge. + if (LoopHeaders.count(BB)) + return false; + + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues; + if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) + return false; + assert(!PredValues.empty() && + "ComputeValueKnownInPredecessors returned true with no values"); + + DEBUG(errs() << "IN BB: " << *BB; + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + errs() << " BB '" << BB->getName() << "': FOUND condition = "; + if (PredValues[i].first) + errs() << *PredValues[i].first; + else + errs() << "UNDEF"; + errs() << " for pred '" << PredValues[i].second->getName() + << "'.\n"; + }); + + // Decide what we want to thread through. Convert our list of known values to + // a list of known destinations for each pred. This also discards duplicate + // predecessors and keeps track of the undefined inputs (which are represented + // as a null dest in the PredToDestList). + SmallPtrSet<BasicBlock*, 16> SeenPreds; + SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList; + + BasicBlock *OnlyDest = 0; + BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + BasicBlock *Pred = PredValues[i].second; + if (!SeenPreds.insert(Pred)) + continue; // Duplicate predecessor entry. - // If the input is an undef, then it doesn't matter which way it will go. - // Pick an arbitrary dest and thread the edge. 
- if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) { - // Merge any common predecessors that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV); - BasicBlock *SuccBB = - BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB)); - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB); + // If the predecessor ends with an indirect goto, we can't change its + // destination. + if (isa<IndirectBrInst>(Pred->getTerminator())) + continue; + + ConstantInt *Val = PredValues[i].first; + + BasicBlock *DestBB; + if (Val == 0) // Undef. + DestBB = 0; + else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + DestBB = BI->getSuccessor(Val->isZero()); + else { + SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); + DestBB = SI->getSuccessor(SI->findCaseValue(Val)); } + + // If we have exactly one destination, remember it for efficiency below. + if (i == 0) + OnlyDest = DestBB; + else if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + + PredToDestList.push_back(std::make_pair(Pred, DestBB)); } - // If the incoming values are all variables, we don't know the destination of - // any predecessors. However, if any of the predecessor blocks end in an - // unconditional branch, we can *duplicate* the jump into that block in order - // to further encourage jump threading and to eliminate cases where we have - // branch on a phi of an icmp (branch on icmp is much better). + // If all edges were unthreadable, we fail. + if (PredToDestList.empty()) + return false; + + // Determine which is the most common successor. If we have many inputs and + // this block is a switch, we want to start by threading the batch that goes + // to the most popular destination first. If we only know about one + // threadable destination (the common case) we can avoid this. + BasicBlock *MostPopularDest = OnlyDest; + + if (MostPopularDest == MultipleDestSentinel) + MostPopularDest = FindMostPopularDest(BB, PredToDestList); + + // Now that we know what the most popular destination is, factor all + // predecessors that will jump to it into a single predecessor. + SmallVector<BasicBlock*, 16> PredsToFactor; + for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) + if (PredToDestList[i].second == MostPopularDest) { + BasicBlock *Pred = PredToDestList[i].first; + + // This predecessor may be a switch or something else that has multiple + // edges to the block. Factor each of these edges by listing them + // according to # occurrences in PredsToFactor. + TerminatorInst *PredTI = Pred->getTerminator(); + for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i) + if (PredTI->getSuccessor(i) == BB) + PredsToFactor.push_back(Pred); + } + + // If the threadable edges are branching on an undefined value, we get to pick + // the destination that these predecessors should get to. + if (MostPopularDest == 0) + MostPopularDest = BB->getTerminator()-> + getSuccessor(GetBestDestForJumpOnUndef(BB)); + + // Ok, try to thread it! + return ThreadEdge(BB, PredsToFactor, MostPopularDest); +} + +/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in +/// the current block. See if there are any simplifications we can do based on +/// inputs to the phi node. 
+/// +bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { + BasicBlock *BB = PN->getParent(); + + // If any of the predecessor blocks end in an unconditional branch, we can + // *duplicate* the jump into that block in order to further encourage jump + // threading and to eliminate cases where we have branch on a phi of an icmp + // (branch on icmp is much better). // We don't want to do this tranformation for switches, because we don't // really want to duplicate a switch. @@ -759,137 +1089,6 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { } -/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch -/// whose condition is an AND/OR where one side is PN. If PN has constant -/// operands that permit us to evaluate the condition for some operand, thread -/// through the block. For example with: -/// br (and X, phi(Y, Z, false)) -/// the predecessor corresponding to the 'false' will always jump to the false -/// destination of the branch. -/// -bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, - bool isAnd) { - // If this is a binary operator tree of the same AND/OR opcode, check the - // LHS/RHS. - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) - if ((isAnd && BO->getOpcode() == Instruction::And) || - (!isAnd && BO->getOpcode() == Instruction::Or)) { - if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd)) - return true; - if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd)) - return true; - } - - // If this isn't a PHI node, we can't handle it. - PHINode *PN = dyn_cast<PHINode>(V); - if (!PN || PN->getParent() != BB) return false; - - // We can only do the simplification for phi nodes of 'false' with AND or - // 'true' with OR. See if we have any entries in the phi for this. - unsigned PredNo = ~0U; - ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()), - !isAnd); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) == PredCst) { - PredNo = i; - break; - } - } - - // If no match, bail out. - if (PredNo == ~0U) - return false; - - // If so, we can actually do this threading. Merge any common predecessors - // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); - - // Next, figure out which successor we are threading to. If this was an AND, - // the constant must be FALSE, and we must be targeting the 'false' block. - // If this is an OR, the constant must be TRUE, and we must be targeting the - // 'true' block. - BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd); - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB); -} - -/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right -/// hand sides of the compare instruction, try to determine the result. If the -/// result can not be determined, a null pointer is returned. -static Constant *GetResultOfComparison(CmpInst::Predicate pred, - Value *LHS, Value *RHS, - LLVMContext &Context) { - if (Constant *CLHS = dyn_cast<Constant>(LHS)) - if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantExpr::getCompare(pred, CLHS, CRHS); - - if (LHS == RHS) - if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType())) - return ICmpInst::isTrueWhenEqual(pred) ? - ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context); - - return 0; -} - -/// ProcessBranchOnCompare - We found a branch on a comparison between a phi -/// node and a value. 
If we can identify when the comparison is true between -/// the phi inputs and the value, we can fold the compare for that edge and -/// thread through it. -bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { - PHINode *PN = cast<PHINode>(Cmp->getOperand(0)); - Value *RHS = Cmp->getOperand(1); - - // If the phi isn't in the current block, an incoming edge to this block - // doesn't control the destination. - if (PN->getParent() != BB) - return false; - - // We can do this simplification if any comparisons fold to true or false. - // See if any do. - Value *PredVal = 0; - bool TrueDirection = false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - PredVal = PN->getIncomingValue(i); - - Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, - RHS, Cmp->getContext()); - if (!Res) { - PredVal = 0; - continue; - } - - // If this folded to a constant expr, we can't do anything. - if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) { - TrueDirection = ResC->getZExtValue(); - break; - } - // If this folded to undef, just go the false way. - if (isa<UndefValue>(Res)) { - TrueDirection = false; - break; - } - - // Otherwise, we can't fold this input. - PredVal = 0; - } - - // If no match, bail out. - if (PredVal == 0) - return false; - - // If so, we can actually do this threading. Merge any common predecessors - // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal); - - // Next, get our successor. - BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB); -} - - /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new /// predecessor to the PHIBB block. If it has PHI nodes, add entries for /// NewPred using the entries from OldPred (suitably mapped). @@ -914,10 +1113,11 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, } } -/// ThreadEdge - We have decided that it is safe and profitable to thread an -/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this -/// change. -bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, +/// ThreadEdge - We have decided that it is safe and profitable to factor the +/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB +/// across BB. Transform the IR to reflect this change. +bool JumpThreading::ThreadEdge(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &PredBBs, BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. if (SuccBB == BB) { @@ -929,8 +1129,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) { - DEBUG(errs() << " Not threading from '" << PredBB->getName() - << "' across loop header BB '" << BB->getName() + DEBUG(errs() << " Not threading across loop header BB '" << BB->getName() << "' to dest BB '" << SuccBB->getName() << "' - it might create an irreducible loop!\n"); return false; @@ -943,6 +1142,17 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, return false; } + // And finally, do it! Start by factoring the predecessors is needed. 
+ BasicBlock *PredBB; + if (PredBBs.size() == 1) + PredBB = PredBBs[0]; + else { + DEBUG(errs() << " Factoring out " << PredBBs.size() + << " common predecessors.\n"); + PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), + ".thr_comm", this); + } + // And finally, do it! DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '" << SuccBB->getName() << "' with cost: " << JumpThreadCost @@ -1034,7 +1244,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, TerminatorInst *PredTerm = PredBB->getTerminator(); for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) if (PredTerm->getSuccessor(i) == BB) { - BB->removePredecessor(PredBB); + RemovePredecessorAndSimplify(BB, PredBB, TD); PredTerm->setSuccessor(i, NewBB); } @@ -1044,9 +1254,12 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BI = NewBB->begin(); for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { - Inst->replaceAllUsesWith(C); - Inst->eraseFromParent(); + + if (Value *V = SimplifyInstruction(Inst, TD)) { + WeakVH BIHandle(BI); + ReplaceAndSimplifyAllUses(Inst, V, TD); + if (BIHandle == 0) + BI = NewBB->begin(); continue; } @@ -1164,7 +1377,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // PredBB no longer jumps to BB, remove entries in the PHI node for the edge // that we nuked. - BB->removePredecessor(PredBB); + RemovePredecessorAndSimplify(BB, PredBB, TD); // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 756fbf3..104c873 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -263,7 +263,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Get the preheader block to move instructions into... Preheader = L->getLoopPreheader(); - assert(Preheader&&"Preheader insertion pass guarantees we have a preheader!"); // Loop over the body of this loop, looking for calls, invokes, and stores. // Because subloops have already been incorporated into AST, we skip blocks in @@ -286,12 +285,14 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. // - SinkRegion(DT->getNode(L->getHeader())); - HoistRegion(DT->getNode(L->getHeader())); + if (L->hasDedicatedExits()) + SinkRegion(DT->getNode(L->getHeader())); + if (Preheader) + HoistRegion(DT->getNode(L->getHeader())); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can... - if (!DisablePromotion) + if (!DisablePromotion && Preheader && L->hasDedicatedExits()) PromoteValuesInLoop(); // Clear out loops state information for the next iteration diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 866d8b4..48817ab 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -115,6 +115,10 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { if (!preheader) return false; + // If LoopSimplify form is not available, stay out of trouble. + if (!L->hasDedicatedExits()) + return false; + // We can't remove loops that contain subloops. 
If the subloops were dead, // they would already have been removed in earlier executions of this pass. if (L->begin() != L->end()) diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 920d85c..8b6a233 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -209,6 +209,10 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) { L = IncomingLoop; LPM = &LPM_Ref; + // If LoopSimplify form is not available, stay out of trouble. + if (!L->isLoopSimplifyForm()) + return false; + // FIXME - Nested loops make dominator info updates tricky. if (!L->getSubLoops().empty()) return false; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7a4bb35..5004483 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -15,7 +15,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -49,6 +48,7 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<LoopInfo>(); AU.addPreserved<LoopInfo>(); AU.addPreserved<DominatorTree>(); AU.addPreserved<DominanceFrontier>(); @@ -104,17 +104,18 @@ bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) { bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { L = Lp; - OrigHeader = L->getHeader(); OrigPreHeader = L->getLoopPreheader(); + if (!OrigPreHeader) return false; + OrigLatch = L->getLoopLatch(); + if (!OrigLatch) return false; + + OrigHeader = L->getHeader(); // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; - assert(OrigHeader && OrigLatch && OrigPreHeader && - "Loop is not in canonical form"); - // If the loop header is not one of the loop exiting blocks then // either this loop is already rotated or it is not // suitable for loop rotation transformations. 
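
The two early returns added to rotateLoop above replace the old assertion: a loop without a preheader or latch is now simply left alone. As context, loop rotation roughly turns a top-tested loop into a bottom-tested one behind a guard, which is why a header that is not an exiting block signals an already-rotated (or unsuitable) loop. A rough sketch in source terms, with hypothetical names (work, n), not code from this change:

extern void work(int);

void rotated_shape(int n) {
  // Un-rotated form: the header holds the exit test.
  //   for (int i = 0; i < n; ++i) work(i);
  // Rotated form: the test is cloned into a guard and moved to the bottom,
  // so the latch, not the header, is the exiting block.
  int i = 0;
  if (i < n) {
    do {
      work(i);
      ++i;
    } while (i < n);
  }
}
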
@@ -287,7 +288,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { "bb.nph", OrigHeader->getParent(), NewHeader); - LoopInfo &LI = LPM.getAnalysis<LoopInfo>(); + LoopInfo &LI = getAnalysis<LoopInfo>(); if (Loop *PL = LI.getLoopFor(OrigPreHeader)) PL->addBasicBlockToLoop(NewPreHeader, LI.getBase()); BranchInst::Create(NewHeader, NewPreHeader); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index e20fb16..564c7ac 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -51,6 +51,7 @@ STATISTIC(NumEliminated, "Number of strides eliminated"); STATISTIC(NumShadow, "Number of Shadow IVs optimized"); STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses"); STATISTIC(NumLoopCond, "Number of loop terminating conds optimized"); +STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero"); static cl::opt<bool> EnableFullLSRMode("enable-full-lsr", cl::init(false), @@ -107,7 +108,7 @@ namespace { public: static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : + explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : LoopPass(&ID), TLI(tli) { } @@ -131,12 +132,10 @@ namespace { } private: - ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV *const * &CondStride); - void OptimizeIndvars(Loop *L); - void OptimizeLoopCountIV(Loop *L); + + /// OptimizeLoopTermCond - Change loop terminating condition to use the + /// postinc iv when possible. void OptimizeLoopTermCond(Loop *L); /// OptimizeShadowIV - If IV is used in a int-to-float cast @@ -148,8 +147,28 @@ namespace { ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse); + /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for + /// deciding when to exit the loop is used only for that purpose, try to + /// rearrange things so it counts down to a test against zero. + bool OptimizeLoopCountIV(Loop *L); + bool OptimizeLoopCountIVOfStride(const SCEV* &Stride, + IVStrideUse* &CondUse, Loop *L); + + /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a + /// single stride of IV. All of the users may have different starting + /// values, and this may not be the only stride. 
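
Among the declarations reshuffled above, OptimizeLoopCountIV (now returning bool, with a per-stride helper) implements the rewrite its new comment describes and that the NumCountZero statistic counts: if the IV that decides when to exit the loop has no other use, run it downward so the exit compare is against zero. A rough before/after sketch with made-up names (body, n), not taken from the patch:

extern void body();

void counts_up(unsigned n) {          // exit test compares the IV against n
  for (unsigned i = 0; i != n; ++i)
    body();
}

void counts_down(unsigned n) {        // after the rewrite: same trip count,
  for (unsigned i = n; i != 0; --i)   // but the backedge test is a compare
    body();                           // with zero
}
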
+ void StrengthReduceIVUsersOfStride(const SCEV *const &Stride, + IVUsersOfOneStride &Uses, + Loop *L); + void StrengthReduceIVUsers(Loop *L); + + ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse, + const SCEV* &CondStride, + bool PostPass = false); + bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV *const * &CondStride); + const SCEV* &CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&, IVExpr&, const Type*, @@ -164,6 +183,7 @@ namespace { bool &AllUsesAreAddresses, bool &AllUsesAreOutsideLoop, std::vector<BasedUser> &UsersToProcess); + bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc); bool ShouldUseFullStrengthReductionMode( const std::vector<BasedUser> &UsersToProcess, const Loop *L, @@ -188,9 +208,7 @@ namespace { Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &PreheaderRewriter); - void StrengthReduceStridedIVUsers(const SCEV *const &Stride, - IVUsersOfOneStride &Uses, - Loop *L); + void DeleteTriviallyDeadInstructions(); }; } @@ -208,11 +226,11 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { /// their operands subsequently dead. void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { if (DeadInsts.empty()) return; - + while (!DeadInsts.empty()) { Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back()); DeadInsts.pop_back(); - + if (I == 0 || !isInstructionTriviallyDead(I)) continue; @@ -223,14 +241,14 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { DeadInsts.push_back(U); } } - + I->eraseFromParent(); Changed = true; } } -/// containsAddRecFromDifferentLoop - Determine whether expression S involves a -/// subexpression that is an AddRec from a loop other than L. An outer loop +/// containsAddRecFromDifferentLoop - Determine whether expression S involves a +/// subexpression that is an AddRec from a loop other than L. An outer loop /// of L is OK, but not an inner loop nor a disjoint loop. static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { // This is very common, put it first. @@ -256,7 +274,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { return containsAddRecFromDifferentLoop(DE->getLHS(), L) || containsAddRecFromDifferentLoop(DE->getRHS(), L); #if 0 - // SCEVSDivExpr has been backed out temporarily, but will be back; we'll + // SCEVSDivExpr has been backed out temporarily, but will be back; we'll // need this when it is. if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S)) return containsAddRecFromDifferentLoop(DE->getLHS(), L) || @@ -328,7 +346,7 @@ namespace { /// field to the Imm field (below). BasedUser values are sorted by this /// field. const SCEV *Base; - + /// Inst - The instruction using the induction variable. Instruction *Inst; @@ -352,11 +370,11 @@ namespace { // instruction for a loop and uses outside the loop that are dominated by // the loop. 
bool isUseOfPostIncrementedValue; - + BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(SE->getIntegerSCEV(0, Base->getType())), + Imm(SE->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -367,8 +385,8 @@ namespace { SCEVExpander &Rewriter, Loop *L, Pass *P, LoopInfo &LI, SmallVectorImpl<WeakVH> &DeadInsts); - - Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, + + Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -383,7 +401,7 @@ void BasedUser::dump() const { errs() << " Inst: " << *Inst; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -393,10 +411,10 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, // want to insert this expression before the user, we'd rather pull it out as // many loops as possible. Instruction *BaseInsertPt = IP; - + // Figure out the most-nested loop that IP is in. Loop *InsertLoop = LI.getLoopFor(IP->getParent()); - + // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out // the preheader of the outer-most loop where NewBase is not loop invariant. if (L->contains(IP->getParent())) @@ -404,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); InsertLoop = InsertLoop->getParentLoop(); } - + Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); const SCEV *NewValSCEV = SE->getUnknown(Base); @@ -430,7 +448,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, if (!isa<PHINode>(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; - + // However, if the Operand is itself an instruction, the (potentially // complex) inserted code may be shared by many users. Because of this, we // want to emit code for the computation of the operand right before its old @@ -442,7 +460,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, // // If this is a use outside the loop (which means after, since it is based // on a loop indvar) we use the post-incremented value, so that we don't - // artificially make the preinc value live out the bottom of the loop. + // artificially make the preinc value live out the bottom of the loop. if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) { if (NewBasePt && isa<PHINode>(OperandValToReplace)) { InsertPt = NewBasePt; @@ -477,7 +495,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, if (PN->getIncomingValue(i) == OperandValToReplace) { // If the original expression is outside the loop, put the replacement // code in the same place as the original expression, - // which need not be an immediate predecessor of this PHI. This way we + // which need not be an immediate predecessor of this PHI. This way we // need only one copy of it even if it is referenced multiple times in // the PHI. 
We don't do this when the original expression is inside the // loop because multiple copies sometimes do useful sinking of code in @@ -490,6 +508,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, // is the canonical backedge for this loop, as this can make some // inserted code be in an illegal position. if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && + !isa<IndirectBrInst>(PHIPred->getTerminator()) && (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { // First step, split the critical edge. @@ -572,11 +591,11 @@ static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy, static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, Loop *L, ScalarEvolution *SE) { if (Val->isLoopInvariant(L)) return; // Nothing to do. - + if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); - + for (unsigned i = 0; i != SAE->getNumOperands(); ++i) if (!SAE->getOperand(i)->isLoopInvariant(L)) { // If this is a loop-variant expression, it must stay in the immediate @@ -594,7 +613,7 @@ static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, // Try to pull immediates out of the start value of nested addrec's. const SCEV *Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - + SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); @@ -617,11 +636,11 @@ static void MoveImmediateValues(const TargetLowering *TLI, if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); - + for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { const SCEV *NewOp = SAE->getOperand(i); MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); - + if (!NewOp->isLoopInvariant(L)) { // If this is a loop-variant expression, it must stay in the immediate // field of the expression. @@ -640,7 +659,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, // Try to pull immediates out of the start value of nested addrec's. const SCEV *Start = SARE->getStart(); MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); - + if (Start != SARE->getStart()) { SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; @@ -656,8 +675,8 @@ static void MoveImmediateValues(const TargetLowering *TLI, const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType()); const SCEV *NewOp = SME->getOperand(1); MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE); - - // If we extracted something out of the subexpressions, see if we can + + // If we extracted something out of the subexpressions, see if we can // simplify this! if (NewOp != SME->getOperand(1)) { // Scale SubImm up by "8". If the result is a target constant, we are @@ -666,7 +685,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) { // Accumulate the immediate. Imm = SE->getAddExpr(Imm, SubImm); - + // Update what is left of 'Val'. Val = SE->getMulExpr(SME->getOperand(0), NewOp); return; @@ -714,7 +733,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs, SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. 
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); - + SeparateSubExprs(SubExprs, SARE->getOperand(0), SE); } @@ -724,7 +743,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs, } } -// This is logically local to the following function, but C++ says we have +// This is logically local to the following function, but C++ says we have // to make it file scope. struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; @@ -762,7 +781,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // an addressing mode "for free"; such expressions are left within the loop. // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; std::map<const SCEV *, SubExprUseData> SubExpressionUseData; - + // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. SmallVector<const SCEV *, 16> UniqueSubExprs; @@ -779,7 +798,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, if (!L->contains(Uses[i].Inst->getParent())) continue; NumUsesInsideLoop++; - + // If the base is zero (which is common), return zero now, there are no // CSEs we can find. if (Uses[i].Base == Zero) return Zero; @@ -811,13 +830,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // Now that we know how many times each is used, build Result. Iterate over // UniqueSubexprs so that we have a stable ordering. for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map<const SCEV *, SubExprUseData>::iterator I = + std::map<const SCEV *, SubExprUseData>::iterator I = SubExpressionUseData.find(UniqueSubExprs[i]); assert(I != SubExpressionUseData.end() && "Entry not found?"); - if (I->second.Count == NumUsesInsideLoop) { // Found CSE! + if (I->second.Count == NumUsesInsideLoop) { // Found CSE! if (I->second.notAllUsesAreFree) Result = SE->getAddExpr(Result, I->first); - else + else FreeResult = SE->getAddExpr(FreeResult, I->first); } else // Remove non-cse's from SubExpressionUseData. @@ -849,13 +868,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // If we found no CSE's, return now. if (Result == Zero) return Result; - + // If we still have a FreeResult, remove its subexpressions from // SubExpressionUseData. This means they will remain in the use Bases. if (FreeResult != Zero) { SeparateSubExprs(SubExprs, FreeResult, SE); for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map<const SCEV *, SubExprUseData>::iterator I = + std::map<const SCEV *, SubExprUseData>::iterator I = SubExpressionUseData.find(SubExprs[j]); SubExpressionUseData.erase(I); } @@ -882,7 +901,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, SubExprs.erase(SubExprs.begin()+j); --j; --e; } - + // Finally, add the non-shared expressions together. if (SubExprs.empty()) Uses[i].Base = Zero; @@ -890,11 +909,11 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, Uses[i].Base = SE->getAddExpr(SubExprs); SubExprs.clear(); } - + return Result; } -/// ValidScale - Check whether the given Scale is valid for all loads and +/// ValidScale - Check whether the given Scale is valid for all loads and /// stores in UsersToProcess. 
/// bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, @@ -911,7 +930,7 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, AccessTy = getAccessType(UsersToProcess[i].Inst); else if (isa<PHINode>(UsersToProcess[i].Inst)) continue; - + TargetLowering::AddrMode AM; if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) AM.BaseOffs = SC->getValue()->getSExtValue(); @@ -983,13 +1002,13 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, /// reuse is possible. Factors can be negative on same targets, e.g. ARM. /// /// If all uses are outside the loop, we don't require that all multiplies -/// be folded into the addressing mode, nor even that the factor be constant; -/// a multiply (executed once) outside the loop is better than another IV +/// be folded into the addressing mode, nor even that the factor be constant; +/// a multiply (executed once) outside the loop is better than another IV /// within. Well, usually. const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, bool AllUsesAreAddresses, bool AllUsesAreOutsideLoop, - const SCEV *const &Stride, + const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector<BasedUser>& UsersToProcess) { if (StrideNoReuse.count(Stride)) @@ -999,11 +1018,16 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) || StrideNoReuse.count(SI->first)) continue; + // The other stride has no uses, don't reuse it. + std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = + IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); + if (UI->second->Users.empty()) + continue; int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); if (SI->first != Stride && (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0)) @@ -1052,7 +1076,7 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // an existing IV if we can. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) continue; @@ -1072,9 +1096,9 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // -1*old. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end()) + if (SI == IVsByStride.end()) continue; if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first)) if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0))) @@ -1104,18 +1128,18 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { static bool isNonConstantNegative(const SCEV *const &Expr) { const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); if (!Mul) return false; - + // If there is a constant factor, it will be first. 
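As a hedged illustration of the reuse case CheckForIVReuse looks for (hypothetical types, and a target whose addressing modes support a scale factor, e.g. x86): a user whose stride is a constant multiple of an existing IV's stride can reuse that IV, with the multiple expressed as the addressing-mode scale instead of a second counter.

    // Illustration only.
    void before(int *A, long long *B, int n) {
      for (int i = 0; i < n; ++i) {
        A[i] += 1;                     // 4-byte stride
        B[i] += 1;                     // 8-byte stride
      }
    }
    // After reuse there is a single induction variable; the 8-byte
    // accesses use "base + iv * 2" folded into the addressing mode
    // rather than a second IV stepping by 8.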
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); if (!SC) return false; - + // Return true if the value is negative, this matches things like (-42 * V). return SC->getValue()->getValue().isNegative(); } /// CollectIVUsers - Transform our list of users and offsets to a bit more -/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base -/// of the strided accesses, as well as the old information from Uses. We +/// complex table. In this new vector, each 'BasedUser' contains 'Base', the +/// base of the strided accesses, as well as the old information from Uses. We /// progressively move information from the Base field to the Imm field, until /// we eventually have the full access expression to rewrite the use. const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, @@ -1145,7 +1169,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, // We now have a whole bunch of uses of like-strided induction variables, but // they might all have different bases. We want to emit one PHI node for this // stride which we fold as many common expressions (between the IVs) into as - // possible. Start by identifying the common expressions in the base values + // possible. Start by identifying the common expressions in the base values // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find // "A+B"), emit it to the preheader, then remove the expression from the // UsersToProcess base values. @@ -1165,11 +1189,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, if (!L->contains(UsersToProcess[i].Inst->getParent())) { UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, UsersToProcess[i].Base); - UsersToProcess[i].Base = + UsersToProcess[i].Base = SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType()); } else { // Not all uses are outside the loop. - AllUsesAreOutsideLoop = false; + AllUsesAreOutsideLoop = false; // Addressing modes can be folded into loads and stores. Be careful that // the store is through the expression, not of the expression though. @@ -1183,11 +1207,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, if (isAddress) HasAddress = true; - + // If this use isn't an address, then not all uses are addresses. if (!isAddress && !isPHI) AllUsesAreAddresses = false; - + MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base, UsersToProcess[i].Imm, isAddress, L, SE); } @@ -1198,7 +1222,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, // for one fewer iv. if (NumPHI > 1) AllUsesAreAddresses = false; - + // There are no in-loop address uses. if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop)) AllUsesAreAddresses = false; @@ -1491,12 +1515,13 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset, return true; } -/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single +/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single /// stride of IV. All of the users may have different starting values, and this /// may not be the only stride. -void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, - IVUsersOfOneStride &Uses, - Loop *L) { +void +LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, + IVUsersOfOneStride &Uses, + Loop *L) { // If all the users are moved to another stride, then there is nothing to do. 
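A small, hypothetical sketch of the base CSE that CollectIVUsers sets up (the "A+C+B" / "A+B+D" case mentioned above): the part common to all use bases is emitted once in the preheader, and only the differing pieces stay with each use.

    // Illustration only.
    void before(int *A, int x, int y, int z, int n) {
      for (int i = 0; i < n; ++i) {
        A[x + y + i] = 0;              // base "A + x + y"
        A[x + z + i] = 1;              // base "A + x + z"
      }
    }
    // The common subexpression "A + x" is computed once before the loop;
    // "+ y" and "+ z" remain as the per-use parts of the bases.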
if (Uses.Users.empty()) return; @@ -1518,8 +1543,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, // have the full access expression to rewrite the use. std::vector<BasedUser> UsersToProcess; const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); + AllUsesAreOutsideLoop, + UsersToProcess); // Sort the UsersToProcess array so that users with common bases are // next to each other. @@ -1588,12 +1613,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::getInt32Ty(Preheader->getContext())), - SE->getIntegerSCEV(0, + SE->getIntegerSCEV(0, Type::getInt32Ty(Preheader->getContext())), 0); - /// Choose a strength-reduction strategy and prepare for it by creating - /// the necessary PHIs and adjusting the bookkeeping. + // Choose a strength-reduction strategy and prepare for it by creating + // the necessary PHIs and adjusting the bookkeeping. if (ShouldUseFullStrengthReductionMode(UsersToProcess, L, AllUsesAreAddresses, Stride)) { PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L, @@ -1606,7 +1631,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, // If all uses are addresses, check if it is possible to reuse an IV. The // new IV must have a stride that is a multiple of the old stride; the // multiple must be a number that can be encoded in the scale field of the - // target addressing mode; and we must have a valid instruction after this + // target addressing mode; and we must have a valid instruction after this // substitution, including the immediate field, if any. RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses, AllUsesAreOutsideLoop, @@ -1649,7 +1674,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, // We want this constant emitted into the preheader! This is just // using cast as a copy so BitCast (no-op cast) is appropriate BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", - PreInsertPt); + PreInsertPt); } } @@ -1723,7 +1748,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, assert(SE->getTypeSizeInBits(RewriteExpr->getType()) < SE->getTypeSizeInBits(ReuseIV.Base->getType()) && "Unexpected lengthening conversion!"); - typedBase = SE->getTruncateExpr(ReuseIV.Base, + typedBase = SE->getTruncateExpr(ReuseIV.Base, RewriteExpr->getType()); } RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase); @@ -1775,11 +1800,29 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, // different starting values, into different PHIs. } +void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) { + // Note: this processes each stride/type pair individually. All users + // passed into StrengthReduceIVUsersOfStride have the same type AND stride. + // Also, note that we iterate over IVUsesByStride indirectly by using + // StrideOrder. This extra layer of indirection makes the ordering of + // strides deterministic - not dependent on map order. + for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. 
+ if (!SI->first->isLoopInvariant(L)) + continue; + StrengthReduceIVUsersOfStride(SI->first, *SI->second, L); + } +} + /// FindIVUserForCond - If Cond has an operand that is an expression of an IV, /// set the IV user and stride information and return true, otherwise return /// false. -bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV *const * &CondStride) { +bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, + IVStrideUse *&CondUse, + const SCEV* &CondStride) { for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e && !CondUse; ++Stride) { std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = @@ -1793,12 +1836,12 @@ bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse // InstCombine does it as well for simple uses, it's not clear that it // occurs enough in real life to handle. CondUse = UI; - CondStride = &SI->first; + CondStride = SI->first; return true; } } return false; -} +} namespace { // Constant strides come first which in turns are sorted by their absolute @@ -1851,8 +1894,9 @@ namespace { /// v1 = v1 + 3 /// if (v1 < 30) goto loop ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV *const* &CondStride) { + IVStrideUse* &CondUse, + const SCEV* &CondStride, + bool PostPass) { // If there's only one stride in the loop, there's nothing to do here. if (IU->StrideOrder.size() < 2) return Cond; @@ -1860,23 +1904,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // trying to change the condition because the stride will still // remain. std::map<const SCEV *, IVUsersOfOneStride *>::iterator I = - IU->IVUsesByStride.find(*CondStride); - if (I == IU->IVUsesByStride.end() || - I->second->Users.size() != 1) + IU->IVUsesByStride.find(CondStride); + if (I == IU->IVUsesByStride.end()) return Cond; + if (I->second->Users.size() > 1) { + for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(), + EE = I->second->Users.end(); II != EE; ++II) { + if (II->getUser() == Cond) + continue; + if (!isInstructionTriviallyDead(II->getUser())) + return Cond; + } + } // Only handle constant strides for now. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride); + const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride); if (!SC) return Cond; ICmpInst::Predicate Predicate = Cond->getPredicate(); int64_t CmpSSInt = SC->getValue()->getSExtValue(); - unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType()); + unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType()); uint64_t SignBit = 1ULL << (BitWidth-1); const Type *CmpTy = Cond->getOperand(0)->getType(); const Type *NewCmpTy = NULL; unsigned TyBits = SE->getTypeSizeInBits(CmpTy); unsigned NewTyBits = 0; - const SCEV **NewStride = NULL; + const SCEV *NewStride = NULL; Value *NewCmpLHS = NULL; Value *NewCmpRHS = NULL; int64_t Scale = 1; @@ -1885,16 +1937,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) { int64_t CmpVal = C->getValue().getSExtValue(); + // Check the relevant induction variable for conformance to + // the pattern. + const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); + if (!AR || !AR->isAffine()) + return Cond; + + const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); // Check stride constant and the comparision constant signs to detect // overflow. 
- if ((CmpVal & SignBit) != (CmpSSInt & SignBit)) - return Cond; + if (StartC) { + if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) || + (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0)) + return Cond; + } else { + // More restrictive check for the other cases. + if ((CmpVal & SignBit) != (CmpSSInt & SignBit)) + return Cond; + } // Look for a suitable stride / iv as replacement. for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); - if (!isa<SCEVConstant>(SI->first)) + if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty()) continue; int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); if (SSInt == CmpSSInt || @@ -1904,6 +1971,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, Scale = SSInt / CmpSSInt; int64_t NewCmpVal = CmpVal * Scale; + + // If old icmp value fits in icmp immediate field, but the new one doesn't + // try something else. + if (TLI && + TLI->isLegalICmpImmediate(CmpVal) && + !TLI->isLegalICmpImmediate(NewCmpVal)) + continue; + APInt Mul = APInt(BitWidth*2, CmpVal, true); Mul = Mul * APInt(BitWidth*2, Scale, true); // Check for overflow. @@ -1918,8 +1993,6 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, (CmpVal & SignBit) != (NewCmpVal & SignBit)) continue; - if (NewCmpVal == CmpVal) - continue; // Pick the best iv to use trying to avoid a cast. NewCmpLHS = NULL; for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), @@ -1969,19 +2042,21 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset())) continue; - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - // Avoid rewriting the compare instruction with an iv of new stride - // if it's likely the new stride uses will be rewritten using the - // stride of the compare instruction. - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) - continue; + if (!PostPass) { + bool AllUsesAreAddresses = true; + bool AllUsesAreOutsideLoop = true; + std::vector<BasedUser> UsersToProcess; + const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + AllUsesAreAddresses, + AllUsesAreOutsideLoop, + UsersToProcess); + // Avoid rewriting the compare instruction with an iv of new stride + // if it's likely the new stride uses will be rewritten using the + // stride of the compare instruction. + if (AllUsesAreAddresses && + ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) + continue; + } // Avoid rewriting the compare instruction with an iv which has // implicit extension or truncation built into it. 
@@ -1994,7 +2069,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (Scale < 0 && !Cond->isEquality()) Predicate = ICmpInst::getSwappedPredicate(Predicate); - NewStride = &IU->StrideOrder[i]; + NewStride = IU->StrideOrder[i]; if (!isa<PointerType>(NewCmpTy)) NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { @@ -2031,13 +2106,16 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, L->getHeader()->getName() + ".termcond"); + DEBUG(errs() << " Change compare stride in Inst " << *OldCond); + DEBUG(errs() << " to " << *Cond << '\n'); + // Remove the old compare instruction. The old indvar is probably dead too. DeadInsts.push_back(CondUse->getOperandValToReplace()); OldCond->replaceAllUsesWith(Cond); OldCond->eraseFromParent(); - IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS); - CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); + IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS); + CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); CondStride = NewStride; ++NumEliminated; Changed = true; @@ -2180,7 +2258,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return; - + for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = @@ -2199,13 +2277,13 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { /* If shadow use is a int->float cast then insert a second IV to eliminate this cast. - for (unsigned i = 0; i < n; ++i) + for (unsigned i = 0; i < n; ++i) foo((double)i); is transformed into double d = 0.0; - for (unsigned i = 0; i < n; ++i, ++d) + for (unsigned i = 0; i < n; ++i, ++d) foo(d); */ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) @@ -2227,7 +2305,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { const Type *SrcTy = PH->getType(); int Mantissa = DestTy->getFPMantissaWidth(); - if (Mantissa == -1) continue; + if (Mantissa == -1) continue; if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa) continue; @@ -2239,12 +2317,12 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { Entry = 1; Latch = 0; } - + ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); - BinaryOperator *Incr = + BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); if (!Incr) continue; if (Incr->getOpcode() != Instruction::Add @@ -2271,7 +2349,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { /* create new increment. '++d' in above example. */ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); - BinaryOperator *NewIncr = + BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? 
Instruction::FAdd : Instruction::FSub, NewPH, CFP, "IV.S.next.", Incr); @@ -2297,237 +2375,385 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { OptimizeShadowIV(L); } -/// OptimizeLoopTermCond - Change loop terminating condition to use the +bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L, + bool CheckPreInc) { + int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); + const SCEV *Share = SI->first; + if (!isa<SCEVConstant>(SI->first) || Share == Stride) + continue; + int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue(); + if (SSInt == SInt) + return true; // This can definitely be reused. + if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) + continue; + int64_t Scale = SSInt / SInt; + bool AllUsesAreAddresses = true; + bool AllUsesAreOutsideLoop = true; + std::vector<BasedUser> UsersToProcess; + const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + AllUsesAreAddresses, + AllUsesAreOutsideLoop, + UsersToProcess); + if (AllUsesAreAddresses && + ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) { + if (!CheckPreInc) + return true; + // Any pre-inc iv use? + IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share]; + for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), + E = StrideUses.Users.end(); I != E; ++I) { + if (!I->isUseOfPostIncrementedValue()) + return true; + } + } + } + return false; +} + +/// isUsedByExitBranch - Return true if icmp is used by a loop terminating +/// conditional branch or it's and / or with other conditions before being used +/// as the condition. +static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) { + BasicBlock *CondBB = Cond->getParent(); + if (!L->isLoopExiting(CondBB)) + return false; + BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator()); + if (!TermBr || !TermBr->isConditional()) + return false; + + Value *User = *Cond->use_begin(); + Instruction *UserInst = dyn_cast<Instruction>(User); + while (UserInst && + (UserInst->getOpcode() == Instruction::And || + UserInst->getOpcode() == Instruction::Or)) { + if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB) + return false; + User = *User->use_begin(); + UserInst = dyn_cast<Instruction>(User); + } + return User == TermBr; +} + +static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse, + ScalarEvolution *SE, Loop *L, + const TargetLowering *TLI = 0) { + if (!L->contains(Cond->getParent())) + return false; + + if (!isa<SCEVConstant>(CondUse->getOffset())) + return false; + + // Handle only tests for equality for the moment. + if (!Cond->isEquality() || !Cond->hasOneUse()) + return false; + if (!isUsedByExitBranch(Cond, L)) + return false; + + Value *CondOp0 = Cond->getOperand(0); + const SCEV *IV = SE->getSCEV(CondOp0); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); + if (!AR || !AR->isAffine()) + return false; + + const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); + if (!SC || SC->getValue()->getSExtValue() < 0) + // If it's already counting down, don't do anything. + return false; + + // If the RHS of the comparison is not an loop invariant, the rewrite + // cannot be done. Also bail out if it's already comparing against a zero. + // If we are checking this before cmp stride optimization, check if it's + // comparing against a already legal immediate. 
+ Value *RHS = Cond->getOperand(1); + ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS); + if (!L->isLoopInvariant(RHS) || + (RHSC && RHSC->isZero()) || + (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue()))) + return false; + + // Make sure the IV is only used for counting. Value may be preinc or + // postinc; 2 uses in either case. + if (!CondOp0->hasNUses(2)) + return false; + + return true; +} + +/// OptimizeLoopTermCond - Change loop terminating condition to use the /// postinc iv when possible. void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { - // Finally, get the terminating condition for the loop if possible. If we - // can, we want to change it to use a post-incremented version of its - // induction variable, to allow coalescing the live ranges for the IV into - // one register value. BasicBlock *LatchBlock = L->getLoopLatch(); - BasicBlock *ExitingBlock = L->getExitingBlock(); - - if (!ExitingBlock) - // Multiple exits, just look at the exit in the latch block if there is one. - ExitingBlock = LatchBlock; - BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!TermBr) - return; - if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) - return; + bool LatchExit = L->isLoopExiting(LatchBlock); + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); - // Search IVUsesByStride to find Cond's IVUse if there is one. - IVStrideUse *CondUse = 0; - const SCEV *const *CondStride = 0; - ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); - if (!FindIVUserForCond(Cond, CondUse, CondStride)) - return; // setcc doesn't use the IV. - - if (ExitingBlock != LatchBlock) { - if (!Cond->hasOneUse()) - // See below, we don't want the condition to be cloned. - return; - - // If exiting block is the latch block, we know it's safe and profitable to - // transform the icmp to use post-inc iv. Otherwise do so only if it would - // not reuse another iv and its iv would be reused by other uses. We are - // optimizing for the case where the icmp is the only use of the iv. - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (I->getUser() == Cond) - continue; - if (!I->isUseOfPostIncrementedValue()) - return; - } + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; - // FIXME: This is expensive, and worse still ChangeCompareStride does a - // similar check. Can we perform all the icmp related transformations after - // StrengthReduceStridedIVUsers? - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) { - int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee; - ++NewStride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); - if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride) - continue; - int64_t SSInt = - cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SSInt == SInt) - return; // This can definitely be reused. 
- if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) - continue; - int64_t Scale = SSInt / SInt; - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - // Avoid rewriting the compare instruction with an iv of new stride - // if it's likely the new stride uses will be rewritten using the - // stride of the compare instruction. - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) - return; - } - } + // Finally, get the terminating condition for the loop if possible. If we + // can, we want to change it to use a post-incremented version of its + // induction variable, to allow coalescing the live ranges for the IV into + // one register value. - StrideNoReuse.insert(*CondStride); - } + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!TermBr) + continue; + // FIXME: Overly conservative, termination condition could be an 'or' etc.. + if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) + continue; - // If the trip count is computed in terms of a max (due to ScalarEvolution - // being unable to find a sufficient guard, for example), change the loop - // comparison to use SLT or ULT instead of NE. - Cond = OptimizeMax(L, Cond, CondUse); - - // If possible, change stride and operands of the compare instruction to - // eliminate one stride. - if (ExitingBlock == LatchBlock) - Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); - - // It's possible for the setcc instruction to be anywhere in the loop, and - // possible for it to have multiple users. If it is not immediately before - // the latch block branch, move it. - if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { - if (Cond->hasOneUse()) { // Condition has a single use, just move it. - Cond->moveBefore(TermBr); - } else { - // Otherwise, clone the terminating condition and insert into the loopend. - Cond = cast<ICmpInst>(Cond->clone()); - Cond->setName(L->getHeader()->getName() + ".termcond"); - LatchBlock->getInstList().insert(TermBr, Cond); - - // Clone the IVUse, as the old use still exists! - IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, - CondUse->getOperandValToReplace()); - CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); + // Search IVUsesByStride to find Cond's IVUse if there is one. + IVStrideUse *CondUse = 0; + const SCEV *CondStride = 0; + ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); + if (!FindIVUserForCond(Cond, CondUse, CondStride)) + continue; + + // If the latch block is exiting and it's not a single block loop, it's + // not safe to use postinc iv in other exiting blocks. FIXME: overly + // conservative? How about icmp stride optimization? + bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock); + if (UsePostInc && ExitingBlock != LatchBlock) { + if (!Cond->hasOneUse()) + // See below, we don't want the condition to be cloned. + UsePostInc = false; + else { + // If exiting block is the latch block, we know it's safe and profitable + // to transform the icmp to use post-inc iv. Otherwise do so only if it + // would not reuse another iv and its iv would be reused by other uses. + // We are optimizing for the case where the icmp is the only use of the + // iv. 
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride]; + for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), + E = StrideUses.Users.end(); I != E; ++I) { + if (I->getUser() == Cond) + continue; + if (!I->isUseOfPostIncrementedValue()) { + UsePostInc = false; + break; + } + } + } + + // If iv for the stride might be shared and any of the users use pre-inc + // iv might be used, then it's not safe to use post-inc iv. + if (UsePostInc && + isa<SCEVConstant>(CondStride) && + StrideMightBeShared(CondStride, L, true)) + UsePostInc = false; } - } - // If we get to here, we know that we can transform the setcc instruction to - // use the post-incremented version of the IV, allowing us to coalesce the - // live ranges for the IV correctly. - CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride)); - CondUse->setIsUseOfPostIncrementedValue(true); - Changed = true; + // If the trip count is computed in terms of a max (due to ScalarEvolution + // being unable to find a sufficient guard, for example), change the loop + // comparison to use SLT or ULT instead of NE. + Cond = OptimizeMax(L, Cond, CondUse); + + // If possible, change stride and operands of the compare instruction to + // eliminate one stride. However, avoid rewriting the compare instruction + // with an iv of new stride if it's likely the new stride uses will be + // rewritten using the stride of the compare instruction. + if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) { + // If the condition stride is a constant and it's the only use, we might + // want to optimize it first by turning it to count toward zero. + if (!StrideMightBeShared(CondStride, L, false) && + !ShouldCountToZero(Cond, CondUse, SE, L, TLI)) + Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); + } - ++NumLoopCond; -} + if (!UsePostInc) + continue; -/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding -/// when to exit the loop is used only for that purpose, try to rearrange things -/// so it counts down to a test against zero. -void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { + DEBUG(errs() << " Change loop exiting icmp to use postinc iv: " + << *Cond << '\n'); - // If the number of times the loop is executed isn't computable, give up. - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) - return; + // It's possible for the setcc instruction to be anywhere in the loop, and + // possible for it to have multiple users. If it is not immediately before + // the exiting block branch, move it. + if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { + if (Cond->hasOneUse()) { // Condition has a single use, just move it. + Cond->moveBefore(TermBr); + } else { + // Otherwise, clone the terminating condition and insert into the + // loopend. + Cond = cast<ICmpInst>(Cond->clone()); + Cond->setName(L->getHeader()->getName() + ".termcond"); + ExitingBlock->getInstList().insert(TermBr, Cond); + + // Clone the IVUse, as the old use still exists! + IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond, + CondUse->getOperandValToReplace()); + CondUse = &IU->IVUsesByStride[CondStride]->Users.back(); + } + } - // Get the terminating condition for the loop if possible (this isn't - // necessarily in the latch, or a block that's a predecessor of the header). - if (!L->getExitBlock()) - return; // More than one loop exit blocks. 
+ // If we get to here, we know that we can transform the setcc instruction to + // use the post-incremented version of the IV, allowing us to coalesce the + // live ranges for the IV correctly. + CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride)); + CondUse->setIsUseOfPostIncrementedValue(true); + Changed = true; - // Okay, there is one exit block. Try to find the condition that causes the - // loop to be exited. - BasicBlock *ExitingBlock = L->getExitingBlock(); - if (!ExitingBlock) - return; // More than one block exiting! + ++NumLoopCond; + } +} - // Okay, we've computed the exiting block. See what condition causes us to - // exit. - // - // FIXME: we should be able to handle switch instructions (with a single exit) - BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (TermBr == 0) return; - assert(TermBr->isConditional() && "If unconditional, it can't be in loop!"); - if (!isa<ICmpInst>(TermBr->getCondition())) - return; - ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); +bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride, + IVStrideUse* &CondUse, + Loop *L) { + // If the only use is an icmp of a loop exiting conditional branch, then + // attempt the optimization. + BasedUser User = BasedUser(*CondUse, SE); + assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!"); + ICmpInst *Cond = cast<ICmpInst>(User.Inst); + + // Less strict check now that compare stride optimization is done. + if (!ShouldCountToZero(Cond, CondUse, SE, L)) + return false; - // Handle only tests for equality for the moment, and only stride 1. - if (Cond->getPredicate() != CmpInst::ICMP_EQ) - return; - const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); - if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One) - return; - // If the RHS of the comparison is defined inside the loop, the rewrite - // cannot be done. - if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1))) - if (L->contains(CR->getParent())) - return; + Value *CondOp0 = Cond->getOperand(0); + PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0); + Instruction *Incr; + if (!PHIExpr) { + // Value tested is postinc. Find the phi node. + Incr = dyn_cast<BinaryOperator>(CondOp0); + // FIXME: Just use User.OperandValToReplace here? + if (!Incr || Incr->getOpcode() != Instruction::Add) + return false; - // Make sure the IV is only used for counting. Value may be preinc or - // postinc; 2 uses in either case. - if (!Cond->getOperand(0)->hasNUses(2)) - return; - PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0)); - Instruction *incr; - if (phi && phi->getParent()==L->getHeader()) { - // value tested is preinc. Find the increment. - // A CmpInst is not a BinaryOperator; we depend on this. - Instruction::use_iterator UI = phi->use_begin(); - incr = dyn_cast<BinaryOperator>(UI); - if (!incr) - incr = dyn_cast<BinaryOperator>(++UI); - // 1 use for postinc value, the phi. Unnecessarily conservative? - if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add) - return; - } else { - // Value tested is postinc. Find the phi node. 
- incr = dyn_cast<BinaryOperator>(Cond->getOperand(0)); - if (!incr || incr->getOpcode()!=Instruction::Add) - return; - - Instruction::use_iterator UI = Cond->getOperand(0)->use_begin(); - phi = dyn_cast<PHINode>(UI); - if (!phi) - phi = dyn_cast<PHINode>(++UI); + PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0)); + if (!PHIExpr) + return false; // 1 use for preinc value, the increment. - if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse()) - return; + if (!PHIExpr->hasOneUse()) + return false; + } else { + assert(isa<PHINode>(CondOp0) && + "Unexpected loop exiting counting instruction sequence!"); + PHIExpr = cast<PHINode>(CondOp0); + // Value tested is preinc. Find the increment. + // A CmpInst is not a BinaryOperator; we depend on this. + Instruction::use_iterator UI = PHIExpr->use_begin(); + Incr = dyn_cast<BinaryOperator>(UI); + if (!Incr) + Incr = dyn_cast<BinaryOperator>(++UI); + // One use for postinc value, the phi. Unnecessarily conservative? + if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add) + return false; } // Replace the increment with a decrement. - BinaryOperator *decr = - BinaryOperator::Create(Instruction::Sub, incr->getOperand(0), - incr->getOperand(1), "tmp", incr); - incr->replaceAllUsesWith(decr); - incr->eraseFromParent(); + DEBUG(errs() << "LSR: Examining use "); + DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false)); + DEBUG(errs() << " in Inst: " << *Cond << '\n'); + BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub, + Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr); + Incr->replaceAllUsesWith(Decr); + Incr->eraseFromParent(); // Substitute endval-startval for the original startval, and 0 for the - // original endval. Since we're only testing for equality this is OK even + // original endval. Since we're only testing for equality this is OK even // if the computation wraps around. BasicBlock *Preheader = L->getLoopPreheader(); Instruction *PreInsertPt = Preheader->getTerminator(); - int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0; - Value *startVal = phi->getIncomingValue(inBlock); - Value *endVal = Cond->getOperand(1); - // FIXME check for case where both are constant + unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0; + Value *StartVal = PHIExpr->getIncomingValue(InBlock); + Value *EndVal = Cond->getOperand(1); + DEBUG(errs() << " Optimize loop counting iv to count down [" + << *EndVal << " .. " << *StartVal << "]\n"); + + // FIXME: check for case where both are constant. 
Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); - BinaryOperator *NewStartVal = - BinaryOperator::Create(Instruction::Sub, endVal, startVal, - "tmp", PreInsertPt); - phi->setIncomingValue(inBlock, NewStartVal); + BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, + EndVal, StartVal, "tmp", PreInsertPt); + PHIExpr->setIncomingValue(InBlock, NewStartVal); Cond->setOperand(1, Zero); + DEBUG(errs() << " New icmp: " << *Cond << "\n"); + + int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); + const SCEV *NewStride = 0; + bool Found = false; + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + const SCEV *OldStride = IU->StrideOrder[i]; + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride)) + if (SC->getValue()->getSExtValue() == -SInt) { + Found = true; + NewStride = OldStride; + break; + } + } + + if (!Found) + NewStride = SE->getIntegerSCEV(-SInt, Stride->getType()); + IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0)); + IU->IVUsesByStride[Stride]->removeUser(CondUse); + + CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); + Stride = NewStride; - Changed = true; + ++NumCountZero; + + return true; } -bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { +/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding +/// when to exit the loop is used only for that purpose, try to rearrange things +/// so it counts down to a test against zero. +bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { + bool ThisChanged = false; + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + const SCEV *Stride = IU->StrideOrder[i]; + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(Stride); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. + if (!SI->first->isLoopInvariant(L)) + continue; + // If stride is a constant and it has an icmpinst use, check if we can + // optimize the loop to count down. + if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) { + Instruction *User = SI->second->Users.begin()->getUser(); + if (!isa<ICmpInst>(User)) + continue; + const SCEV *CondStride = Stride; + IVStrideUse *Use = &*SI->second->Users.begin(); + if (!OptimizeLoopCountIVOfStride(CondStride, Use, L)) + continue; + ThisChanged = true; + // Now check if it's possible to reuse this iv for other stride uses. + for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) { + const SCEV *SStride = IU->StrideOrder[j]; + if (SStride == CondStride) + continue; + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII = + IU->IVUsesByStride.find(SStride); + assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. + if (!SII->first->isLoopInvariant(L)) + continue; + // FIXME: Rewrite other stride using CondStride. + } + } + } + + Changed |= ThisChanged; + return ThisChanged; +} + +bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); DT = &getAnalysis<DominatorTree>(); SE = &getAnalysis<ScalarEvolution>(); Changed = false; + // If LoopSimplify form is not available, stay out of trouble. 
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) + return false; + if (!IU->IVUsesByStride.empty()) { DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName() << "\" "; @@ -2545,7 +2771,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // Change loop terminating condition to use the postinc iv when possible // and optimize loop terminating compare. FIXME: Move this after - // StrengthReduceStridedIVUsers? + // StrengthReduceIVUsersOfStride? OptimizeLoopTermCond(L); // FIXME: We can shrink overlarge IV's here. e.g. if the code has @@ -2561,26 +2787,12 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // IVsByStride keeps IVs for one particular loop. assert(IVsByStride.empty() && "Stale entries in IVsByStride?"); - // Note: this processes each stride/type pair individually. All users - // passed into StrengthReduceStridedIVUsers have the same type AND stride. - // Also, note that we iterate over IVUsesByStride indirectly by using - // StrideOrder. This extra layer of indirection makes the ordering of - // strides deterministic - not dependent on map order. - for (unsigned Stride = 0, e = IU->StrideOrder.size(); - Stride != e; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SI->first->isLoopInvariant(L)) - continue; - StrengthReduceStridedIVUsers(SI->first, *SI->second, L); - } - } + StrengthReduceIVUsers(L); - // After all sharing is done, see if we can adjust the loop to test against - // zero instead of counting up to a maximum. This is usually faster. - OptimizeLoopCountIV(L); + // After all sharing is done, see if we can adjust the loop to test against + // zero instead of counting up to a maximum. This is usually faster. + OptimizeLoopCountIV(L); + } // We're done analyzing this loop; release all the state we built up for it. IVsByStride.clear(); diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp deleted file mode 100644 index 837ec59..0000000 --- a/lib/Transforms/Scalar/LoopUnroll.cpp +++ /dev/null @@ -1,177 +0,0 @@ -//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass implements a simple loop unroller. It works best when loops have -// been canonicalized by the -indvars pass, allowing it to determine the trip -// counts of loops easily. 
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "loop-unroll" -#include "llvm/IntrinsicInst.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" -#include <climits> - -using namespace llvm; - -static cl::opt<unsigned> -UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden, - cl::desc("The cut-off point for automatic loop unrolling")); - -static cl::opt<unsigned> -UnrollCount("unroll-count", cl::init(0), cl::Hidden, - cl::desc("Use this unroll count for all loops, for testing purposes")); - -static cl::opt<bool> -UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, - cl::desc("Allows loops to be partially unrolled until " - "-unroll-threshold loop size is reached.")); - -namespace { - class LoopUnroll : public LoopPass { - public: - static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(&ID) {} - - /// A magic value for use with the Threshold parameter to indicate - /// that the loop unroll should be performed regardless of how much - /// code expansion would result. - static const unsigned NoThreshold = UINT_MAX; - - bool runOnLoop(Loop *L, LPPassManager &LPM); - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... - /// - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addRequired<LoopInfo>(); - AU.addPreservedID(LCSSAID); - AU.addPreserved<LoopInfo>(); - // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. - // If loop unroll does not preserve dom info then LCSSA pass on next - // loop will receive invalid dom info. - // For now, recreate dom info, if loop is unrolled. - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); - } - }; -} - -char LoopUnroll::ID = 0; -static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops"); - -Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } - -/// ApproximateLoopSize - Approximate the size of the loop. -static unsigned ApproximateLoopSize(const Loop *L) { - unsigned Size = 0; - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; - Instruction *Term = BB->getTerminator(); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (isa<PHINode>(I) && BB == L->getHeader()) { - // Ignore PHI nodes in the header. - } else if (I->hasOneUse() && I->use_back() == Term) { - // Ignore instructions only used by the loop terminator. - } else if (isa<DbgInfoIntrinsic>(I)) { - // Ignore debug instructions - } else if (isa<GetElementPtrInst>(I) && I->hasOneUse()) { - // Ignore GEP as they generally are subsumed into a load or store. - } else if (isa<CallInst>(I)) { - // Estimate size overhead introduced by call instructions which - // is higher than other instructions. Here 3 and 10 are magic - // numbers that help one isolated test case from PR2067 without - // negatively impacting measured benchmarks. - Size += isa<IntrinsicInst>(I) ? 3 : 10; - } else { - ++Size; - } - - // TODO: Ignore expressions derived from PHI and constants if inval of phi - // is a constant, or if operation is associative. This will get induction - // variables. 
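As a worked example with made-up numbers for the size heuristic above and the count selection that follows: suppose ApproximateLoopSize returns 20 (say, one non-intrinsic call counted as 10 plus ten ordinary instructions, with header PHIs, single-use GEPs and debug intrinsics ignored) and the known trip count is 12 with the default -unroll-threshold of 100. Full unrolling would cost 20 * 12 = 240 > 100, so with -unroll-allow-partial the count starts at 100 / 20 = 5 and is decremented until it divides the trip count, giving 4; the loop is then partially unrolled by a factor of 4.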
- } - } - - return Size; -} - -bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { - assert(L->isLCSSAForm()); - LoopInfo *LI = &getAnalysis<LoopInfo>(); - - BasicBlock *Header = L->getHeader(); - DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName() - << "] Loop %" << Header->getName() << "\n"); - (void)Header; - - // Find trip count - unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = UnrollCount; - - // Automatically select an unroll count. - if (Count == 0) { - // Conservative heuristic: if we know the trip count, see if we can - // completely unroll (subject to the threshold, checked below); otherwise - // try to find greatest modulo of the trip count which is still under - // threshold value. - if (TripCount == 0) - return false; - Count = TripCount; - } - - // Enforce the threshold. - if (UnrollThreshold != NoThreshold) { - unsigned LoopSize = ApproximateLoopSize(L); - DEBUG(errs() << " Loop Size = " << LoopSize << "\n"); - uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > UnrollThreshold) { - DEBUG(errs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << UnrollThreshold << "\n"); - if (!UnrollAllowPartial) { - DEBUG(errs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - return false; - } - // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = UnrollThreshold / LoopSize; - while (Count != 0 && TripCount%Count != 0) { - Count--; - } - if (Count < 2) { - DEBUG(errs() << " could not unroll partially\n"); - return false; - } - DEBUG(errs() << " partially unrolling with count: " << Count << "\n"); - } - } - - // Unroll the loop. - Function *F = L->getHeader()->getParent(); - if (!UnrollLoop(L, Count, LI, &LPM)) - return false; - - // FIXME: Reconstruct dom info, because it is not preserved properly. - DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - if (DT) { - DT->runOnFunction(*F); - DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>(); - if (DF) - DF->runOnFunction(*F); - } - return true; -} diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index c7b00da..38d267a 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -32,7 +32,6 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" @@ -407,6 +406,10 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ initLoopData(); Function *F = loopHeader->getParent(); + // If LoopSimplify was unable to form a preheader, don't do any unswitching. + if (!loopPreheader) + return false; + // If the condition is trivial, always unswitch. There is no code growth for // this case. if (!IsTrivialUnswitchCondition(LoopCond)) { @@ -957,7 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { Worklist.pop_back(); // Simple constant folding. 
- if (Constant *C = ConstantFoldInstruction(I, I->getContext())) { + if (Constant *C = ConstantFoldInstruction(I)) { ReplaceUsesOfWith(I, C, Worklist, L, LPM); continue; } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index af29f97..8466918 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -27,7 +27,6 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" @@ -198,8 +197,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { /// LowerNegateToMultiply - Replace 0-X with X*-1. /// static Instruction *LowerNegateToMultiply(Instruction *Neg, - std::map<AssertingVH<>, unsigned> &ValueRankMap, - LLVMContext &Context) { + std::map<AssertingVH<>, unsigned> &ValueRankMap) { Constant *Cst = Constant::getAllOnesValue(Neg->getType()); Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); @@ -255,7 +253,6 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops) { Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); unsigned Opcode = I->getOpcode(); - LLVMContext &Context = I->getContext(); // First step, linearize the expression if it is in ((A+B)+(C+D)) form. BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); @@ -265,13 +262,11 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, // transform them into multiplies by -1 so they can be reassociated. if (I->getOpcode() == Instruction::Mul) { if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) { - LHS = LowerNegateToMultiply(cast<Instruction>(LHS), - ValueRankMap, Context); + LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap); LHSBO = isReassociableOp(LHS, Opcode); } if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) { - RHS = LowerNegateToMultiply(cast<Instruction>(RHS), - ValueRankMap, Context); + RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap); RHSBO = isReassociableOp(RHS, Opcode); } } @@ -373,7 +368,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // version of the value is returned, and BI is left pointing at the instruction // that should be processed next by the reassociation pass. // -static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) { +static Value *NegateValue(Value *V, Instruction *BI) { // We are trying to expose opportunity for reassociation. One of the things // that we want to do to achieve this is to push a negation as deep into an // expression chain as possible, to expose the add instructions. In practice, @@ -386,8 +381,8 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { // Push the negates through the add. - I->setOperand(0, NegateValue(Context, I->getOperand(0), BI)); - I->setOperand(1, NegateValue(Context, I->getOperand(1), BI)); + I->setOperand(0, NegateValue(I->getOperand(0), BI)); + I->setOperand(1, NegateValue(I->getOperand(1), BI)); // We must move the add instruction here, because the neg instructions do // not dominate the old add instruction in general. By moving it, we are @@ -407,7 +402,7 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) { /// ShouldBreakUpSubtract - Return true if we should break up this subtract of /// X-Y into (X + -Y). 
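A hedged sketch (hypothetical code) of the rewrites these Reassociate helpers perform: a subtract that is not a pure negation is broken into an add of a negation, and a negation of a reassociable multiply becomes a multiply by -1, so longer add/mul chains become visible to the reassociator.

    // Illustration only.
    int f(int x, int y, int z) {
      int a = x - y;              // BreakUpSubtract: x + (0 - y)
      int b = 0 - (y * z);        // LowerNegateToMultiply: (y * z) * -1
      return a + b;
    }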
-static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) { +static bool ShouldBreakUpSubtract(Instruction *Sub) { // If this is a negation, we can't split it up! if (BinaryOperator::isNeg(Sub)) return false; @@ -431,7 +426,7 @@ static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) { /// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is /// only used by an add, transform this into (X+(0-Y)) to promote better /// reassociation. -static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub, +static Instruction *BreakUpSubtract(Instruction *Sub, std::map<AssertingVH<>, unsigned> &ValueRankMap) { // Convert a subtract into an add and a neg instruction... so that sub // instructions can be commuted with other add instructions... @@ -439,7 +434,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub, // Calculate the negative value of Operand 1 of the sub instruction... // and set it as the RHS of the add instruction we just made... // - Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub); + Value *NegVal = NegateValue(Sub->getOperand(1), Sub); Instruction *New = BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); New->takeName(Sub); @@ -457,8 +452,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub, /// by one, change this into a multiply by a constant to assist with further /// reassociation. static Instruction *ConvertShiftToMul(Instruction *Shl, - std::map<AssertingVH<>, unsigned> &ValueRankMap, - LLVMContext &Context) { + std::map<AssertingVH<>, unsigned> &ValueRankMap) { // If an operand of this shift is a reassociable multiply, or if the shift // is used by a reassociable multiply or add, turn into a multiply. if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || @@ -781,13 +775,11 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, /// ReassociateBB - Inspect all of the instructions in this basic block, /// reassociating them as we go. void Reassociate::ReassociateBB(BasicBlock *BB) { - LLVMContext &Context = BB->getContext(); - for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { Instruction *BI = BBI++; if (BI->getOpcode() == Instruction::Shl && isa<ConstantInt>(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap, Context)) { + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { MadeChange = true; BI = NI; } @@ -800,8 +792,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { // If this is a subtract instruction which is not already in negate form, // see if we can convert it to X+-Y. if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(Context, BI)) { - BI = BreakUpSubtract(Context, BI, ValueRankMap); + if (ShouldBreakUpSubtract(BI)) { + BI = BreakUpSubtract(BI, ValueRankMap); MadeChange = true; } else if (BinaryOperator::isNeg(BI)) { // Otherwise, this is a negation. 
See if the operand is a multiply tree @@ -809,7 +801,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && (!BI->hasOneUse() || !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap, Context); + BI = LowerNegateToMultiply(BI, ValueRankMap); MadeChange = true; } } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 509a6db..c202a2c 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -795,9 +795,14 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { return markOverdefined(&EVI); Value *AggVal = EVI.getAggregateOperand(); - unsigned i = *EVI.idx_begin(); - LatticeVal EltVal = getStructValueState(AggVal, i); - mergeInValue(getValueState(&EVI), &EVI, EltVal); + if (isa<StructType>(AggVal->getType())) { + unsigned i = *EVI.idx_begin(); + LatticeVal EltVal = getStructValueState(AggVal, i); + mergeInValue(getValueState(&EVI), &EVI, EltVal); + } else { + // Otherwise, must be extracting from an array. + return markOverdefined(&EVI); + } } void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp index c047fca..001267a 100644 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ b/lib/Transforms/Scalar/SCCVN.cpp @@ -507,7 +507,7 @@ void ValueTable::erase(Value *V) { /// verifyRemoved - Verify that the value is removed from all internal data /// structures. void ValueTable::verifyRemoved(const Value *V) const { - for (DenseMap<Value*, uint32_t>::iterator + for (DenseMap<Value*, uint32_t>::const_iterator I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { assert(I->first != V && "Inst still occurs in value numbering map!"); } @@ -629,9 +629,6 @@ bool SCCVN::runOnFunction(Function& F) { } } - // FIXME: This code is commented out for now, because it can lead to the - // insertion of a lot of redundant PHIs being inserted by SSAUpdater. -#if 0 // Perform a forward data-flow to compute availability at all points on // the CFG. do { @@ -709,7 +706,6 @@ bool SCCVN::runOnFunction(Function& F) { CurInst->eraseFromParent(); } } -#endif VT.clear(); for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 5669da0..b54565c 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -26,10 +26,6 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCFGSimplificationPass()); } -void LLVMAddCondPropagationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCondPropagationPass()); -} - void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createDeadStoreEliminationPass()); } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 575c93b..611505e 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -100,7 +100,7 @@ public: /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. - void EmitPutChar(Value *Char, IRBuilder<> &B); + Value *EmitPutChar(Value *Char, IRBuilder<> &B); /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. @@ -252,18 +252,20 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, /// EmitPutChar - Emit a call to the putchar function. 
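Regarding the SCCP change above: lattice state is tracked per struct element only, so an extractvalue whose aggregate is an array has to be handled conservatively. A simplified standalone sketch of that dispatch, with a toy lattice and hypothetical names:

enum class Lattice { Undefined, Constant, Overdefined };

// Struct elements keep their tracked lattice value; array extracts fall
// straight to Overdefined, matching the new else branch above.
Lattice evalExtractValue(bool AggregateIsStruct, Lattice ElementState) {
  if (AggregateIsStruct)
    return ElementState;          // propagate the element's lattice value
  return Lattice::Overdefined;    // arrays are not tracked element-wise
}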
This assumes that Char /// is an integer. -void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { +Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Module *M = Caller->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), Type::getInt32Ty(*Context), NULL); CallInst *CI = B.CreateCall(PutChar, B.CreateIntCast(Char, Type::getInt32Ty(*Context), + /*isSigned*/true, "chari"), "putchar"); if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); + return CI; } /// EmitPutS - Emit a call to the puts function. This assumes that Str is @@ -302,7 +304,8 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { Type::getInt32Ty(*Context), Type::getInt32Ty(*Context), File->getType(), NULL); - Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari"); + Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true, + "chari"); CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) @@ -955,6 +958,17 @@ struct MemCmpOpt : public LibCallOptimization { return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType()); } + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + std::string LHSStr, RHSStr; + if (GetConstantStringInfo(LHS, LHSStr) && + GetConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.length() || Len > RHSStr.length()) + return 0; + uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); + return ConstantInt::get(CI->getType(), Ret); + } + return 0; } }; @@ -1314,11 +1328,13 @@ struct PrintFOpt : public LibCallOptimization { return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 0); - // printf("x") -> putchar('x'), even for '%'. + // printf("x") -> putchar('x'), even for '%'. Return the result of putchar + // in case there is an error writing to stdout. if (FormatStr.size() == 1) { - EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B); - return CI->use_empty() ? (Value*)CI : - ConstantInt::get(CI->getType(), 1); + Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), + FormatStr[0]), B); + if (CI->use_empty()) return CI; + return B.CreateIntCast(Res, CI->getType(), true); } // printf("foo\n") --> puts("foo") @@ -1339,9 +1355,10 @@ struct PrintFOpt : public LibCallOptimization { // printf("%c", chr) --> putchar(*(i8*)dst) if (FormatStr == "%c" && CI->getNumOperands() > 2 && isa<IntegerType>(CI->getOperand(2)->getType())) { - EmitPutChar(CI->getOperand(2), B); - return CI->use_empty() ? 
(Value*)CI : - ConstantInt::get(CI->getType(), 1); + Value *Res = EmitPutChar(CI->getOperand(2), B); + + if (CI->use_empty()) return CI; + return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) @@ -2479,10 +2496,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // lround, lroundf, lroundl: // * lround(cnst) -> cnst' // -// memcmp: -// * memcmp(x,y,l) -> cnst -// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l) -// // pow, powf, powl: // * pow(exp(x),y) -> exp(x*y) // * pow(sqrt(x),y) -> pow(x,y*0.5) diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 4864e23..b06ae3d 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -359,8 +359,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { Instruction *Inst = BI++; if (isInstructionTriviallyDead(Inst)) Inst->eraseFromParent(); - else if (Constant *C = ConstantFoldInstruction(Inst, - Inst->getContext())) { + else if (Constant *C = ConstantFoldInstruction(Inst)) { Inst->replaceAllUsesWith(C); Inst->eraseFromParent(); } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index b56e170..4119cb9 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -25,7 +25,7 @@ // unlikely, that the return returns something else (like constant 0), and // can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in // the function return the exact same value. -// 4. If it can prove that callees do not access theier caller stack frame, +// 4. If it can prove that callees do not access their caller stack frame, // they are marked as eligible for tail call elimination (by the code // generator). // @@ -58,6 +58,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -75,7 +76,7 @@ namespace { private: bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, - std::vector<PHINode*> &ArgumentPHIs, + SmallVector<PHINode*, 8> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail); bool CanMoveAboveCall(Instruction *I, CallInst *CI); Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI); @@ -90,7 +91,6 @@ FunctionPass *llvm::createTailCallEliminationPass() { return new TailCallElim(); } - /// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by /// callees of this function. We only do very simple analysis right now, this /// could be expanded in the future to use mod/ref information for particular @@ -100,7 +100,7 @@ static bool AllocaMightEscapeToCalls(AllocaInst *AI) { return true; } -/// FunctionContainsAllocas - Scan the specified basic block for alloca +/// CheckForEscapingAllocas - Scan the specified basic block for alloca /// instructions. If it contains any that might be accessed by calls, return /// true. 
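Looking back at the MemCmpOpt change above: memcmp(x, y, l) can be folded to a constant when both buffers are known constant strings, but only if l does not read past the end of either. A standalone sketch of that guard, using std::string in place of the GetConstantStringInfo result; the helper name is illustrative only:

#include <cstring>
#include <string>

// Returns true and sets Result only when folding is safe, i.e. Len stays
// within the bounds of both constant strings.
bool foldConstantMemCmp(const std::string &LHS, const std::string &RHS,
                        std::size_t Len, int &Result) {
  if (Len > LHS.length() || Len > RHS.length())
    return false;                               // would read out of bounds
  Result = std::memcmp(LHS.data(), RHS.data(), Len);
  return true;
}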
static bool CheckForEscapingAllocas(BasicBlock *BB, @@ -127,7 +127,7 @@ bool TailCallElim::runOnFunction(Function &F) { BasicBlock *OldEntry = 0; bool TailCallsAreMarkedTail = false; - std::vector<PHINode*> ArgumentPHIs; + SmallVector<PHINode*, 8> ArgumentPHIs; bool MadeChange = false; bool FunctionContainsEscapingAllocas = false; @@ -154,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) { /// happen. This bug is PR962. if (FunctionContainsEscapingAllocas) return false; - // Second pass, change any tail calls to loops. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -204,7 +203,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { if (I->mayHaveSideEffects()) // This also handles volatile loads. return false; - if (LoadInst* L = dyn_cast<LoadInst>(I)) { + if (LoadInst *L = dyn_cast<LoadInst>(I)) { // Loads may always be moved above calls without side effects. if (CI->mayHaveSideEffects()) { // Non-volatile loads may be moved above a call with side effects if it @@ -235,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // We currently handle static constants and arguments that are not modified as // part of the recursion. // -static bool isDynamicConstant(Value *V, CallInst *CI) { +static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { if (isa<Constant>(V)) return true; // Static constants are always dyn consts // Check to see if this is an immutable argument, if so, the value @@ -253,6 +252,15 @@ static bool isDynamicConstant(Value *V, CallInst *CI) { if (CI->getOperand(ArgNo+1) == Arg) return true; } + + // Switch cases are always constant integers. If the value is being switched + // on and the return is only reachable from one of its cases, it's + // effectively constant. + if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor()) + if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator())) + if (SI->getCondition() == V) + return SI->getDefaultDest() != RI->getParent(); + // Not a constant or immutable argument, we can't safely transform. return false; } @@ -265,10 +273,6 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { Function *F = TheRI->getParent()->getParent(); Value *ReturnedValue = 0; - // TODO: Handle multiple value ret instructions; - if (isa<StructType>(F->getReturnType())) - return 0; - for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) if (RI != TheRI) { @@ -278,7 +282,7 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { // evaluatable at the start of the initial invocation of the function, // instead of at the end of the evaluation. // - if (!isDynamicConstant(RetOp, CI)) + if (!isDynamicConstant(RetOp, CI, RI)) return 0; if (ReturnedValue && RetOp != ReturnedValue) @@ -315,7 +319,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, - std::vector<PHINode*> &ArgumentPHIs, + SmallVector<PHINode*, 8> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail) { BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); |
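To close out the TailRecursionElimination change above: a return that is only reachable from a single switch case may return the switched-on value itself, because on that path the value can only be the case constant, provided the block is not the switch's default destination. A hypothetical function shape that this now lets the pass treat as accumulator recursion:

// sumto(0) returns the switched-on value n, which on that path is known to
// be 0; the recursive arm can then be rewritten with an accumulator.
unsigned sumto(unsigned n) {
  switch (n) {
  case 0:
    return n;                 // reachable only when n == 0
  default:
    return n + sumto(n - 1);  // tail-recursion-elimination candidate
  }
}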