diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
commit | cd749a9c07f1de2fb8affde90537efa4bc3e7c54 (patch) | |
tree | b21f6de4e08b89bb7931806bab798fc2a5e3a686 /lib/Transforms/Scalar/JumpThreading.cpp | |
parent | 72621d11de5b873f1695f391eb95f0b336c3d2d4 (diff) | |
download | FreeBSD-src-cd749a9c07f1de2fb8affde90537efa4bc3e7c54.zip FreeBSD-src-cd749a9c07f1de2fb8affde90537efa4bc3e7c54.tar.gz |
Update llvm to r84119.
Diffstat (limited to 'lib/Transforms/Scalar/JumpThreading.cpp')
-rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 550 |
1 files changed, 366 insertions, 184 deletions
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index dee7bfb..8b11edd 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" @@ -26,13 +27,13 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumThreads, "Number of jumps threaded"); STATISTIC(NumFolds, "Number of terminators folded"); +STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi"); static cl::opt<unsigned> Threshold("jump-threading-threshold", @@ -56,7 +57,7 @@ namespace { /// In this case, the unconditional branch at the end of the first if can be /// revectored to the false side of the second if. /// - class VISIBILITY_HIDDEN JumpThreading : public FunctionPass { + class JumpThreading : public FunctionPass { TargetData *TD; #ifdef NDEBUG SmallPtrSet<BasicBlock*, 16> LoopHeaders; @@ -68,15 +69,16 @@ namespace { JumpThreading() : FunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); } bool runOnFunction(Function &F); void FindLoopHeaders(Function &F); bool ProcessBlock(BasicBlock *BB); - bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB, - unsigned JumpThreadCost); + bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB); + bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, + BasicBlock *PredBB); + BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val); bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); @@ -99,8 +101,8 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } /// runOnFunction - Top level algorithm. /// bool JumpThreading::runOnFunction(Function &F) { - DOUT << "Jump threading on function '" << F.getNameStart() << "'\n"; - TD = &getAnalysis<TargetData>(); + DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n"); + TD = getAnalysisIfAvailable<TargetData>(); FindLoopHeaders(F); @@ -119,8 +121,8 @@ bool JumpThreading::runOnFunction(Function &F) { // edges which simplifies the CFG. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { - DOUT << " JT: Deleting dead block '" << BB->getNameStart() - << "' with terminator: " << *BB->getTerminator(); + DEBUG(errs() << " JT: Deleting dead block '" << BB->getName() + << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); DeleteDeadBlock(BB); Changed = true; @@ -134,6 +136,48 @@ bool JumpThreading::runOnFunction(Function &F) { return EverChanged; } +/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to +/// thread across it. +static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { + /// Ignore PHI nodes, these will be flattened when duplication happens. + BasicBlock::const_iterator I = BB->getFirstNonPHI(); + + // Sum up the cost of each instruction until we get to the terminator. Don't + // include the terminator because the copy won't include it. + unsigned Size = 0; + for (; !isa<TerminatorInst>(I); ++I) { + // Debugger intrinsics don't incur code size. + if (isa<DbgInfoIntrinsic>(I)) continue; + + // If this is a pointer->pointer bitcast, it is free. + if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) + continue; + + // All other instructions count for at least one unit. + ++Size; + + // Calls are more expensive. If they are non-intrinsic calls, we model them + // as having cost of 4. If they are a non-vector intrinsic, we model them + // as having cost of 2 total, and if they are a vector intrinsic, we model + // them as having cost 1. + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + if (!isa<IntrinsicInst>(CI)) + Size += 3; + else if (!isa<VectorType>(CI->getType())) + Size += 1; + } + } + + // Threading through a switch statement is particularly profitable. If this + // block ends in a switch, decrease its cost to make it more likely to happen. + if (isa<SwitchInst>(I)) + Size = Size > 6 ? Size-6 : 0; + + return Size; +} + + + /// FindLoopHeaders - We do not want jump threading to turn proper loop /// structures into irreducible loops. Doing this breaks up the loop nesting /// hierarchy and pessimizes later transformations. To prevent this from @@ -173,52 +217,34 @@ BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) { if (CommonPreds.size() == 1) return CommonPreds[0]; - DOUT << " Factoring out " << CommonPreds.size() - << " common predecessors.\n"; + DEBUG(errs() << " Factoring out " << CommonPreds.size() + << " common predecessors.\n"); return SplitBlockPredecessors(PN->getParent(), &CommonPreds[0], CommonPreds.size(), ".thr_comm", this); } -/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to -/// thread across it. -static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { - /// Ignore PHI nodes, these will be flattened when duplication happens. - BasicBlock::const_iterator I = BB->getFirstNonPHI(); - - // Sum up the cost of each instruction until we get to the terminator. Don't - // include the terminator because the copy won't include it. - unsigned Size = 0; - for (; !isa<TerminatorInst>(I); ++I) { - // Debugger intrinsics don't incur code size. - if (isa<DbgInfoIntrinsic>(I)) continue; - - // If this is a pointer->pointer bitcast, it is free. - if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) - continue; - - // All other instructions count for at least one unit. - ++Size; - - // Calls are more expensive. If they are non-intrinsic calls, we model them - // as having cost of 4. If they are a non-vector intrinsic, we model them - // as having cost of 2 total, and if they are a vector intrinsic, we model - // them as having cost 1. - if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (!isa<IntrinsicInst>(CI)) - Size += 3; - else if (!isa<VectorType>(CI->getType())) - Size += 1; - } +/// GetBestDestForBranchOnUndef - If we determine that the specified block ends +/// in an undefined jump, decide which block is best to revector to. +/// +/// Since we can pick an arbitrary destination, we pick the successor with the +/// fewest predecessors. This should reduce the in-degree of the others. +/// +static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { + TerminatorInst *BBTerm = BB->getTerminator(); + unsigned MinSucc = 0; + BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); + // Compute the successor with the minimum number of predecessors. + unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { + TestBB = BBTerm->getSuccessor(i); + unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + if (NumPreds < MinNumPreds) + MinSucc = i; } - // Threading through a switch statement is particularly profitable. If this - // block ends in a switch, decrease its cost to make it more likely to happen. - if (isa<SwitchInst>(I)) - Size = Size > 6 ? Size-6 : 0; - - return Size; + return MinSucc; } /// ProcessBlock - If there are any predecessors whose control can be threaded @@ -262,39 +288,28 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // terminator to an unconditional branch. This can occur due to threading in // other blocks. if (isa<ConstantInt>(Condition)) { - DOUT << " In block '" << BB->getNameStart() - << "' folding terminator: " << *BB->getTerminator(); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; ConstantFoldTerminator(BB); return true; } // If the terminator is branching on an undef, we can pick any of the - // successors to branch to. Since this is arbitrary, we pick the successor - // with the fewest predecessors. This should reduce the in-degree of the - // others. + // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { - TerminatorInst *BBTerm = BB->getTerminator(); - unsigned MinSucc = 0; - BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); - // Compute the successor with the minimum number of predecessors. - unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { - TestBB = BBTerm->getSuccessor(i); - unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - if (NumPreds < MinNumPreds) - MinSucc = i; - } + unsigned BestSucc = GetBestDestForJumpOnUndef(BB); // Fold the branch/switch. + TerminatorInst *BBTerm = BB->getTerminator(); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { - if (i == MinSucc) continue; + if (i == BestSucc) continue; BBTerm->getSuccessor(i)->removePredecessor(BB); } - DOUT << " In block '" << BB->getNameStart() - << "' folding undef terminator: " << *BBTerm; - BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding undef terminator: " << *BBTerm << '\n'); + BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); return true; } @@ -419,8 +434,8 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, else if (PredBI->getSuccessor(0) != BB) BranchDir = false; else { - DOUT << " In block '" << PredBB->getNameStart() - << "' folding terminator: " << *PredBB->getTerminator(); + DEBUG(errs() << " In block '" << PredBB->getName() + << "' folding terminator: " << *PredBB->getTerminator() << '\n'); ++NumFolds; ConstantFoldTerminator(PredBB); return true; @@ -431,29 +446,24 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, // If the dest block has one predecessor, just fix the branch condition to a // constant and fold it. if (BB->getSinglePredecessor()) { - DOUT << " In block '" << BB->getNameStart() - << "' folding condition to '" << BranchDir << "': " - << *BB->getTerminator(); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding condition to '" << BranchDir << "': " + << *BB->getTerminator() << '\n'); ++NumFolds; - DestBI->setCondition(Context->getConstantInt(Type::Int1Ty, BranchDir)); + Value *OldCond = DestBI->getCondition(); + DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BranchDir)); ConstantFoldTerminator(BB); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); return true; } - - // Otherwise we need to thread from PredBB to DestBB's successor which - // involves code duplication. Check to see if it is worth it. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } + // Next, figure out which successor we are threading to. BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } /// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that @@ -472,7 +482,6 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, if (PredBB == DestBB) return false; - SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator()); SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator()); @@ -508,8 +517,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, // Otherwise, we're safe to make the change. Make sure that the edge from // DestSI to DestSucc is not critical and has no PHI nodes. - DOUT << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI; - DOUT << "THROUGH: " << *DestSI; + DEBUG(errs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI); + DEBUG(errs() << "THROUGH: " << *DestSI); // If the destination has PHI nodes, just split the edge for updating // simplicity. @@ -564,7 +573,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If the returned value is the load itself, replace with an undef. This can // only happen in dead loops. - if (AvailableVal == LI) AvailableVal = Context->getUndef(LI->getType()); + if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(AvailableVal); LI->eraseFromParent(); return true; @@ -685,49 +694,74 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { } -/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in +/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in /// the current block. See if there are any simplifications we can do based on /// inputs to the phi node. /// bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { - // See if the phi node has any constant values. If so, we can determine where - // the corresponding predecessor will branch. - ConstantInt *PredCst = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i)))) - break; - - // If no incoming value has a constant, we don't know the destination of any - // predecessors. - if (PredCst == 0) - return false; - - // See if the cost of duplicating this block is low enough. BasicBlock *BB = PN->getParent(); - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; + + // See if the phi node has any constant integer or undef values. If so, we + // can determine where the corresponding predecessor will branch. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PredVal = PN->getIncomingValue(i); + + // Check to see if this input is a constant integer. If so, the direction + // of the branch is predictable. + if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) { + // Merge any common predecessors that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI); + + BasicBlock *SuccBB; + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + SuccBB = BI->getSuccessor(CI->isZero()); + else { + SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); + SuccBB = SI->getSuccessor(SI->findCaseValue(CI)); + } + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB); + } + + // If the input is an undef, then it doesn't matter which way it will go. + // Pick an arbitrary dest and thread the edge. + if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) { + // Merge any common predecessors that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV); + BasicBlock *SuccBB = + BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB)); + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB); + } } - // If so, we can actually do this threading. Merge any common predecessors - // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); + // If the incoming values are all variables, we don't know the destination of + // any predecessors. However, if any of the predecessor blocks end in an + // unconditional branch, we can *duplicate* the jump into that block in order + // to further encourage jump threading and to eliminate cases where we have + // branch on a phi of an icmp (branch on icmp is much better). + + // We don't want to do this tranformation for switches, because we don't + // really want to duplicate a switch. + if (isa<SwitchInst>(BB->getTerminator())) + return false; - // Next, figure out which successor we are threading to. - BasicBlock *SuccBB; - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) - SuccBB = BI->getSuccessor(PredCst == Context->getConstantIntFalse()); - else { - SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); - SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst)); + // Look for unconditional branch predecessors. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = PN->getIncomingBlock(i); + if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator())) + if (PredBr->isUnconditional() && + // Try to duplicate BB into PredBB. + DuplicateCondBranchOnPHIIntoPred(BB, PredBB)) + return true; } - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + + return false; } + /// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch /// whose condition is an AND/OR where one side is PN. If PN has constant /// operands that permit us to evaluate the condition for some operand, thread @@ -756,7 +790,8 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, // We can only do the simplification for phi nodes of 'false' with AND or // 'true' with OR. See if we have any entries in the phi for this. unsigned PredNo = ~0U; - ConstantInt *PredCst = Context->getConstantInt(Type::Int1Ty, !isAnd); + ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()), + !isAnd); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) == PredCst) { PredNo = i; @@ -768,14 +803,6 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, if (PredNo == ~0U) return false; - // See if the cost of duplicating this block is low enough. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } - // If so, we can actually do this threading. Merge any common predecessors // that will act the same. BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); @@ -787,7 +814,7 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } /// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right @@ -795,15 +822,15 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, /// result can not be determined, a null pointer is returned. static Constant *GetResultOfComparison(CmpInst::Predicate pred, Value *LHS, Value *RHS, - LLVMContext* Context) { + LLVMContext &Context) { if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return Context->getConstantExprCompare(pred, CLHS, CRHS); + return ConstantExpr::getCompare(pred, CLHS, CRHS); if (LHS == RHS) if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType())) return ICmpInst::isTrueWhenEqual(pred) ? - Context->getConstantIntTrue() : Context->getConstantIntFalse(); + ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context); return 0; } @@ -829,7 +856,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { PredVal = PN->getIncomingValue(i); Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, - RHS, Context); + RHS, Cmp->getContext()); if (!Res) { PredVal = 0; continue; @@ -854,14 +881,6 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { if (PredVal == 0) return false; - // See if the cost of duplicating this block is low enough. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } - // If so, we can actually do this threading. Merge any common predecessors // that will act the same. BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal); @@ -870,58 +889,77 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } +/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new +/// predecessor to the PHIBB block. If it has PHI nodes, add entries for +/// NewPred using the entries from OldPred (suitably mapped). +static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, + BasicBlock *OldPred, + BasicBlock *NewPred, + DenseMap<Instruction*, Value*> &ValueMap) { + for (BasicBlock::iterator PNI = PHIBB->begin(); + PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) { + // Ok, we have a PHI node. Figure out what the incoming value was for the + // DestBlock. + Value *IV = PN->getIncomingValueForBlock(OldPred); + + // Remap the value if necessary. + if (Instruction *Inst = dyn_cast<Instruction>(IV)) { + DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst); + if (I != ValueMap.end()) + IV = I->second; + } + + PN->addIncoming(IV, NewPred); + } +} + /// ThreadEdge - We have decided that it is safe and profitable to thread an /// edge from PredBB to SuccBB across BB. Transform the IR to reflect this /// change. bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, - BasicBlock *SuccBB, unsigned JumpThreadCost) { - + BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. if (SuccBB == BB) { - DOUT << " Not threading across BB '" << BB->getNameStart() - << "' - would thread to self!\n"; + DEBUG(errs() << " Not threading across BB '" << BB->getName() + << "' - would thread to self!\n"); return false; } // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) { - DOUT << " Not threading from '" << PredBB->getNameStart() - << "' across loop header BB '" << BB->getNameStart() - << "' to dest BB '" << SuccBB->getNameStart() - << "' - it might create an irreducible loop!\n"; + DEBUG(errs() << " Not threading from '" << PredBB->getName() + << "' across loop header BB '" << BB->getName() + << "' to dest BB '" << SuccBB->getName() + << "' - it might create an irreducible loop!\n"); return false; } - // And finally, do it! - DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '" - << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost - << ", across block:\n " - << *BB << "\n"; - - // Jump Threading can not update SSA properties correctly if the values - // defined in the duplicated block are used outside of the block itself. For - // this reason, we spill all values that are used outside of BB to the stack. - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - if (!I->isUsedOutsideOfBlock(BB)) - continue; - - // We found a use of I outside of BB. Create a new stack slot to - // break this inter-block usage pattern. - DemoteRegToStack(*I); + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + if (JumpThreadCost > Threshold) { + DEBUG(errs() << " Not threading BB '" << BB->getName() + << "' - Cost is too high: " << JumpThreadCost << "\n"); + return false; } - + + // And finally, do it! + DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '" + << SuccBB->getName() << "' with cost: " << JumpThreadCost + << ", across block:\n " + << *BB << "\n"); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. DenseMap<Instruction*, Value*> ValueMapping; - BasicBlock *NewBB = - BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), + BB->getName()+".thread", + BB->getParent(), BB); NewBB->moveAfter(PredBB); BasicBlock::iterator BI = BB->begin(); @@ -932,7 +970,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // mapping and using it to remap operands in the cloned instructions. for (; !isa<TerminatorInst>(BI); ++BI) { Instruction *New = BI->clone(); - New->setName(BI->getNameStart()); + New->setName(BI->getName()); NewBB->getInstList().push_back(New); ValueMapping[BI] = New; @@ -951,21 +989,48 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the // PHI nodes for NewBB now. - for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) { - PHINode *PN = cast<PHINode>(PNI); - // Ok, we have a PHI node. Figure out what the incoming value was for the - // DestBlock. - Value *IV = PN->getIncomingValueForBlock(BB); - - // Remap the value if necessary. - if (Instruction *Inst = dyn_cast<Instruction>(IV)) { - DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); - if (I != ValueMapping.end()) - IV = I->second; + AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); + + // If there were values defined in BB that are used outside the block, then we + // now have to update all uses of the value to use either the original value, + // the cloned value, or some PHI derived value. This can require arbitrary + // PHI insertion, of which we are prepared to do, clean these up now. + SSAUpdater SSAUpdate; + SmallVector<Use*, 16> UsesToRename; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + // Scan all uses of this instruction to see if it is used outside of its + // block, and if so, record them in UsesToRename. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (UserPN->getIncomingBlock(UI) == BB) + continue; + } else if (User->getParent() == BB) + continue; + + UsesToRename.push_back(&UI.getUse()); } - PN->addIncoming(IV, NewBB); + + // If there are no uses outside the block, we're done with this instruction. + if (UsesToRename.empty()) + continue; + + DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + + // We found a use of I outside of BB. Rename all uses of I that are outside + // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks + // with the two values we know. + SSAUpdate.Initialize(I); + SSAUpdate.AddAvailableValue(BB, I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); + DEBUG(errs() << "\n"); } + // Ok, NewBB is good to go. Update the terminator of PredBB to jump to // NewBB instead of BB. This eliminates predecessors from BB, which requires // us to simplify any PHI nodes in BB. @@ -982,7 +1047,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BI = NewBB->begin(); for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { Inst->replaceAllUsesWith(C); Inst->eraseFromParent(); continue; @@ -995,3 +1060,120 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, ++NumThreads; return true; } + +/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch +/// to BB which contains an i1 PHI node and a conditional branch on that PHI. +/// If we can duplicate the contents of BB up into PredBB do so now, this +/// improves the odds that the branch will be on an analyzable instruction like +/// a compare. +bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, + BasicBlock *PredBB) { + // If BB is a loop header, then duplicating this block outside the loop would + // cause us to transform this into an irreducible loop, don't do this. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) { + DEBUG(errs() << " Not duplicating loop header '" << BB->getName() + << "' into predecessor block '" << PredBB->getName() + << "' - it might create an irreducible loop!\n"); + return false; + } + + unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); + if (DuplicationCost > Threshold) { + DEBUG(errs() << " Not duplicating BB '" << BB->getName() + << "' - Cost is too high: " << DuplicationCost << "\n"); + return false; + } + + // Okay, we decided to do this! Clone all the instructions in BB onto the end + // of PredBB. + DEBUG(errs() << " Duplicating block '" << BB->getName() << "' into end of '" + << PredBB->getName() << "' to eliminate branch on phi. Cost: " + << DuplicationCost << " block is:" << *BB << "\n"); + + // We are going to have to map operands from the original BB block into the + // PredBB block. Evaluate PHI nodes in BB. + DenseMap<Instruction*, Value*> ValueMapping; + + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); + + // Clone the non-phi instructions of BB into PredBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + for (; BI != BB->end(); ++BI) { + Instruction *New = BI->clone(); + New->setName(BI->getName()); + PredBB->getInstList().insert(OldPredBranch, New); + ValueMapping[BI] = New; + + // Remap operands to patch up intra-block references. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { + DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } + } + + // Check to see if the targets of the branch had PHI nodes. If so, we need to + // add entries to the PHI nodes for branch from PredBB now. + BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator()); + AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB, + ValueMapping); + AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB, + ValueMapping); + + // If there were values defined in BB that are used outside the block, then we + // now have to update all uses of the value to use either the original value, + // the cloned value, or some PHI derived value. This can require arbitrary + // PHI insertion, of which we are prepared to do, clean these up now. + SSAUpdater SSAUpdate; + SmallVector<Use*, 16> UsesToRename; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + // Scan all uses of this instruction to see if it is used outside of its + // block, and if so, record them in UsesToRename. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (UserPN->getIncomingBlock(UI) == BB) + continue; + } else if (User->getParent() == BB) + continue; + + UsesToRename.push_back(&UI.getUse()); + } + + // If there are no uses outside the block, we're done with this instruction. + if (UsesToRename.empty()) + continue; + + DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + + // We found a use of I outside of BB. Rename all uses of I that are outside + // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks + // with the two values we know. + SSAUpdate.Initialize(I); + SSAUpdate.AddAvailableValue(BB, I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); + DEBUG(errs() << "\n"); + } + + // PredBB no longer jumps to BB, remove entries in the PHI node for the edge + // that we nuked. + BB->removePredecessor(PredBB); + + // Remove the unconditional branch at the end of the PredBB block. + OldPredBranch->eraseFromParent(); + + ++NumDupes; + return true; +} + + |