diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 186 |
1 files changed, 108 insertions, 78 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 6772511..62e4fa2 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -11,9 +11,6 @@ // actual pass or policy, but provides a single function to perform loop // unrolling. // -// It works best when loops have been canonicalized by the -indvars pass, -// allowing it to determine the trip counts of loops easily. -// // The process of unrolling can produce extraneous basic blocks linked with // unconditional branches. This will be corrected in the future. // @@ -24,6 +21,7 @@ #include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/Debug.h" @@ -31,6 +29,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; // TODO: Should these be here or in LoopUnroll? @@ -61,7 +60,8 @@ static inline void RemapInstruction(Instruction *I, /// only has one predecessor, and that predecessor only has one successor. /// The LoopInfo Analysis that is passed will be kept consistent. /// Returns the new combined block. -static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { +static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, + LPPassManager *LPM) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. @@ -93,6 +93,12 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { std::string OldName = BB->getName(); // Erase basic block from the function... + + // ScalarEvolution holds references to loop exit blocks. + if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) { + if (Loop *L = LI->getLoopFor(BB)) + SE->forgetLoop(L); + } LI->removeBlock(BB); BB->eraseFromParent(); @@ -109,12 +115,27 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// +/// TripCount is generally defined as the number of times the loop header +/// executes. UnrollLoop relaxes the definition to permit early exits: here +/// TripCount is the iteration on which control exits LatchBlock if no early +/// exits were taken. Note that UnrollLoop assumes that the loop counter test +/// terminates LatchBlock in order to remove unnecesssary instances of the +/// test. In other words, control may exit the loop prior to TripCount +/// iterations via an early branch, but control may not exit the loop from the +/// LatchBlock's terminator prior to TripCount iterations. +/// +/// Similarly, TripMultiple divides the number of times that the LatchBlock may +/// execute without exiting the loop. +/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be /// removed from the LoopPassManager as well. LPM can also be NULL. -bool llvm::UnrollLoop(Loop *L, unsigned Count, - LoopInfo *LI, LPPassManager *LPM) { +/// +/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are +/// available it must also preserve those analyses. +bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, + unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -129,14 +150,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); - + if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } - + if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << @@ -146,16 +167,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE) SE->forgetLoop(L); - // Find trip count - unsigned TripCount = L->getSmallConstantTripCount(); - // Find trip multiple if count is not available - unsigned TripMultiple = 1; - if (TripCount == 0) - TripMultiple = L->getSmallConstantTripMultiple(); - if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) @@ -208,12 +223,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - OrigPHINode.push_back(PN); - if (Instruction *I = - dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) - if (L->contains(I)) - LastValueMap[I] = I; + OrigPHINode.push_back(cast<PHINode>(I)); } std::vector<BasicBlock*> Headers; @@ -221,11 +231,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, Headers.push_back(Header); Latches.push_back(LatchBlock); + // The current on-the-fly SSA update requires blocks to be processed in + // reverse postorder so that LastValueMap contains the correct value at each + // exit. + LoopBlocksDFS DFS(L); + DFS.perform(LI); + + // Stash the DFS iterators before adding blocks to the loop. + LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; - - for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), - E = LoopBlocks.end(); BB != E; ++BB) { + + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); @@ -251,75 +270,55 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, L->addBasicBlockToLoop(New, LI->getBase()); - // Add phi entries for newly created values to all exit blocks except - // the successor of the latch block. The successor of the exit block will - // be updated specially after unrolling all the way. - if (*BB != LatchBlock) - for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE; - ++SI) - if (!L->contains(*SI)) - for (BasicBlock::iterator BBI = (*SI)->begin(); - PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { - Value *Incoming = phi->getIncomingValueForBlock(*BB); - phi->addIncoming(Incoming, New); - } - + // Add phi entries for newly created values to all exit blocks. + for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); + SI != SE; ++SI) { + if (L->contains(*SI)) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { + Value *Incoming = phi->getIncomingValueForBlock(*BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + phi->addIncoming(Incoming, New); + } + } // Keep track of new headers and latches as we create them, so that // we can insert the proper branches later. if (*BB == Header) Headers.push_back(New); - if (*BB == LatchBlock) { + if (*BB == LatchBlock) Latches.push_back(New); - // Also, clear out the new latch's back edge so that it doesn't look - // like a new loop, so that it's amenable to being merged with adjacent - // blocks later on. - TerminatorInst *Term = New->getTerminator(); - assert(L->contains(Term->getSuccessor(!ContinueOnTrue))); - assert(Term->getSuccessor(ContinueOnTrue) == LoopExit); - Term->setSuccessor(!ContinueOnTrue, NULL); - } - NewBlocks.push_back(New); } - + // Remap all instructions in the most recent iteration for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) ::RemapInstruction(I, LastValueMap); } - - // The latch block exits the loop. If there are any PHI nodes in the - // successor blocks, update them to use the appropriate values computed as the - // last iteration of the loop. - if (Count != 1) { - BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]); - for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock); - SI != SE; ++SI) { - for (BasicBlock::iterator BBI = (*SI)->begin(); - PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { - Value *InVal = PN->removeIncomingValue(LatchBlock, false); - // If this value was defined in the loop, take the value defined by the - // last iteration of the loop. - if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { - if (L->contains(InValI)) - InVal = LastValueMap[InVal]; - } - PN->addIncoming(InVal, LastIterationBB); - } - } - } - // Now, if we're doing complete unrolling, loop over the PHI nodes in the - // original block, setting them to their incoming values. - if (CompletelyUnroll) { - BasicBlock *Preheader = L->getLoopPreheader(); - for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *PN = OrigPHINode[i]; + // Loop over the PHI nodes in the original block, setting incoming values. + for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { + PHINode *PN = OrigPHINode[i]; + if (CompletelyUnroll) { PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); Header->getInstList().erase(PN); } + else if (Count > 1) { + Value *InVal = PN->removeIncomingValue(LatchBlock, false); + // If this value was defined in the loop, take the value defined by the + // last iteration of the loop. + if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { + if (L->contains(InValI)) + InVal = LastValueMap[InVal]; + } + assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch"); + PN->addIncoming(InVal, Latches.back()); + } } // Now that all the basic blocks for the unrolled iterations are in place, @@ -351,6 +350,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, // iteration. Term->setSuccessor(!ContinueOnTrue, Dest); } else { + // Remove phi operands at this loop exit + if (Dest != LoopExit) { + BasicBlock *BB = Latches[i]; + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + if (*SI == Headers[i]) + continue; + for (BasicBlock::iterator BBI = (*SI)->begin(); + PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { + Phi->removeIncomingValue(BB, false); + } + } + } // Replace the conditional branch with an unconditional one. BranchInst::Create(Dest, Term); Term->eraseFromParent(); @@ -362,11 +374,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } - + + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>()) + DT->runOnFunction(*L->getHeader()->getParent()); + + // Simplify any new induction variables in the partially unrolled loop. + if (SE && !CompletelyUnroll) { + SmallVector<WeakVH, 16> DeadInsts; + simplifyLoopIVs(L, SE, LPM, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. |