diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 198 |
1 files changed, 134 insertions, 64 deletions
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 7f1f78f..e346ebd 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -23,11 +23,12 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -46,7 +47,7 @@ STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); static cl::opt<bool> -UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(true), cl::Hidden, +UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog.")); @@ -171,20 +172,58 @@ static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks, return false; } +/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary +/// and adds a mapping from the original loop to the new loop to NewLoops. +/// Returns nullptr if no new loop was created and a pointer to the +/// original loop OriginalBB was part of otherwise. +const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, + BasicBlock *ClonedBB, LoopInfo *LI, + NewLoopsMap &NewLoops) { + // Figure out which loop New is in. + const Loop *OldLoop = LI->getLoopFor(OriginalBB); + assert(OldLoop && "Should (at least) be in the loop being unrolled!"); + + Loop *&NewLoop = NewLoops[OldLoop]; + if (!NewLoop) { + // Found a new sub-loop. + assert(OriginalBB == OldLoop->getHeader() && + "Header should be first in RPO"); + + NewLoop = new Loop(); + Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); + + if (NewLoopParent) + NewLoopParent->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); + return OldLoop; + } else { + NewLoop->addBasicBlockToLoop(ClonedBB, *LI); + return nullptr; + } +} + /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true /// if unrolling was successful, or false if the loop was unmodified. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// -/// TripCount is generally defined as the number of times the loop header -/// executes. UnrollLoop relaxes the definition to permit early exits: here -/// TripCount is the iteration on which control exits LatchBlock if no early -/// exits were taken. Note that UnrollLoop assumes that the loop counter test -/// terminates LatchBlock in order to remove unnecesssary instances of the -/// test. In other words, control may exit the loop prior to TripCount -/// iterations via an early branch, but control may not exit the loop from the -/// LatchBlock's terminator prior to TripCount iterations. +/// TripCount is the upper bound of the iteration on which control exits +/// LatchBlock. Control may exit the loop prior to TripCount iterations either +/// via an early branch in other loop block or via LatchBlock terminator. This +/// is relaxed from the general definition of trip count which is the number of +/// times the loop header executes. Note that UnrollLoop assumes that the loop +/// counter test is in LatchBlock in order to remove unnecesssary instances of +/// the test. If control can exit the loop from the LatchBlock's terminator +/// prior to TripCount iterations, flag PreserveCondBr needs to be set. +/// +/// PreserveCondBr indicates whether the conditional branch of the LatchBlock +/// needs to be preserved. It is needed when we use trip count upper bound to +/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first +/// conditional branch needs to be preserved. /// /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. @@ -196,15 +235,21 @@ static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks, /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and /// AllowExpensiveTripCount is false. /// +/// If we want to perform PGO-based loop peeling, PeelCount is set to the +/// number of iterations we want to peel off. +/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and /// DominatorTree if they are non-null. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, - unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, + bool PreserveCondBr, bool PreserveOnlyFirst, + unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { + BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -250,9 +295,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (TripCount != 0 && Count > TripCount) Count = TripCount; - // Don't enter the unroll code if there is nothing to do. This way we don't - // need to support "partial unrolling by 1". - if (TripCount == 0 && Count < 2) + // Don't enter the unroll code if there is nothing to do. + if (TripCount == 0 && Count < 2 && PeelCount == 0) return false; assert(Count > 0); @@ -272,14 +316,22 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // now we just recompute LCSSA for the outer loop, but it should be possible // to fix it in-place. bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll && - std::any_of(ExitBlocks.begin(), ExitBlocks.end(), - [&](BasicBlock *BB) { return isa<PHINode>(BB->begin()); }); + any_of(ExitBlocks, [](const BasicBlock *BB) { + return isa<PHINode>(BB->begin()); + }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); + assert((!RuntimeTripCount || !PeelCount) && + "Did not expect runtime trip-count unrolling " + "and peeling for the same loop"); + + if (PeelCount) + peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA); + // Loops containing convergent instructions must have a count that divides // their TripMultiple. DEBUG( @@ -293,9 +345,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, "Unroll count must divide trip multiple if loop contains a " "convergent operation."); }); - // Don't output the runtime loop remainder if Count is a multiple of - // TripMultiple. Such a remainder is never needed, and is unsafe if the loop - // contains a convergent instruction. + if (RuntimeTripCount && TripMultiple % Count != 0 && !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount, UnrollRuntimeEpilog, LI, SE, DT, @@ -322,35 +372,40 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } + using namespace ore; // Report the unrolling decision. - DebugLoc LoopLoc = L->getStartLoc(); - Function *F = Header->getParent(); - LLVMContext &Ctx = F->getContext(); - if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); - emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, - Twine("completely unrolled loop with ") + - Twine(TripCount) + " iterations"); + ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), + L->getHeader()) + << "completely unrolled loop with " + << NV("UnrollCount", TripCount) << " iterations"); + } else if (PeelCount) { + DEBUG(dbgs() << "PEELING loop %" << Header->getName() + << " with iteration count " << PeelCount << "!\n"); + ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), + L->getHeader()) + << " peeled loop by " << NV("PeelCount", PeelCount) + << " iterations"); } else { - auto EmitDiag = [&](const Twine &T) { - emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, - "unrolled loop by a factor of " + Twine(Count) + - T); - }; + OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), + L->getHeader()); + Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count); DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); - EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip)); + ORE->emit(Diag << " with a breakout at trip " + << NV("BreakoutTrip", BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); - EmitDiag(" with " + Twine(TripMultiple) + " trips per branch"); + ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple) + << " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); - EmitDiag(" with run-time trip count"); + ORE->emit(Diag << " with run-time trip count"); } DEBUG(dbgs() << "!\n"); } @@ -382,6 +437,15 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks(); + + // Loop Unrolling might create new loops. While we do preserve LoopInfo, we + // might break loop-simplified form for these loops (as they, e.g., would + // share the same exit blocks). We'll keep track of loops for which we can + // break this so that later we can re-simplify them. + SmallSetVector<Loop *, 4> LoopsToSimplify; + for (Loop *SubLoop : *L) + LoopsToSimplify.insert(SubLoop); + for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; SmallDenseMap<const Loop *, Loop *, 4> NewLoops; @@ -397,27 +461,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop"); L->addBasicBlockToLoop(New, *LI); } else { - // Figure out which loop New is in. - const Loop *OldLoop = LI->getLoopFor(*BB); - assert(OldLoop && "Should (at least) be in the loop being unrolled!"); - - Loop *&NewLoop = NewLoops[OldLoop]; - if (!NewLoop) { - // Found a new sub-loop. - assert(*BB == OldLoop->getHeader() && - "Header should be first in RPO"); - - Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); - assert(NewLoopParent && - "Expected parent loop before sub-loop in RPO"); - NewLoop = new Loop; - NewLoopParent->addChildLoop(NewLoop); + const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); + if (OldLoop) { + LoopsToSimplify.insert(NewLoops[OldLoop]); // Forget the old loop, since its inputs may have changed. if (SE) SE->forgetLoop(OldLoop); } - NewLoop->addBasicBlockToLoop(New, *LI); } if (*BB == Header) @@ -480,9 +531,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } // Remap all instructions in the most recent iteration - for (BasicBlock *NewBlock : NewBlocks) - for (Instruction &I : *NewBlock) + for (BasicBlock *NewBlock : NewBlocks) { + for (Instruction &I : *NewBlock) { ::remapInstruction(&I, LastValueMap); + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::assume) + AC->registerAssumption(II); + } + } } // Loop over the PHI nodes in the original block, setting incoming values. @@ -524,12 +580,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (CompletelyUnroll) { if (j == 0) Dest = LoopExit; - NeedConditional = false; - } - - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. - if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { + // If using trip count upper bound to completely unroll, we need to keep + // the conditional branch except the last one because the loop may exit + // after any iteration. + assert(NeedConditional && + "NeedCondition cannot be modified by both complete " + "unrolling and runtime unrolling"); + NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0)); + } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) { + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. NeedConditional = false; } @@ -595,10 +655,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } } - // FIXME: We could register any cloned assumptions instead of clearing the - // whole function's cache. - AC->clear(); - // FIXME: We only preserve DT info for complete unrolling now. Incrementally // updating domtree after partial loop unrolling should also be easy. if (DT && !CompletelyUnroll) @@ -607,7 +663,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, DEBUG(DT->verifyDomTree()); // Simplify any new induction variables in the partially unrolled loop. - if (SE && !CompletelyUnroll) { + if (SE && !CompletelyUnroll && Count > 1) { SmallVector<WeakVH, 16> DeadInsts; simplifyLoopIVs(L, SE, DT, LI, DeadInsts); @@ -636,6 +692,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } } + // TODO: after peeling or unrolling, previously loop variant conditions are + // likely to fold to constants, eagerly propagating those here will require + // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be + // appropriate. + NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; @@ -663,6 +724,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { + // OuterL includes all loops for which we can break loop-simplify, so + // it's sufficient to simplify only it (it'll recursively simplify inner + // loops too). + // TODO: That potentially might be compile-time expensive. We should try + // to fix the loop-simplified form incrementally. simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA); // LCSSA must be performed on the outermost affected loop. The unrolled @@ -678,6 +744,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, else assert(OuterL->isLCSSAForm(*DT) && "Loops should be in LCSSA form after loop-unroll."); + } else { + // Simplify loops for which we might've broken loop-simplify form. + for (Loop *SubLoop : LoopsToSimplify) + simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA); } } |