diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 175 |
1 files changed, 131 insertions, 44 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index ec7f09a..8e81541 100644 --- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -43,21 +43,16 @@ #include "llvm/ADT/Optional.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -65,8 +60,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" using namespace llvm; @@ -82,6 +76,11 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden, static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal", cl::Hidden, cl::init(10)); +static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks", + cl::Hidden, cl::init(false)); + +static const char *ClonedLoopTag = "irce.loop.clone"; + #define DEBUG_TYPE "irce" namespace { @@ -152,11 +151,10 @@ public: OS << " Operand: " << getCheckUse()->getOperandNo() << "\n"; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() { print(dbgs()); } -#endif Use *getCheckUse() const { return CheckUse; } @@ -276,7 +274,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, case ICmpInst::ICMP_SLE: std::swap(LHS, RHS); - // fallthrough + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGE: if (match(RHS, m_ConstantInt<0>())) { Index = LHS; @@ -286,7 +284,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, case ICmpInst::ICMP_SLT: std::swap(LHS, RHS); - // fallthrough + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGT: if (match(RHS, m_ConstantInt<-1>())) { Index = LHS; @@ -302,7 +300,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, case ICmpInst::ICMP_ULT: std::swap(LHS, RHS); - // fallthrough + LLVM_FALLTHROUGH; case ICmpInst::ICMP_UGT: if (IsNonNegativeAndNotLoopVarying(LHS)) { Index = RHS; @@ -392,7 +390,8 @@ void InductiveRangeCheck::extractRangeChecksFromBranch( BranchProbability LikelyTaken(15, 16); - if (BPI.getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken) + if (!SkipProfitabilityChecks && + BPI.getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken) return; SmallPtrSet<Value *, 8> Visited; @@ -400,6 +399,34 @@ void InductiveRangeCheck::extractRangeChecksFromBranch( Checks, Visited); } +// Add metadata to the loop L to disable loop optimizations. Callers need to +// confirm that optimizing loop L is not beneficial. +static void DisableAllLoopOptsOnLoop(Loop &L) { + // We do not care about any existing loopID related metadata for L, since we + // are setting all loop metadata to false. + LLVMContext &Context = L.getHeader()->getContext(); + // Reserve first location for self reference to the LoopID metadata node. + MDNode *Dummy = MDNode::get(Context, {}); + MDNode *DisableUnroll = MDNode::get( + Context, {MDString::get(Context, "llvm.loop.unroll.disable")}); + Metadata *FalseVal = + ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 0)); + MDNode *DisableVectorize = MDNode::get( + Context, + {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal}); + MDNode *DisableLICMVersioning = MDNode::get( + Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")}); + MDNode *DisableDistribution= MDNode::get( + Context, + {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal}); + MDNode *NewLoopID = + MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize, + DisableLICMVersioning, DisableDistribution}); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + L.setLoopID(NewLoopID); +} + namespace { // Keeps track of the structure of a loop. This is similar to llvm::Loop, @@ -515,6 +542,11 @@ class LoopConstrainer { // void cloneLoop(ClonedLoop &CLResult, const char *Tag) const; + // Create the appropriate loop structure needed to describe a cloned copy of + // `Original`. The clone is described by `VM`. + Loop *createClonedLoopStructure(Loop *Original, Loop *Parent, + ValueToValueMapTy &VM); + // Rewrite the iteration space of the loop denoted by (LS, Preheader). The // iteration space of the rewritten loop ends at ExitLoopAt. The start of the // iteration space is not changed. `ExitLoopAt' is assumed to be slt @@ -566,10 +598,12 @@ class LoopConstrainer { Function &F; LLVMContext &Ctx; ScalarEvolution &SE; + DominatorTree &DT; + LPPassManager &LPM; + LoopInfo &LI; // Information about the original loop we started out with. Loop &OriginalLoop; - LoopInfo &OriginalLoopInfo; const SCEV *LatchTakenCount; BasicBlock *OriginalPreheader; @@ -585,12 +619,13 @@ class LoopConstrainer { LoopStructure MainLoopStructure; public: - LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS, - ScalarEvolution &SE, InductiveRangeCheck::Range R) + LoopConstrainer(Loop &L, LoopInfo &LI, LPPassManager &LPM, + const LoopStructure &LS, ScalarEvolution &SE, + DominatorTree &DT, InductiveRangeCheck::Range R) : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), - SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr), - OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R), - MainLoopStructure(LS) {} + SE(SE), DT(DT), LPM(LPM), LI(LI), OriginalLoop(L), + LatchTakenCount(nullptr), OriginalPreheader(nullptr), + MainLoopPreheader(nullptr), Range(R), MainLoopStructure(LS) {} // Entry point for the algorithm. Returns true on success. bool run(); @@ -622,9 +657,19 @@ static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) { Optional<LoopStructure> LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI, Loop &L, const char *&FailureReason) { - assert(L.isLoopSimplifyForm() && "should follow from addRequired<>"); + if (!L.isLoopSimplifyForm()) { + FailureReason = "loop not in LoopSimplify form"; + return None; + } BasicBlock *Latch = L.getLoopLatch(); + assert(Latch && "Simplified loops only have one latch!"); + + if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) { + FailureReason = "loop has already been cloned"; + return None; + } + if (!L.isLoopExiting(Latch)) { FailureReason = "no loop latch"; return None; @@ -648,7 +693,8 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP BranchProbability ExitProbability = BPI.getEdgeProbability(LatchBr->getParent(), LatchBrExitIdx); - if (ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) { + if (!SkipProfitabilityChecks && + ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) { FailureReason = "short running loop, not profitable"; return None; } @@ -907,6 +953,11 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, return static_cast<Value *>(It->second); }; + auto *ClonedLatch = + cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch())); + ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag, + MDNode::get(Ctx, {})); + Result.Structure = MainLoopStructure.map(GetClonedValue); Result.Structure.Tag = Tag; @@ -924,17 +975,15 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, // to be edited to reflect that. No phi nodes need to be introduced because // the loop is in LCSSA. - for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB); - SBBI != SBBE; ++SBBI) { - - if (OriginalLoop.contains(*SBBI)) + for (auto *SBB : successors(OriginalBB)) { + if (OriginalLoop.contains(SBB)) continue; // not an exit block - for (Instruction &I : **SBBI) { - if (!isa<PHINode>(&I)) + for (Instruction &I : *SBB) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) break; - PHINode *PN = cast<PHINode>(&I); Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB); PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB); } @@ -1020,11 +1069,11 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( RewrittenRangeInfo RRI; - auto BBInsertLocation = std::next(Function::iterator(LS.Latch)); + BasicBlock *BBInsertLocation = LS.Latch->getNextNode(); RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", - &F, &*BBInsertLocation); + &F, BBInsertLocation); RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F, - &*BBInsertLocation); + BBInsertLocation); BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator()); bool Increasing = LS.IndVarIncreasing; @@ -1067,11 +1116,10 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( // each of the PHI nodes in the loop header. This feeds into the initial // value of the same PHI nodes if/when we continue execution. for (Instruction &I : *LS.Header) { - if (!isa<PHINode>(&I)) + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) break; - PHINode *PN = cast<PHINode>(&I); - PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy", BranchToContinuation); @@ -1104,11 +1152,10 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs( unsigned PHIIndex = 0; for (Instruction &I : *LS.Header) { - if (!isa<PHINode>(&I)) + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) break; - PHINode *PN = cast<PHINode>(&I); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) if (PN->getIncomingBlock(i) == ContinuationBlock) PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]); @@ -1125,10 +1172,10 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS, BranchInst::Create(LS.Header, Preheader); for (Instruction &I : *LS.Header) { - if (!isa<PHINode>(&I)) + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) break; - PHINode *PN = cast<PHINode>(&I); for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) replacePHIBlock(PN, OldPreheader, Preheader); } @@ -1142,7 +1189,23 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) { return; for (BasicBlock *BB : BBs) - ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo); + ParentLoop->addBasicBlockToLoop(BB, LI); +} + +Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent, + ValueToValueMapTy &VM) { + Loop &New = LPM.addLoop(Parent); + + // Add all of the blocks in Original to the new loop. + for (auto *BB : Original->blocks()) + if (LI.getLoopFor(BB) == Original) + New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI); + + // Add all of the subloops to the new loop. + for (Loop *SubLoop : *Original) + createClonedLoopStructure(SubLoop, &New, VM); + + return &New; } bool LoopConstrainer::run() { @@ -1266,8 +1329,31 @@ bool LoopConstrainer::run() { std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr); addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd)); - addToParentLoopIfNeeded(PreLoop.Blocks); - addToParentLoopIfNeeded(PostLoop.Blocks); + + DT.recalculate(F); + + if (!PreLoop.Blocks.empty()) { + auto *L = createClonedLoopStructure( + &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map); + formLCSSARecursively(*L, DT, &LI, &SE); + simplifyLoop(L, &DT, &LI, &SE, nullptr, true); + // Pre loops are slow paths, we do not need to perform any loop + // optimizations on them. + DisableAllLoopOptsOnLoop(*L); + } + + if (!PostLoop.Blocks.empty()) { + auto *L = createClonedLoopStructure( + &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map); + formLCSSARecursively(*L, DT, &LI, &SE); + simplifyLoop(L, &DT, &LI, &SE, nullptr, true); + // Post loops are slow paths, we do not need to perform any loop + // optimizations on them. + DisableAllLoopOptsOnLoop(*L); + } + + formLCSSARecursively(OriginalLoop, DT, &LI, &SE); + simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true); return true; } @@ -1439,8 +1525,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { if (!SafeIterRange.hasValue()) return false; - LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS, - SE, SafeIterRange.getValue()); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LPM, + LS, SE, DT, SafeIterRange.getValue()); bool Changed = LC.run(); if (Changed) { |