summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp175
1 files changed, 131 insertions, 44 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index ec7f09a..8e81541 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -43,21 +43,16 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -65,8 +60,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
using namespace llvm;
@@ -82,6 +76,11 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
cl::Hidden, cl::init(10));
+static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
+ cl::Hidden, cl::init(false));
+
+static const char *ClonedLoopTag = "irce.loop.clone";
+
#define DEBUG_TYPE "irce"
namespace {
@@ -152,11 +151,10 @@ public:
OS << " Operand: " << getCheckUse()->getOperandNo() << "\n";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD
void dump() {
print(dbgs());
}
-#endif
Use *getCheckUse() const { return CheckUse; }
@@ -276,7 +274,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
- // fallthrough
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGE:
if (match(RHS, m_ConstantInt<0>())) {
Index = LHS;
@@ -286,7 +284,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
case ICmpInst::ICMP_SLT:
std::swap(LHS, RHS);
- // fallthrough
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
if (match(RHS, m_ConstantInt<-1>())) {
Index = LHS;
@@ -302,7 +300,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
case ICmpInst::ICMP_ULT:
std::swap(LHS, RHS);
- // fallthrough
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_UGT:
if (IsNonNegativeAndNotLoopVarying(LHS)) {
Index = RHS;
@@ -392,7 +390,8 @@ void InductiveRangeCheck::extractRangeChecksFromBranch(
BranchProbability LikelyTaken(15, 16);
- if (BPI.getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
+ if (!SkipProfitabilityChecks &&
+ BPI.getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
return;
SmallPtrSet<Value *, 8> Visited;
@@ -400,6 +399,34 @@ void InductiveRangeCheck::extractRangeChecksFromBranch(
Checks, Visited);
}
+// Add metadata to the loop L to disable loop optimizations. Callers need to
+// confirm that optimizing loop L is not beneficial.
+static void DisableAllLoopOptsOnLoop(Loop &L) {
+ // We do not care about any existing loopID related metadata for L, since we
+ // are setting all loop metadata to false.
+ LLVMContext &Context = L.getHeader()->getContext();
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDNode *Dummy = MDNode::get(Context, {});
+ MDNode *DisableUnroll = MDNode::get(
+ Context, {MDString::get(Context, "llvm.loop.unroll.disable")});
+ Metadata *FalseVal =
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 0));
+ MDNode *DisableVectorize = MDNode::get(
+ Context,
+ {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal});
+ MDNode *DisableLICMVersioning = MDNode::get(
+ Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")});
+ MDNode *DisableDistribution= MDNode::get(
+ Context,
+ {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal});
+ MDNode *NewLoopID =
+ MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize,
+ DisableLICMVersioning, DisableDistribution});
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L.setLoopID(NewLoopID);
+}
+
namespace {
// Keeps track of the structure of a loop. This is similar to llvm::Loop,
@@ -515,6 +542,11 @@ class LoopConstrainer {
//
void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;
+ // Create the appropriate loop structure needed to describe a cloned copy of
+ // `Original`. The clone is described by `VM`.
+ Loop *createClonedLoopStructure(Loop *Original, Loop *Parent,
+ ValueToValueMapTy &VM);
+
// Rewrite the iteration space of the loop denoted by (LS, Preheader). The
// iteration space of the rewritten loop ends at ExitLoopAt. The start of the
// iteration space is not changed. `ExitLoopAt' is assumed to be slt
@@ -566,10 +598,12 @@ class LoopConstrainer {
Function &F;
LLVMContext &Ctx;
ScalarEvolution &SE;
+ DominatorTree &DT;
+ LPPassManager &LPM;
+ LoopInfo &LI;
// Information about the original loop we started out with.
Loop &OriginalLoop;
- LoopInfo &OriginalLoopInfo;
const SCEV *LatchTakenCount;
BasicBlock *OriginalPreheader;
@@ -585,12 +619,13 @@ class LoopConstrainer {
LoopStructure MainLoopStructure;
public:
- LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS,
- ScalarEvolution &SE, InductiveRangeCheck::Range R)
+ LoopConstrainer(Loop &L, LoopInfo &LI, LPPassManager &LPM,
+ const LoopStructure &LS, ScalarEvolution &SE,
+ DominatorTree &DT, InductiveRangeCheck::Range R)
: F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
- SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
- OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R),
- MainLoopStructure(LS) {}
+ SE(SE), DT(DT), LPM(LPM), LI(LI), OriginalLoop(L),
+ LatchTakenCount(nullptr), OriginalPreheader(nullptr),
+ MainLoopPreheader(nullptr), Range(R), MainLoopStructure(LS) {}
// Entry point for the algorithm. Returns true on success.
bool run();
@@ -622,9 +657,19 @@ static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) {
Optional<LoopStructure>
LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI,
Loop &L, const char *&FailureReason) {
- assert(L.isLoopSimplifyForm() && "should follow from addRequired<>");
+ if (!L.isLoopSimplifyForm()) {
+ FailureReason = "loop not in LoopSimplify form";
+ return None;
+ }
BasicBlock *Latch = L.getLoopLatch();
+ assert(Latch && "Simplified loops only have one latch!");
+
+ if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) {
+ FailureReason = "loop has already been cloned";
+ return None;
+ }
+
if (!L.isLoopExiting(Latch)) {
FailureReason = "no loop latch";
return None;
@@ -648,7 +693,8 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
BranchProbability ExitProbability =
BPI.getEdgeProbability(LatchBr->getParent(), LatchBrExitIdx);
- if (ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) {
+ if (!SkipProfitabilityChecks &&
+ ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) {
FailureReason = "short running loop, not profitable";
return None;
}
@@ -907,6 +953,11 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
return static_cast<Value *>(It->second);
};
+ auto *ClonedLatch =
+ cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch()));
+ ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag,
+ MDNode::get(Ctx, {}));
+
Result.Structure = MainLoopStructure.map(GetClonedValue);
Result.Structure.Tag = Tag;
@@ -924,17 +975,15 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
// to be edited to reflect that. No phi nodes need to be introduced because
// the loop is in LCSSA.
- for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB);
- SBBI != SBBE; ++SBBI) {
-
- if (OriginalLoop.contains(*SBBI))
+ for (auto *SBB : successors(OriginalBB)) {
+ if (OriginalLoop.contains(SBB))
continue; // not an exit block
- for (Instruction &I : **SBBI) {
- if (!isa<PHINode>(&I))
+ for (Instruction &I : *SBB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
break;
- PHINode *PN = cast<PHINode>(&I);
Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
}
@@ -1020,11 +1069,11 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
RewrittenRangeInfo RRI;
- auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
+ BasicBlock *BBInsertLocation = LS.Latch->getNextNode();
RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
- &F, &*BBInsertLocation);
+ &F, BBInsertLocation);
RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
- &*BBInsertLocation);
+ BBInsertLocation);
BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator());
bool Increasing = LS.IndVarIncreasing;
@@ -1067,11 +1116,10 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// each of the PHI nodes in the loop header. This feeds into the initial
// value of the same PHI nodes if/when we continue execution.
for (Instruction &I : *LS.Header) {
- if (!isa<PHINode>(&I))
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
break;
- PHINode *PN = cast<PHINode>(&I);
-
PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy",
BranchToContinuation);
@@ -1104,11 +1152,10 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
unsigned PHIIndex = 0;
for (Instruction &I : *LS.Header) {
- if (!isa<PHINode>(&I))
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
break;
- PHINode *PN = cast<PHINode>(&I);
-
for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
if (PN->getIncomingBlock(i) == ContinuationBlock)
PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
@@ -1125,10 +1172,10 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
BranchInst::Create(LS.Header, Preheader);
for (Instruction &I : *LS.Header) {
- if (!isa<PHINode>(&I))
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
break;
- PHINode *PN = cast<PHINode>(&I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
replacePHIBlock(PN, OldPreheader, Preheader);
}
@@ -1142,7 +1189,23 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
return;
for (BasicBlock *BB : BBs)
- ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo);
+ ParentLoop->addBasicBlockToLoop(BB, LI);
+}
+
+Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
+ ValueToValueMapTy &VM) {
+ Loop &New = LPM.addLoop(Parent);
+
+ // Add all of the blocks in Original to the new loop.
+ for (auto *BB : Original->blocks())
+ if (LI.getLoopFor(BB) == Original)
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI);
+
+ // Add all of the subloops to the new loop.
+ for (Loop *SubLoop : *Original)
+ createClonedLoopStructure(SubLoop, &New, VM);
+
+ return &New;
}
bool LoopConstrainer::run() {
@@ -1266,8 +1329,31 @@ bool LoopConstrainer::run() {
std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);
addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd));
- addToParentLoopIfNeeded(PreLoop.Blocks);
- addToParentLoopIfNeeded(PostLoop.Blocks);
+
+ DT.recalculate(F);
+
+ if (!PreLoop.Blocks.empty()) {
+ auto *L = createClonedLoopStructure(
+ &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map);
+ formLCSSARecursively(*L, DT, &LI, &SE);
+ simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
+ // Pre loops are slow paths, we do not need to perform any loop
+ // optimizations on them.
+ DisableAllLoopOptsOnLoop(*L);
+ }
+
+ if (!PostLoop.Blocks.empty()) {
+ auto *L = createClonedLoopStructure(
+ &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map);
+ formLCSSARecursively(*L, DT, &LI, &SE);
+ simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
+ // Post loops are slow paths, we do not need to perform any loop
+ // optimizations on them.
+ DisableAllLoopOptsOnLoop(*L);
+ }
+
+ formLCSSARecursively(OriginalLoop, DT, &LI, &SE);
+ simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true);
return true;
}
@@ -1439,8 +1525,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!SafeIterRange.hasValue())
return false;
- LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS,
- SE, SafeIterRange.getValue());
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LPM,
+ LS, SE, DT, SafeIterRange.getValue());
bool Changed = LC.run();
if (Changed) {
OpenPOWER on IntegriCloud