Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar')
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp            | 218
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp                     |  46
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp             |   2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp           | 488
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp          |  35
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp          |   2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp              |   3
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp  | 102
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp                     |  61
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp |   9
10 files changed, 504 insertions(+), 462 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 087ce8a..dcdcfed 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -100,9 +100,9 @@ namespace {
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData;
#ifdef NDEBUG
- SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+ SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
#else
- SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+ SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders;
#endif
DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
@@ -163,6 +163,7 @@ namespace {
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+ bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
private:
BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
@@ -210,11 +211,12 @@ bool JumpThreading::runOnFunction(Function &F) {
// we will loop forever. We take care of this issue by not jump threading for
// back edges. This works for normal cases but not for unreachable blocks as
// they may have a cycle with no back edge.
- removeUnreachableBlocks(F);
+ bool EverChanged = false;
+ EverChanged |= removeUnreachableBlocks(F, LVI);
FindLoopHeaders(F);
- bool Changed, EverChanged = false;
+ bool Changed;
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
@@ -363,8 +365,8 @@ void JumpThreading::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
- for (unsigned i = 0, e = Edges.size(); i != e; ++i)
- LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+ for (const auto &Edge : Edges)
+ LoopHeaders.insert(Edge.second);
}
/// getKnownConstant - Helper method to determine if we can thread over a
@@ -410,8 +412,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If V is a constant, then it is known in all predecessors.
if (Constant *KC = getKnownConstant(V, Preference)) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(KC, *PI));
+ for (BasicBlock *Pred : predecessors(BB))
+ Result.push_back(std::make_pair(KC, Pred));
return true;
}
@@ -434,8 +436,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
@@ -491,22 +492,17 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal ||
- isa<UndefValue>(LHSVals[i].first)) {
- Result.push_back(LHSVals[i]);
- Result.back().first = InterestingVal;
- LHSKnownBBs.insert(LHSVals[i].second);
+ for (const auto &LHSVal : LHSVals)
+ if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
+ Result.emplace_back(InterestingVal, LHSVal.second);
+ LHSKnownBBs.insert(LHSVal.second);
}
- for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal ||
- isa<UndefValue>(RHSVals[i].first)) {
+ for (const auto &RHSVal : RHSVals)
+ if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
- if (!LHSKnownBBs.count(RHSVals[i].second)) {
- Result.push_back(RHSVals[i]);
- Result.back().first = InterestingVal;
- }
+ if (!LHSKnownBBs.count(RHSVal.second))
+ Result.emplace_back(InterestingVal, RHSVal.second);
}
return !Result.empty();
@@ -522,8 +518,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
return false;
// Invert the known values.
- for (unsigned i = 0, e = Result.size(); i != e; ++i)
- Result[i].first = ConstantExpr::getNot(Result[i].first);
+ for (auto &R : Result)
+ R.first = ConstantExpr::getNot(R.first);
return true;
}
@@ -538,12 +534,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
WantInteger, CxtI);
// Try to use constant folding to simplify the binary operator.
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first;
+ for (const auto &LHSVal : LHSVals) {
+ Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ Result.push_back(std::make_pair(KC, LHSVal.second));
}
}
@@ -591,8 +587,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate Res =
@@ -615,12 +610,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
WantInteger, CxtI);
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first;
+ for (const auto &LHSVal : LHSVals) {
+ Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
V, CmpConst);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ Result.push_back(std::make_pair(KC, LHSVal.second));
}
return !Result.empty();
@@ -637,8 +632,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
if ((TrueVal || FalseVal) &&
ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
WantInteger, CxtI)) {
- for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
- Constant *Cond = Conds[i].first;
+ for (auto &C : Conds) {
+ Constant *Cond = C.first;
// Figure out what value to use for the condition.
bool KnownCond;
@@ -655,7 +650,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// See if the select has a known constant value for this predecessor.
if (Constant *Val = KnownCond ? TrueVal : FalseVal)
- Result.push_back(std::make_pair(Val, Conds[i].second));
+ Result.push_back(std::make_pair(Val, C.second));
}
return !Result.empty();
@@ -665,8 +660,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If all else fails, see if LVI can figure out a constant value for us.
Constant *CI = LVI->getConstant(V, BB, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference)) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(KC, *PI));
+ for (BasicBlock *Pred : predecessors(BB))
+ Result.push_back(std::make_pair(KC, Pred));
}
return !Result.empty();
@@ -736,6 +731,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
+ if (TryToUnfoldSelectInCurrBB(BB))
+ return true;
+
// What kind of constant we're looking for.
ConstantPreference Preference = WantInteger;
@@ -988,10 +986,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
- for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI) {
- BasicBlock *PredBB = *PI;
-
+ for (BasicBlock *PredBB : predecessors(LoadBB)) {
// If we already scanned this predecessor, skip it.
if (!PredsScanned.insert(PredBB).second)
continue;
@@ -1038,13 +1033,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
SmallVector<BasicBlock*, 8> PredsToSplit;
SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
- for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
- AvailablePredSet.insert(AvailablePreds[i].first);
+ for (const auto &AvailablePred : AvailablePreds)
+ AvailablePredSet.insert(AvailablePred.first);
// Add all the unavailable predecessors to the PredsToSplit list.
- for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(LoadBB)) {
// If the predecessor is an indirect goto, we can't split the edge.
if (isa<IndirectBrInst>(P->getTerminator()))
return false;
@@ -1129,9 +1122,9 @@ FindMostPopularDest(BasicBlock *BB,
// blocks with known and real destinations to threading undef. We'll handle
// them later if interesting.
DenseMap<BasicBlock*, unsigned> DestPopularity;
- for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
- if (PredToDestList[i].second)
- DestPopularity[PredToDestList[i].second]++;
+ for (const auto &PredToDest : PredToDestList)
+ if (PredToDest.second)
+ DestPopularity[PredToDest.second]++;
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
@@ -1194,10 +1187,10 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
"ComputeValueKnownInPredecessors returned true with no values");
DEBUG(dbgs() << "IN BB: " << *BB;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ for (const auto &PredValue : PredValues) {
dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
- << *PredValues[i].first
- << " for pred '" << PredValues[i].second->getName() << "'.\n";
+ << *PredValue.first
+ << " for pred '" << PredValue.second->getName() << "'.\n";
});
// Decide what we want to thread through. Convert our list of known values to
@@ -1210,8 +1203,8 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- BasicBlock *Pred = PredValues[i].second;
+ for (const auto &PredValue : PredValues) {
+ BasicBlock *Pred = PredValue.second;
if (!SeenPreds.insert(Pred).second)
continue; // Duplicate predecessor entry.
@@ -1220,7 +1213,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
- Constant *Val = PredValues[i].first;
+ Constant *Val = PredValue.first;
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
@@ -1260,16 +1253,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
SmallVector<BasicBlock*, 16> PredsToFactor;
- for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
- if (PredToDestList[i].second == MostPopularDest) {
- BasicBlock *Pred = PredToDestList[i].first;
+ for (const auto &PredToDest : PredToDestList)
+ if (PredToDest.second == MostPopularDest) {
+ BasicBlock *Pred = PredToDest.first;
// This predecessor may be a switch or something else that has multiple
// edges to the block. Factor each of these edges by listing them
// according to # occurrences in PredsToFactor.
- TerminatorInst *PredTI = Pred->getTerminator();
- for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
- if (PredTI->getSuccessor(i) == BB)
+ for (BasicBlock *Succ : successors(Pred))
+ if (Succ == BB)
PredsToFactor.push_back(Pred);
}
@@ -1366,11 +1358,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Scan the information to see which is most popular: true or false. The
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
- for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (isa<UndefValue>(XorOpValues[i].first))
+ for (const auto &XorOpValue : XorOpValues) {
+ if (isa<UndefValue>(XorOpValue.first))
// Ignore undefs for the count.
continue;
- if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
+ if (cast<ConstantInt>(XorOpValue.first)->isZero())
++NumFalse;
else
++NumTrue;
@@ -1386,12 +1378,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
- for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (XorOpValues[i].first != SplitVal &&
- !isa<UndefValue>(XorOpValues[i].first))
+ for (const auto &XorOpValue : XorOpValues) {
+ if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
continue;
- BlocksToFoldInto.push_back(XorOpValues[i].second);
+ BlocksToFoldInto.push_back(XorOpValue.second);
}
// If we inferred a value for all of the predecessors, then duplication won't
@@ -1543,10 +1534,10 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// PHI insertion, which we are prepared to do; clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Use &U : I->uses()) {
+ for (Use &U : I.uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
@@ -1561,14 +1552,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, &*I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]);
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1644,10 +1635,10 @@ void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
// Collect updated outgoing edges' frequencies from BB and use them to update
// edge probabilities.
SmallVector<uint64_t, 4> BBSuccFreq;
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- auto SuccFreq = (*I == SuccBB)
+ for (BasicBlock *Succ : successors(BB)) {
+ auto SuccFreq = (Succ == SuccBB)
? BB2SuccBBFreq - NewBBFreq
- : BBOrigFreq * BPI->getEdgeProbability(BB, *I);
+ : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
BBSuccFreq.push_back(SuccFreq.getFrequency());
}
@@ -1783,10 +1774,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// PHI insertion, which we are prepared to do; clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Use &U : I->uses()) {
+ for (Use &U : I.uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
@@ -1801,14 +1792,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, &*I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]);
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1903,3 +1894,62 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
}
return false;
}
+
+/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form
+/// bb:
+/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
+/// %s = select p, trueval, falseval
+///
+/// Then expand the select into a branch structure, which later enables
+/// jump threading over bb in this pass.
+///
+/// Using an approach similar to SimplifyCFG::FoldCondBranchOnPHI(), unfold
+/// the select if the associated PHI has at least one constant. If the
+/// unfolded select is not jump-threaded, it will be folded again by later
+/// optimizations.
+bool JumpThreading::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ // Look for a Phi/Select pair in the same basic block. The Phi feeds the
+ // condition of the Select and at least one of the incoming values is a
+ // constant.
+ for (BasicBlock::iterator BI = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0 || !PN->hasOneUse())
+ continue;
+
+ SelectInst *SI = dyn_cast<SelectInst>(PN->user_back());
+ if (!SI || SI->getParent() != BB)
+ continue;
+
+ Value *Cond = SI->getCondition();
+ if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1))
+ continue;
+
+ bool HasConst = false;
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ if (PN->getIncomingBlock(i) == BB)
+ return false;
+ if (isa<ConstantInt>(PN->getIncomingValue(i)))
+ HasConst = true;
+ }
+
+ if (HasConst) {
+ // Expand the select.
+ TerminatorInst *Term =
+ SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+ PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
+ NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
+ NewPN->addIncoming(SI->getFalseValue(), BB);
+ SI->replaceAllUsesWith(NewPN);
+ SI->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
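
A rough source-level sketch of what the new TryToUnfoldSelectInCurrBB hook enables (hypothetical names; the pass itself rewrites LLVM IR, not C++): a select whose condition is a phi with constant incoming values is expanded into explicit branches, after which the existing jump-threading logic can send each predecessor straight to its known destination.

// Before unfolding: conceptually %p = phi i1 [true, %bb1], [false, %bb2]
// feeds %s = select i1 %p, %a, %b; the select hides the correlation.
int selectForm(bool cameFromBB1, int a, int b) {
  bool p = cameFromBB1; // stands in for the phi of constant i1 values
  return p ? a : b;     // the select fed by that phi
}

// After unfolding, the select is a branch structure, so a predecessor that
// produced a constant for the phi jumps directly to its destination.
int branchForm(bool cameFromBB1, int a, int b) {
  if (cameFromBB1)
    return a; // the %bb1 edge threads straight here
  return b;   // the %bb2 edge threads straight here
}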
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index e01e23f..8923ff7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -203,9 +203,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
- for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
- LoopItr != LoopItrE; ++LoopItr) {
- Loop *InnerL = *LoopItr;
+ for (Loop *InnerL : L->getSubLoops()) {
AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
assert(InnerAST && "Where is my AST?");
@@ -227,9 +225,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Because subloops have already been incorporated into AST, we skip blocks in
// subloops.
//
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
+ for (BasicBlock *BB : L->blocks()) {
if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
}
@@ -263,9 +259,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
PredIteratorCache PIC;
// Loop over all of the alias sets in the tracker object.
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I)
- Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts,
+ for (AliasSet &AS : *CurAST)
+ Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts,
PIC, LI, DT, CurLoop,
CurAST, &SafetyInfo);
@@ -324,9 +319,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |=
- sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ for (DomTreeNode *Child : Children)
+ Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
if (inSubLoop(BB,CurLoop,LI)) return Changed;
@@ -407,9 +402,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
}
const std::vector<DomTreeNode*> &Children = N->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |=
- hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ for (DomTreeNode *Child : Children)
+ Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
return Changed;
}
@@ -499,9 +493,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I) {
- AliasSet &AS = *I;
+ for (AliasSet &AS : *CurAST) {
if (!AS.isForwardingAliasSet() && AS.isMod()) {
FoundMod = true;
break;
@@ -783,8 +775,8 @@ static bool isGuaranteedToExecute(const Instruction &Inst,
CurLoop->getExitBlocks(ExitBlocks);
// Verify that the block dominates each of the exit blocks of the loop.
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(Inst.getParent(), ExitBlock))
return false;
// As a degenerate case, if the loop is statically infinite then we haven't
@@ -951,17 +943,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// If there is an non-load/store instruction in the loop, we can't promote
// it.
- if (const LoadInst *load = dyn_cast<LoadInst>(UI)) {
- assert(!load->isVolatile() && "AST broken");
- if (!load->isSimple())
+ if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) {
+ assert(!Load->isVolatile() && "AST broken");
+ if (!Load->isSimple())
return Changed;
- } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) {
+ } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (UI->getOperand(1) != ASIV)
continue;
- assert(!store->isVolatile() && "AST broken");
- if (!store->isSimple())
+ assert(!Store->isVolatile() && "AST broken");
+ if (!Store->isSimple())
return Changed;
// Don't sink stores from loops without dedicated block exits. Exits
// containing indirect branches are not transformed by loop simplify,
@@ -979,7 +971,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// restrictive (and performant) alignment and if we are sure this
// instruction will be executed, update the alignment.
// Larger is better, with the exception of 0 being the best alignment.
- unsigned InstAlignment = store->getAlignment();
+ unsigned InstAlignment = Store->getAlignment();
if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
GuaranteedToExecute = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index bc00ff3..7b1940b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -245,7 +245,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
loopInfo.removeBlock(BB);
// The last step is to update LoopInfo now that we've eliminated this loop.
- loopInfo.updateUnloop(L);
+ loopInfo.markAsRemoved(L);
Changed = true;
++NumDeleted;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 56ae5c0..ecef6db 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -39,16 +39,16 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
static cl::opt<unsigned>
- UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+ UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The baseline cost threshold for loop unrolling"));
static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
- "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden,
+ "unroll-percent-dynamic-cost-saved-threshold", cl::Hidden,
cl::desc("The percentage of estimated dynamic cost which must be saved by "
"unrolling to allow unrolling up to the max threshold."));
static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
- "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden,
+ "unroll-dynamic-cost-savings-discount", cl::Hidden,
cl::desc("This is the amount discounted from the total unroll cost when "
"the unrolled form has a high dynamic cost savings (triggered by "
"the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
@@ -59,17 +59,17 @@ static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
"iterations when checking full unroll profitability"));
static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+UnrollCount("unroll-count", cl::Hidden,
cl::desc("Use this unroll count for all loops including those with "
"unroll_count pragma values, for testing purposes"));
static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
static cl::opt<unsigned>
@@ -77,182 +77,95 @@ PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden
cl::desc("Unrolled size limit for loops with an unroll(full) or "
"unroll_count pragma."));
-namespace {
- class LoopUnroll : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
- CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
- CurrentPercentDynamicCostSavedThreshold =
- UnrollPercentDynamicCostSavedThreshold;
- CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
- CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
- CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
- CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
-
- UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
- UserPercentDynamicCostSavedThreshold =
- (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0);
- UserDynamicCostSavingsDiscount =
- (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0);
- UserAllowPartial = (P != -1) ||
- (UnrollAllowPartial.getNumOccurrences() > 0);
- UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
- UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
-
- initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
- }
- /// A magic value for use with the Threshold parameter to indicate
- /// that the loop unroll should be performed regardless of how much
- /// code expansion would result.
- static const unsigned NoThreshold = UINT_MAX;
-
- // Threshold to use when optsize is specified (and there is no
- // explicit -unroll-threshold).
- static const unsigned OptSizeUnrollThreshold = 50;
-
- // Default unroll count for loops with run-time trip count if
- // -unroll-count is not set
- static const unsigned UnrollRuntimeCount = 8;
-
- unsigned CurrentCount;
- unsigned CurrentThreshold;
- unsigned CurrentPercentDynamicCostSavedThreshold;
- unsigned CurrentDynamicCostSavingsDiscount;
- bool CurrentAllowPartial;
- bool CurrentRuntime;
-
- // Flags for whether the 'current' settings are user-specified.
- bool UserCount;
- bool UserThreshold;
- bool UserPercentDynamicCostSavedThreshold;
- bool UserDynamicCostSavingsDiscount;
- bool UserAllowPartial;
- bool UserRuntime;
-
- bool runOnLoop(Loop *L, LPPassManager &) override;
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
+/// A magic value for use with the Threshold parameter to indicate
+/// that the loop unroll should be performed regardless of how much
+/// code expansion would result.
+static const unsigned NoThreshold = UINT_MAX;
- // Fill in the UnrollingPreferences parameter with values from the
- // TargetTransformationInfo.
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
- TargetTransformInfo::UnrollingPreferences &UP) {
- UP.Threshold = CurrentThreshold;
- UP.PercentDynamicCostSavedThreshold =
- CurrentPercentDynamicCostSavedThreshold;
- UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount;
- UP.OptSizeThreshold = OptSizeUnrollThreshold;
- UP.PartialThreshold = CurrentThreshold;
- UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
- UP.Count = CurrentCount;
- UP.MaxCount = UINT_MAX;
- UP.Partial = CurrentAllowPartial;
- UP.Runtime = CurrentRuntime;
- UP.AllowExpensiveTripCount = false;
- TTI.getUnrollingPreferences(L, UP);
- }
+/// Default unroll count for loops with run-time trip count if
+/// -unroll-count is not set
+static const unsigned DefaultUnrollRuntimeCount = 8;
- // Select and return an unroll count based on parameters from
- // user, unroll preferences, unroll pragmas, or a heuristic.
- // SetExplicitly is set to true if the unroll count is is set by
- // the user or a pragma rather than selected heuristically.
- unsigned
- selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
- unsigned PragmaCount,
- const TargetTransformInfo::UnrollingPreferences &UP,
- bool &SetExplicitly);
-
- // Select threshold values used to limit unrolling based on a
- // total unrolled size. Parameters Threshold and PartialThreshold
- // are set to the maximum unrolled size for fully and partially
- // unrolled loops respectively.
- void selectThresholds(const Loop *L, bool UsePragmaThreshold,
- const TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &Threshold, unsigned &PartialThreshold,
- unsigned &PercentDynamicCostSavedThreshold,
- unsigned &DynamicCostSavingsDiscount) {
- // Determine the current unrolling threshold. While this is
- // normally set from UnrollThreshold, it is overridden to a
- // smaller value if the current function is marked as
- // optimize-for-size, and the unroll threshold was not user
- // specified.
- Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
- PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
- PercentDynamicCostSavedThreshold =
- UserPercentDynamicCostSavedThreshold
- ? CurrentPercentDynamicCostSavedThreshold
- : UP.PercentDynamicCostSavedThreshold;
- DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount
- ? CurrentDynamicCostSavingsDiscount
- : UP.DynamicCostSavingsDiscount;
-
- if (!UserThreshold &&
- // FIXME: Use Function::optForSize().
- L->getHeader()->getParent()->hasFnAttribute(
- Attribute::OptimizeForSize)) {
- Threshold = UP.OptSizeThreshold;
- PartialThreshold = UP.PartialOptSizeThreshold;
- }
- if (UsePragmaThreshold) {
- // If the loop has an unrolling pragma, we want to be more
- // aggressive with unrolling limits. Set thresholds to at
- // least the PragmaTheshold value which is larger than the
- // default limits.
- if (Threshold != NoThreshold)
- Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
- if (PartialThreshold != NoThreshold)
- PartialThreshold =
- std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
- }
- }
- bool canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost, uint64_t RolledDynamicCost);
- };
-}
+/// Gather the various unrolling parameters based on the defaults, compiler
+/// flags, TTI overrides, pragmas, and user specified parameters.
+static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
+ Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
+ Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
+ Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
+ bool PragmaEnableUnroll, unsigned TripCount) {
+ TargetTransformInfo::UnrollingPreferences UP;
-char LoopUnroll::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+ // Set up the defaults
+ UP.Threshold = 150;
+ UP.PercentDynamicCostSavedThreshold = 20;
+ UP.DynamicCostSavingsDiscount = 2000;
+ UP.OptSizeThreshold = 50;
+ UP.PartialThreshold = UP.Threshold;
+ UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
+ UP.Count = 0;
+ UP.MaxCount = UINT_MAX;
+ UP.Partial = false;
+ UP.Runtime = false;
+ UP.AllowExpensiveTripCount = false;
+
+ // Override with any target specific settings
+ TTI.getUnrollingPreferences(L, UP);
+
+ // Apply size attributes
+ if (L->getHeader()->getParent()->optForSize()) {
+ UP.Threshold = UP.OptSizeThreshold;
+ UP.PartialThreshold = UP.PartialOptSizeThreshold;
+ }
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime) {
- return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
-}
+ // Apply unroll count pragmas
+ if (PragmaCount)
+ UP.Count = PragmaCount;
+ else if (PragmaFullUnroll)
+ UP.Count = TripCount;
-Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+ // Apply any user values specified by cl::opt
+ if (UnrollThreshold.getNumOccurrences() > 0) {
+ UP.Threshold = UnrollThreshold;
+ UP.PartialThreshold = UnrollThreshold;
+ }
+ if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
+ UP.PercentDynamicCostSavedThreshold =
+ UnrollPercentDynamicCostSavedThreshold;
+ if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
+ UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
+ if (UnrollCount.getNumOccurrences() > 0)
+ UP.Count = UnrollCount;
+ if (UnrollAllowPartial.getNumOccurrences() > 0)
+ UP.Partial = UnrollAllowPartial;
+ if (UnrollRuntime.getNumOccurrences() > 0)
+ UP.Runtime = UnrollRuntime;
+
+ // Apply user values provided by argument
+ if (UserThreshold.hasValue()) {
+ UP.Threshold = *UserThreshold;
+ UP.PartialThreshold = *UserThreshold;
+ }
+ if (UserCount.hasValue())
+ UP.Count = *UserCount;
+ if (UserAllowPartial.hasValue())
+ UP.Partial = *UserAllowPartial;
+ if (UserRuntime.hasValue())
+ UP.Runtime = *UserRuntime;
+
+ if (PragmaCount > 0 ||
+ ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaThreshold value
+ // which is larger than the default limits.
+ if (UP.Threshold != NoThreshold)
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ if (UP.PartialThreshold != NoThreshold)
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ }
+
+ return UP;
}
namespace {
@@ -793,12 +706,11 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
L->setLoopID(NewLoopID);
}
-bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost,
- uint64_t RolledDynamicCost) {
-
+static bool canUnrollCompletely(Loop *L, unsigned Threshold,
+ unsigned PercentDynamicCostSavedThreshold,
+ unsigned DynamicCostSavingsDiscount,
+ uint64_t UnrolledCost,
+ uint64_t RolledDynamicCost) {
if (Threshold == NoThreshold) {
DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
return true;
@@ -846,60 +758,13 @@ bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
return false;
}
-unsigned LoopUnroll::selectUnrollCount(
- const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
- unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
- bool &SetExplicitly) {
- SetExplicitly = true;
-
- // User-specified count (either as a command-line option or
- // constructor parameter) has highest precedence.
- unsigned Count = UserCount ? CurrentCount : 0;
-
- // If there is no user-specified count, unroll pragmas have the next
- // highest precedence.
- if (Count == 0) {
- if (PragmaCount) {
- Count = PragmaCount;
- } else if (PragmaFullUnroll) {
- Count = TripCount;
- }
- }
-
- if (Count == 0)
- Count = UP.Count;
-
- if (Count == 0) {
- SetExplicitly = false;
- if (TripCount == 0)
- // Runtime trip count.
- Count = UnrollRuntimeCount;
- else
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- Count = TripCount;
- }
- if (TripCount && Count > TripCount)
- return TripCount;
- return Count;
-}
-
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
- if (skipOptnoneFunction(L))
- return false;
-
- Function &F = *L->getHeader()->getParent();
-
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
+static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, const TargetTransformInfo &TTI,
+ AssumptionCache &AC, bool PreserveLCSSA,
+ Optional<unsigned> ProvidedCount,
+ Optional<unsigned> ProvidedThreshold,
+ Optional<bool> ProvidedAllowPartial,
+ Optional<bool> ProvidedRuntime) {
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
@@ -912,9 +777,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
unsigned PragmaCount = UnrollCountPragmaValue(L);
bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
- TargetTransformInfo::UnrollingPreferences UP;
- getUnrollingPreferences(L, TTI, UP);
-
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
@@ -929,11 +791,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
- // Select an initial unroll count. This may be reduced later based
- // on size thresholds.
- bool CountSetExplicitly;
- unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,
- PragmaCount, UP, CountSetExplicitly);
+ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+ L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll,
+ TripCount);
+
+ unsigned Count = UP.Count;
+ bool CountSetExplicitly = Count != 0;
+ // Use a heuristic count if we didn't set anything explicitly.
+ if (!CountSetExplicitly)
+ Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount;
+ if (TripCount && Count > TripCount)
+ Count = TripCount;
unsigned NumInlineCandidates;
bool notDuplicatable;
@@ -955,21 +824,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
return false;
}
- unsigned Threshold, PartialThreshold;
- unsigned PercentDynamicCostSavedThreshold;
- unsigned DynamicCostSavingsDiscount;
- // Only use the high pragma threshold when we have a target unroll factor such
- // as with "#pragma unroll N" or a pragma indicating full unrolling and the
- // trip count is known. Otherwise we rely on the standard threshold to
- // heuristically select a reasonable unroll count.
- bool UsePragmaThreshold =
- PragmaCount > 0 ||
- ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
-
- selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold,
- PercentDynamicCostSavedThreshold,
- DynamicCostSavingsDiscount);
-
// Given Count, TripCount and thresholds determine the type of
// unrolling which is to be performed.
enum { Full = 0, Partial = 1, Runtime = 2 };
@@ -977,19 +831,20 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
if (TripCount && Count == TripCount) {
Unrolling = Partial;
// If the loop is really small, we don't need to run an expensive analysis.
- if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount,
+ if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
UnrolledSize, UnrolledSize)) {
Unrolling = Full;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost =
- analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI,
- Threshold + DynamicCostSavingsDiscount))
- if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
- DynamicCostSavingsDiscount, Cost->UnrolledCost,
- Cost->RolledDynamicCost)) {
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, TripCount, DT, *SE, TTI,
+ UP.Threshold + UP.DynamicCostSavingsDiscount))
+ if (canUnrollCompletely(L, UP.Threshold,
+ UP.PercentDynamicCostSavedThreshold,
+ UP.DynamicCostSavingsDiscount,
+ Cost->UnrolledCost, Cost->RolledDynamicCost)) {
Unrolling = Full;
}
}
@@ -1001,23 +856,22 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
- bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) ||
- (UserRuntime ? CurrentRuntime : UP.Runtime);
+ bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime;
// Don't unroll a runtime trip count loop with unroll full pragma.
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
if (Unrolling == Partial) {
- bool AllowPartial = PragmaEnableUnroll ||
- (UserAllowPartial ? CurrentAllowPartial : UP.Partial);
+ bool AllowPartial = PragmaEnableUnroll || UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
+ if (UP.PartialThreshold != NoThreshold &&
+ UnrolledSize > UP.PartialThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
- Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2);
+ Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
while (Count != 0 && TripCount % Count != 0)
Count--;
}
@@ -1029,7 +883,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
}
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
- while (Count != 0 && UnrolledSize > PartialThreshold) {
+ while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
Count >>= 1;
UnrolledSize = (LoopSize-2) * Count + 2;
}
@@ -1086,3 +940,91 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
return true;
}
+
+namespace {
+class LoopUnroll : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll(Optional<unsigned> Threshold = None,
+ Optional<unsigned> Count = None,
+ Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
+ : LoopPass(ID), ProvidedCount(Count), ProvidedThreshold(Threshold),
+ ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime) {
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
+
+ Optional<unsigned> ProvidedCount;
+ Optional<unsigned> ProvidedThreshold;
+ Optional<bool> ProvidedAllowPartial;
+ Optional<bool> ProvidedRuntime;
+
+ bool runOnLoop(Loop *L, LPPassManager &) override {
+ if (skipOptnoneFunction(L))
+ return false;
+
+ Function &F = *L->getHeader()->getParent();
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount,
+ ProvidedThreshold, ProvidedAllowPartial,
+ ProvidedRuntime);
+ }
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+}
+
+char LoopUnroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
+ int Runtime) {
+ // TODO: It would make more sense for this function to take the optionals
+ // directly, but that's dangerous since it would silently break out of tree
+ // callers.
+ return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ Count == -1 ? None : Optional<unsigned>(Count),
+ AllowPartial == -1 ? None
+ : Optional<bool>(AllowPartial),
+ Runtime == -1 ? None : Optional<bool>(Runtime));
+}
+
+Pass *llvm::createSimpleLoopUnrollPass() {
+ return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 7354016..6b43b0f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -529,12 +529,13 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// We found an instruction that may write to the loaded memory.
// We can try to promote at this position instead of the store
- // position if nothing alias the store memory after this.
+ // position if nothing aliases the store memory after this and the store
+ // destination is not in the range.
P = &*I;
for (; I != E; ++I) {
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
- DEBUG(dbgs() << "Alias " << *I << "\n");
+ if (&*I == SI->getOperand(1) ||
+ AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
P = nullptr;
break;
}
@@ -628,13 +629,39 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Ensure that the value being stored is something that can be memset'able a
// byte at a time like "0" or "-1" or any width, as well as things like
// 0xA0A0A0A0 and 0.0.
- if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ auto *V = SI->getOperand(0);
+ if (Value *ByteVal = isBytewiseValue(V)) {
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
+ // If we have an aggregate, we try to promote it to memset regardless
+ // of opportunity for merging as it can expose optimization opportunities
+ // in subsequent passes.
+ auto *T = V->getType();
+ if (T->isAggregateType()) {
+ uint64_t Size = DL.getTypeStoreSize(T);
+ unsigned Align = SI->getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(T);
+ IRBuilder<> Builder(SI);
+ auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
+ Size, Align, SI->isVolatile());
+
+ DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
+
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ NumMemSetInfer++;
+
+ // Make sure we do not invalidate the iterator.
+ BBI = M->getIterator();
+ return true;
+ }
+ }
+
return false;
}
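
The new aggregate path above fires even when there is no neighboring store to merge with. Roughly, at the source level (an illustrative sketch with hypothetical names, not the optimizer's actual output), a store of a bytewise-uniform aggregate becomes a single memset:

#include <cstring>

struct Pair { int a, b; };

void storeForm(Pair *P) {
  *P = Pair{0, 0}; // aggregate store; every byte stored is the same value
}

void memsetForm(Pair *P) {
  // Conceptually what the promotion emits: one memset covering the
  // aggregate's store size, at the store's (or the ABI type's) alignment.
  std::memset(P, 0, sizeof(Pair));
}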
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 28c610c..b56b355 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -499,7 +499,7 @@ static bool isGCSafepointPoll(Function &F) {
static bool shouldRewriteFunction(Function &F) {
// TODO: This should check the GCStrategy
if (F.hasGC()) {
- const char *FunctionGCName = F.getGC();
+ const auto &FunctionGCName = F.getGC();
const StringRef StatepointExampleName("statepoint-example");
const StringRef CoreCLRName("coreclr");
return (StatepointExampleName == FunctionGCName) ||
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 401a740..bcadd4e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2155,7 +2155,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
// During the initial run we will get to the root of the tree.
// But if we get here while we are redoing instructions, there is no
// guarantee that the root will be visited. So Redo later
- if (BO->user_back() != BO)
+ if (BO->user_back() != BO &&
+ BO->getParent() == BO->user_back()->getParent())
RedoInsts.insert(BO->user_back());
return;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 5d253be..d77d574 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -78,6 +78,13 @@ static cl::opt<bool>
AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
cl::Hidden, cl::init(true));
+/// Should we split vectors of pointers into their individual elements? This
+/// is known to be buggy, but the alternate implementation isn't yet ready.
+/// This is purely to provide a debugging and diagnostic hook until the vector
+/// split is replaced with vector relocations.
+static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden,
+ cl::init(true));
+
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -357,10 +364,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// particular element in 'I'.
static BaseDefiningValueResult
findBaseDefiningValueOfVector(Value *I) {
- assert(I->getType()->isVectorTy() &&
- cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
- "Illegal to ask for the base pointer of a non-pointer type");
-
// Each case parallels findBaseDefiningValue below, see that code for
// detailed motivation.
@@ -368,26 +371,10 @@ findBaseDefiningValueOfVector(Value *I) {
// An incoming argument to the function is a base pointer
return BaseDefiningValueResult(I, true);
- // We shouldn't see the address of a global as a vector value?
- assert(!isa<GlobalVariable>(I) &&
- "unexpected global variable found in base of vector");
-
- // inlining could possibly introduce phi node that contains
- // undef if callee has multiple returns
- if (isa<UndefValue>(I))
- // utterly meaningless, but useful for dealing with partially optimized
- // code.
+ if (isa<Constant>(I))
+ // Constant vectors consist only of constant pointers.
return BaseDefiningValueResult(I, true);
- // Due to inheritance, this must be _after_ the global variable and undef
- // checks
- if (Constant *Con = dyn_cast<Constant>(I)) {
- assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
- "order of checks wrong!");
- assert(Con->isNullValue() && "null is the only case which makes sense");
- return BaseDefiningValueResult(Con, true);
- }
-
if (isa<LoadInst>(I))
return BaseDefiningValueResult(I, true);
@@ -417,11 +404,11 @@ findBaseDefiningValueOfVector(Value *I) {
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
+ assert(I->getType()->isPtrOrPtrVectorTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
if (I->getType()->isVectorTy())
return findBaseDefiningValueOfVector(I);
-
- assert(I->getType()->isPointerTy() &&
- "Illegal to ask for the base pointer of a non-pointer type");
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
@@ -1310,18 +1297,29 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
assert(Index < LiveVec.size() && "Bug in std::find?");
return Index;
};
-
- // All gc_relocate are set to i8 addrspace(1)* type. We originally generated
- // unique declarations for each pointer type, but this proved problematic
- // because the intrinsic mangling code is incomplete and fragile. Since
- // we're moving towards a single unified pointer type anyways, we can just
- // cast everything to an i8* of the right address space. A bitcast is added
- // later to convert gc_relocate to the actual value's type.
Module *M = StatepointToken->getModule();
- auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
- Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
- Value *GCRelocateDecl =
- Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+
+ // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
+ // element type is i8 addrspace(1)*). We originally generated unique
+ // declarations for each pointer type, but this proved problematic because
+ // the intrinsic mangling code is incomplete and fragile. Since we're moving
+ // towards a single unified pointer type anyways, we can just cast everything
+ // to an i8* of the right address space. A bitcast is added later to convert
+ // gc_relocate to the actual value's type.
+ auto getGCRelocateDecl = [&] (Type *Ty) {
+ assert(isHandledGCPointerType(Ty));
+ auto AS = Ty->getScalarType()->getPointerAddressSpace();
+ Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
+ if (auto *VT = dyn_cast<VectorType>(Ty))
+ NewTy = VectorType::get(NewTy, VT->getNumElements());
+ return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
+ {NewTy});
+ };
+
+ // Lazily populated map from input types to the canonicalized form mentioned
+ // in the comment above. This should probably be cached somewhere more
+ // broadly.
+ DenseMap<Type*, Value*> TypeToDeclMap;
for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
@@ -1329,6 +1327,11 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
Value *LiveIdx = Builder.getInt32(LiveStart + i);
+ Type *Ty = LiveVariables[i]->getType();
+ if (!TypeToDeclMap.count(Ty))
+ TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
+ Value *GCRelocateDecl = TypeToDeclMap[Ty];
+
// only specify a debug name if we can give a useful one
CallInst *Reloc = Builder.CreateCall(
GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
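The two hunks above canonicalize every live pointer type to i8 addrspace(AS)* (or a vector of those) and lazily memoize one gc.relocate declaration per distinct input type in TypeToDeclMap. A standalone sketch of that lazy-caching pattern, using std::string stand-ins for LLVM's Type*/Value* (all names here are illustrative):

#include <iostream>
#include <string>
#include <unordered_map>

// Stand-in for getGCRelocateDecl: collapse every pointer type to the one
// canonical i8* form for its address space, as the comment above describes.
static std::string canonicalDecl(const std::string &Ty) {
  return Ty.find("addrspace(1)") != std::string::npos
             ? "gc.relocate : i8 addrspace(1)*"
             : "gc.relocate : i8*";
}

int main() {
  std::unordered_map<std::string, std::string> TypeToDeclMap;
  for (const char *Ty :
       {"i32 addrspace(1)*", "i8 addrspace(1)*", "i32 addrspace(1)*"}) {
    // Populate lazily: compute the canonical declaration only on first use;
    // repeated types hit the cache.
    auto It = TypeToDeclMap.find(Ty);
    if (It == TypeToDeclMap.end())
      It = TypeToDeclMap.emplace(Ty, canonicalDecl(Ty)).first;
    std::cout << Ty << " -> " << It->second << "\n";
  }
  return 0;
}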
@@ -2380,15 +2383,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// Do a limited scalarization of any live at safepoint vector values which
// contain pointers. This enables this pass to run after vectorization at
- // the cost of some possible performance loss. TODO: it would be nice to
- // natively support vectors all the way through the backend so we don't need
- // to scalarize here.
- for (size_t i = 0; i < Records.size(); i++) {
- PartiallyConstructedSafepointRecord &Info = Records[i];
- Instruction *Statepoint = ToUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
- Info.PointerToBase, DT);
- }
+ // the cost of some possible performance loss. Note: This is known not to
+ // update the side tables correctly, which can lead to relocation bugs when
+ // the same vector is live at multiple statepoints. We're in the process of
+ // implementing the alternate lowering - relocating the vector-of-pointers
+ // as a first-class item and updating the backend to understand that - but
+ // that's not yet complete.
+ if (UseVectorSplit)
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+ Instruction *Statepoint = ToUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+ Info.PointerToBase, DT);
+ }
// In order to reduce live set of statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
@@ -2467,7 +2474,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
#ifndef NDEBUG
// sanity check
for (auto *Ptr : Live)
- assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
+ assert(isHandledGCPointerType(Ptr->getType()) &&
+ "must be a gc pointer type");
#endif
relocationViaAlloca(F, DT, Live, Records);
@@ -2547,7 +2555,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
static bool shouldRewriteStatepointsIn(Function &F) {
// TODO: This should check the GCStrategy
if (F.hasGC()) {
- const char *FunctionGCName = F.getGC();
+ const auto &FunctionGCName = F.getGC();
const StringRef StatepointExampleName("statepoint-example");
const StringRef CoreCLRName("coreclr");
return (StatepointExampleName == FunctionGCName) ||
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 2fca803..8569e08 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -757,9 +757,14 @@ void SCCPSolver::visitCastInst(CastInst &I) {
LatticeVal OpSt = getValueState(I.getOperand(0));
if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
- else if (OpSt.isConstant()) // Propagate constant value
- markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
- OpSt.getConstant(), I.getType()));
+ else if (OpSt.isConstant()) {
+ Constant *C =
+ ConstantExpr::getCast(I.getOpcode(), OpSt.getConstant(), I.getType());
+ if (isa<UndefValue>(C))
+ return;
+ // Propagate constant value
+ markConstant(&I, C);
+ }
}
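This hunk, and the SCCP hunks that follow, all add the same guard: when constant folding produces undef, return without calling markConstant, so the lattice cell stays undefined and ResolvedUndefsIn keeps the freedom to choose a value later. A minimal standalone sketch of that decision, with a toy lattice and folder in place of LatticeVal and ConstantExpr (everything here is illustrative):

#include <optional>

enum class Lattice { Undefined, Constant, Overdefined };

// Toy folder: std::nullopt models "folds to undef" (e.g. a cast of undef).
static std::optional<int> foldCast(int Op) {
  if (Op == -1)  // pretend -1 stands for an undef-producing input
    return std::nullopt;
  return Op;
}

static Lattice visitCast(bool OpOverdefined, std::optional<int> OpConst) {
  if (OpOverdefined)
    return Lattice::Overdefined;  // inherit overdefinedness of operand
  if (OpConst) {
    if (!foldCast(*OpConst))
      return Lattice::Undefined;  // folded to undef: do NOT mark constant
    return Lattice::Constant;     // propagate the folded constant
  }
  return Lattice::Undefined;      // operand unresolved: wait
}

int main() {
  return visitCast(false, -1) == Lattice::Undefined ? 0 : 1;
}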
@@ -859,10 +864,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I,
- ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
- V2State.getConstant()));
+ if (V1State.isConstant() && V2State.isConstant()) {
+ Constant *C = ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant());
+ // X op Y -> undef.
+ if (isa<UndefValue>(C))
+ return;
+ return markConstant(IV, &I, C);
+ }
// If something is undef, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
@@ -917,10 +926,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
- V1State.getConstant(),
- V2State.getConstant()));
+ if (V1State.isConstant() && V2State.isConstant()) {
+ Constant *C = ConstantExpr::getCompare(
+ I.getPredicate(), V1State.getConstant(), V2State.getConstant());
+ if (isa<UndefValue>(C))
+ return;
+ return markConstant(IV, &I, C);
+ }
// If operands are still undefined, wait for them to resolve.

if (!V1State.isOverdefined() && !V2State.isOverdefined())
@@ -1020,8 +1032,11 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
- markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
- Indices));
+ Constant *C =
+ ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
+ if (isa<UndefValue>(C))
+ return;
+ markConstant(&I, C);
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1061,9 +1076,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
Constant *Ptr = PtrVal.getConstant();
- // load null -> null
+ // load null is undefined.
if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
- return markConstant(IV, &I, UndefValue::get(I.getType()));
+ return;
// Transform load (constant global) into the value loaded.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
@@ -1079,8 +1094,11 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL))
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) {
+ if (isa<UndefValue>(C))
+ return;
return markConstant(IV, &I, C);
+ }
// Otherwise we cannot say for certain what value this load will produce.
// Bail out.
@@ -1122,8 +1140,12 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands, TLI))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI)) {
+ // call -> undef.
+ if (isa<UndefValue>(C))
+ return;
return markConstant(I, C);
+ }
}
// Otherwise, we don't know anything about this call, mark it overdefined.
@@ -1379,6 +1401,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X % undef -> undef. No change.
if (Op1LV.isUndefined()) break;
+ // X / 0 -> undef. No change.
+ // X % 0 -> undef. No change.
+ if (Op1LV.isConstant() && Op1LV.getConstant()->isZeroValue())
+ break;
+
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
markForcedConstant(&I, Constant::getNullValue(ITy));
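The new break above covers the case the old code got wrong: with a known constant-zero divisor, the fold yields undef, so forcing the result to 0 would be unsound. A compact restatement of the decision table as a sketch, not the pass's actual types:

#include <optional>

// Decision for div/rem with an undef first operand, mirroring the hunk
// above. Returns the value to force, or std::nullopt to leave the lattice
// cell undefined.
static std::optional<int> resolveUndefDivRem(bool Op1Undef,
                                             std::optional<int> Op1Const) {
  if (Op1Undef)
    return std::nullopt;  // X / undef, X % undef -> undef; no change
  if (Op1Const && *Op1Const == 0)
    return std::nullopt;  // X / 0, X % 0 -> undef; no change (the new case)
  return 0;               // undef / X -> 0 (X could be maxint),
                          // undef % X -> 0 (X could be 1)
}

int main() { return resolveUndefDivRem(false, 0) ? 1 : 0; }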
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 0e0b00d..4e84d72 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -425,9 +425,7 @@ bool TailCallElim::runTRE(Function &F) {
// with themselves. Check to see if we did and clean up our mess if so. This
// occurs when a function passes an argument straight through to its tail
// call.
- for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
- PHINode *PN = ArgumentPHIs[i];
-
+ for (PHINode *PN : ArgumentPHIs) {
// If the PHI Node is a dynamic constant, replace it with the value it is.
if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
@@ -468,10 +466,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// return value of the call, it must only use things that are defined before
// the call, or movable instructions between the call and the instruction
// itself.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (I->getOperand(i) == CI)
- return false;
- return true;
+ return std::find(I->op_begin(), I->op_end(), CI) == I->op_end();
}
/// Return true if the specified value is the same when the return would exit
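The CanMoveAboveCall rewrite above is a pure idiom swap: a hand-written index loop scanning I's operands for CI becomes a single std::find over the operand range, with identical behavior. A minimal sketch of the same idiom over a plain container (names are illustrative):

#include <algorithm>
#include <vector>

// True iff Call does not appear among the instruction's operands; this is
// the shape of the one-liner introduced above.
static bool operandsFreeOfCall(const std::vector<int> &Operands, int Call) {
  return std::find(Operands.begin(), Operands.end(), Call) == Operands.end();
}

int main() {
  std::vector<int> Ops = {1, 2, 3};
  return operandsFreeOfCall(Ops, 2) ? 1 : 0;  // 2 is an operand -> returns 0
}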