path: root/contrib/llvm/lib/Transforms/Scalar
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar')
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ADCE.cpp | 69
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 17
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/BDCE.cpp | 350
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 120
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DCE.cpp | 78
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 264
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 269
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp | 49
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 319
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 724
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 24
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 263
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 65
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 19
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 113
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 1347
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 79
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 566
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 141
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 447
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 828
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 278
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 455
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 174
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 51
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp | 199
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp | 104
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 109
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 1826
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 166
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SROA.cpp | 991
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp | 777
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 15
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 7
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp | 32
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 251
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Sink.cpp | 25
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 42
50 files changed, 6225 insertions, 5515 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index d6fc916..590a52d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -1,4 +1,4 @@
-//===- DCE.cpp - Code to perform dead code elimination --------------------===//
+//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,52 +14,33 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "adce"
STATISTIC(NumRemoved, "Number of instructions removed");
-namespace {
-struct ADCE : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(ID) {
- initializeADCEPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function& F) override;
-
- void getAnalysisUsage(AnalysisUsage& AU) const override {
- AU.setPreservesCFG();
- }
-};
-}
-
-char ADCE::ID = 0;
-INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
-
-bool ADCE::runOnFunction(Function& F) {
- if (skipOptnoneFunction(F))
- return false;
-
+static bool aggressiveDCE(Function& F) {
SmallPtrSet<Instruction*, 128> Alive;
SmallVector<Instruction*, 128> Worklist;
// Collect the set of "root" instructions that are known live.
- for (Instruction &I : inst_range(F)) {
- if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- isa<LandingPadInst>(I) || I.mayHaveSideEffects()) {
+ for (Instruction &I : instructions(F)) {
+ if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || I.isEHPad() ||
+ I.mayHaveSideEffects()) {
Alive.insert(&I);
Worklist.push_back(&I);
}
@@ -79,7 +60,7 @@ bool ADCE::runOnFunction(Function& F) {
// which have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
if (!Alive.count(&I)) {
Worklist.push_back(&I);
I.dropAllReferences();
@@ -94,6 +75,34 @@ bool ADCE::runOnFunction(Function& F) {
return !Worklist.empty();
}
-FunctionPass *llvm::createAggressiveDCEPass() {
- return new ADCE();
+PreservedAnalyses ADCEPass::run(Function &F) {
+ if (aggressiveDCE(F))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
+
+namespace {
+struct ADCELegacyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCELegacyPass() : FunctionPass(ID) {
+ initializeADCELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function& F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+ return aggressiveDCE(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+}
+
+char ADCELegacyPass::ID = 0;
+INITIALIZE_PASS(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
+ false, false)
+
+FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); }
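
The hunk above splits ADCE into a shared aggressiveDCE() helper driven by both the new-style ADCEPass::run and the legacy wrapper. For orientation, a minimal standalone sketch of the mark-live worklist that helper implements; ToyInst, markLive, and the example graph are hypothetical stand-ins, not LLVM API.

    #include <cstddef>
    #include <unordered_set>
    #include <vector>

    // Toy "instruction": operand edges by index plus a flag for roots that are
    // always live (terminators, EH pads, anything with side effects).
    struct ToyInst {
      std::vector<size_t> Operands;
      bool AlwaysLive;
    };

    // Mirrors the worklist in aggressiveDCE(): seed with the roots, then mark
    // every transitive operand live; whatever stays unmarked is dead.
    static std::unordered_set<size_t> markLive(const std::vector<ToyInst> &Insts) {
      std::unordered_set<size_t> Alive;
      std::vector<size_t> Worklist;
      for (size_t I = 0; I < Insts.size(); ++I)
        if (Insts[I].AlwaysLive) {
          Alive.insert(I);
          Worklist.push_back(I);
        }
      while (!Worklist.empty()) {
        size_t Cur = Worklist.back();
        Worklist.pop_back();
        for (size_t Op : Insts[Cur].Operands)
          if (Alive.insert(Op).second) // newly discovered live value
            Worklist.push_back(Op);
      }
      return Alive;
    }

    int main() {
      // %0 = arg; %1 = add %0, %0; %2 = mul %1, %1 (unused); ret %1 (root)
      std::vector<ToyInst> Insts = {{{}, false}, {{0}, false}, {{1}, false}, {{1}, true}};
      auto Alive = markLive(Insts);
      return Alive.count(2) ? 1 : 0; // %2 is dead, so this returns 0
    }
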
diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 8918909..4b721d3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -21,6 +21,8 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -54,13 +56,15 @@ struct AlignmentFromAssumptions : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
}
// For memory transfers, we need a common alignment for both the source and
@@ -84,7 +88,7 @@ INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
@@ -249,8 +253,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
// The mask must have some trailing ones (otherwise the condition is
// trivial and tells us nothing about the alignment of the left operand).
- unsigned TrailingOnes =
- MaskSCEV->getValue()->getValue().countTrailingOnes();
+ unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes();
if (!TrailingOnes)
return false;
@@ -270,7 +273,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
OffSCEV = nullptr;
if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
AAPtr = PToI->getPointerOperand();
- OffSCEV = SE->getConstant(Int64Ty, 0);
+ OffSCEV = SE->getZero(Int64Ty);
} else if (const SCEVAddExpr* AndLHSAddSCEV =
dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
// Try to find the ptrtoint; subtract it and the rest is the offset.
@@ -410,7 +413,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
bool AlignmentFromAssumptions::runOnFunction(Function &F) {
bool Changed = false;
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
NewDestAlignments.clear();
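
The extractAlignmentInfo() change above only swaps the APInt accessor, but the underlying reasoning is worth spelling out: an assumption of the form (ptrtoint %p & Mask) == 0 proves 2^N-byte alignment when Mask has N trailing ones, which is why a mask with no trailing ones is rejected. A small standalone sketch of that arithmetic, with countTrailingOnes as a local stand-in for APInt's method:

    #include <cassert>
    #include <cstdint>

    // Local stand-in for APInt::countTrailingOnes().
    static unsigned countTrailingOnes(uint64_t Mask) {
      unsigned N = 0;
      for (; Mask & 1; Mask >>= 1)
        ++N;
      return N;
    }

    int main() {
      // The assumption being modeled: (ptrtoint %p & Mask) == 0.
      uint64_t Mask = 0xF; // low four bits are known zero
      unsigned TrailingOnes = countTrailingOnes(Mask);
      assert(TrailingOnes != 0 && "a mask with no trailing ones proves nothing");
      uint64_t Alignment = uint64_t(1) << TrailingOnes;
      assert(Alignment == 16); // %p is at least 16-byte aligned
      return 0;
    }
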
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
index 09c605e..cb9b8b6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -15,26 +15,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/DemandedBits.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
#define DEBUG_TYPE "bdce"
@@ -53,342 +45,42 @@ struct BDCE : public FunctionPass {
void getAnalysisUsage(AnalysisUsage& AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<DemandedBits>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
-
- void determineLiveOperandBits(const Instruction *UserI,
- const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2);
-
- AssumptionCache *AC;
- DominatorTree *DT;
};
}
char BDCE::ID = 0;
INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DemandedBits)
INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
false, false)
-static bool isAlwaysLive(Instruction *I) {
- return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- isa<LandingPadInst>(I) || I->mayHaveSideEffects();
-}
-
-void BDCE::determineLiveOperandBits(const Instruction *UserI,
- const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2) {
- unsigned BitWidth = AB.getBitWidth();
-
- // We're called once per operand, but for some instructions, we need to
- // compute known bits of both operands in order to determine the live bits of
- // either (when both operands are instructions themselves). We don't,
- // however, want to do this twice, so we cache the result in APInts that live
- // in the caller. For the two-relevant-operands case, both operand values are
- // provided here.
- auto ComputeKnownBits =
- [&](unsigned BitWidth, const Value *V1, const Value *V2) {
- const DataLayout &DL = I->getModule()->getDataLayout();
- KnownZero = APInt(BitWidth, 0);
- KnownOne = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
- AC, UserI, DT);
-
- if (V2) {
- KnownZero2 = APInt(BitWidth, 0);
- KnownOne2 = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
- 0, AC, UserI, DT);
- }
- };
-
- switch (UserI->getOpcode()) {
- default: break;
- case Instruction::Call:
- case Instruction::Invoke:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::bswap:
- // The alive bits of the input are the swapped alive bits of
- // the output.
- AB = AOut.byteSwap();
- break;
- case Intrinsic::ctlz:
- if (OperandNo == 0) {
- // We need some output bits, so we need all bits of the
- // input to the left of, and including, the leftmost bit
- // known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
- AB = APInt::getHighBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countLeadingZeros()+1));
- }
- break;
- case Intrinsic::cttz:
- if (OperandNo == 0) {
- // We need some output bits, so we need all bits of the
- // input to the right of, and including, the rightmost bit
- // known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
- AB = APInt::getLowBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countTrailingZeros()+1));
- }
- break;
- }
- break;
- case Instruction::Add:
- case Instruction::Sub:
- // Find the highest live output bit. We don't need any more input
- // bits than that (adds, and thus subtracts, ripple only to the
- // left).
- AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
- break;
- case Instruction::Shl:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.lshr(ShiftAmt);
-
- // If the shift is nuw/nsw, then the high bits are not dead
- // (because we've promised that they *must* be zero).
- const ShlOperator *S = cast<ShlOperator>(UserI);
- if (S->hasNoSignedWrap())
- AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
- else if (S->hasNoUnsignedWrap())
- AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::LShr:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.shl(ShiftAmt);
-
- // If the shift is exact, then the low bits are not dead
- // (they must be zero).
- if (cast<LShrOperator>(UserI)->isExact())
- AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::AShr:
- if (OperandNo == 0)
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(UserI->getOperand(1))) {
- uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
- AB = AOut.shl(ShiftAmt);
- // Because the high input bit is replicated into the
- // high-order bits of the result, if we need any of those
- // bits, then we must keep the highest input bit.
- if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
- .getBoolValue())
- AB.setBit(BitWidth-1);
-
- // If the shift is exact, then the low bits are not dead
- // (they must be zero).
- if (cast<AShrOperator>(UserI)->isExact())
- AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- break;
- case Instruction::And:
- AB = AOut;
-
- // For bits that are known zero, the corresponding bits in the
- // other operand are dead (unless they're both zero, in which
- // case they can't both be dead, so just mark the LHS bits as
- // dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownZero2;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownZero & ~KnownZero2);
- }
- break;
- case Instruction::Or:
- AB = AOut;
-
- // For bits that are known one, the corresponding bits in the
- // other operand are dead (unless they're both one, in which
- // case they can't both be dead, so just mark the LHS bits as
- // dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownOne2;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownOne & ~KnownOne2);
- }
- break;
- case Instruction::Xor:
- case Instruction::PHI:
- AB = AOut;
- break;
- case Instruction::Trunc:
- AB = AOut.zext(BitWidth);
- break;
- case Instruction::ZExt:
- AB = AOut.trunc(BitWidth);
- break;
- case Instruction::SExt:
- AB = AOut.trunc(BitWidth);
- // Because the high input bit is replicated into the
- // high-order bits of the result, if we need any of those
- // bits, then we must keep the highest input bit.
- if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
- AOut.getBitWidth() - BitWidth))
- .getBoolValue())
- AB.setBit(BitWidth-1);
- break;
- case Instruction::Select:
- if (OperandNo != 0)
- AB = AOut;
- break;
- }
-}
-
bool BDCE::runOnFunction(Function& F) {
if (skipOptnoneFunction(F))
return false;
+ DemandedBits &DB = getAnalysis<DemandedBits>();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- DenseMap<Instruction *, APInt> AliveBits;
SmallVector<Instruction*, 128> Worklist;
-
- // The set of visited instructions (non-integer-typed only).
- SmallPtrSet<Instruction*, 128> Visited;
-
- // Collect the set of "root" instructions that are known live.
- for (Instruction &I : inst_range(F)) {
- if (!isAlwaysLive(&I))
- continue;
-
- DEBUG(dbgs() << "BDCE: Root: " << I << "\n");
- // For integer-valued instructions, set up an initial empty set of alive
- // bits and add the instruction to the work list. For other instructions
- // add their operands to the work list (for integer values operands, mark
- // all bits as live).
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (!AliveBits.count(&I)) {
- AliveBits[&I] = APInt(IT->getBitWidth(), 0);
- Worklist.push_back(&I);
- }
-
- continue;
- }
-
- // Non-integer-typed instructions...
- for (Use &OI : I.operands()) {
- if (Instruction *J = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
- AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
- Worklist.push_back(J);
- }
- }
- // To save memory, we don't add I to the Visited set here. Instead, we
- // check isAlwaysLive on every instruction when searching for dead
- // instructions later (we need to check isAlwaysLive for the
- // integer-typed instructions anyway).
- }
-
- // Propagate liveness backwards to operands.
- while (!Worklist.empty()) {
- Instruction *UserI = Worklist.pop_back_val();
-
- DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
- APInt AOut;
- if (UserI->getType()->isIntegerTy()) {
- AOut = AliveBits[UserI];
- DEBUG(dbgs() << " Alive Out: " << AOut);
- }
- DEBUG(dbgs() << "\n");
-
- if (!UserI->getType()->isIntegerTy())
- Visited.insert(UserI);
-
- APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
- // Compute the set of alive bits for each operand. These are anded into the
- // existing set, if any, and if that changes the set of alive bits, the
- // operand is added to the work-list.
- for (Use &OI : UserI->operands()) {
- if (Instruction *I = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
- unsigned BitWidth = IT->getBitWidth();
- APInt AB = APInt::getAllOnesValue(BitWidth);
- if (UserI->getType()->isIntegerTy() && !AOut &&
- !isAlwaysLive(UserI)) {
- AB = APInt(BitWidth, 0);
- } else {
- // If all bits of the output are dead, then all bits of the input
- // Bits of each operand that are used to compute alive bits of the
- // output are alive, all others are dead.
- determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
- KnownZero, KnownOne,
- KnownZero2, KnownOne2);
- }
-
- // If we've added to the set of alive bits (or the operand has not
- // been previously visited), then re-queue the operand to be visited
- // again.
- APInt ABPrev(BitWidth, 0);
- auto ABI = AliveBits.find(I);
- if (ABI != AliveBits.end())
- ABPrev = ABI->second;
-
- APInt ABNew = AB | ABPrev;
- if (ABNew != ABPrev || ABI == AliveBits.end()) {
- AliveBits[I] = std::move(ABNew);
- Worklist.push_back(I);
- }
- } else if (!Visited.count(I)) {
- Worklist.push_back(I);
- }
- }
- }
- }
-
bool Changed = false;
- // The inverse of the live set is the dead set. These are those instructions
- // which have no side effects and do not influence the control flow or return
- // value of the function, and may therefore be deleted safely.
- // NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : inst_range(F)) {
- // For live instructions that have all dead bits, first make them dead by
- // replacing all uses with something else. Then, if they don't need to
- // remain live (because they have side effects, etc.) we can remove them.
- if (I.getType()->isIntegerTy()) {
- auto ABI = AliveBits.find(&I);
- if (ABI != AliveBits.end()) {
- if (ABI->second.getBoolValue())
- continue;
-
- DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
- // FIXME: In theory we could substitute undef here instead of zero.
- // This should be reconsidered once we settle on the semantics of
- // undef, poison, etc.
- Value *Zero = ConstantInt::get(I.getType(), 0);
- ++NumSimplified;
- I.replaceAllUsesWith(Zero);
- Changed = true;
- }
- } else if (Visited.count(&I)) {
- continue;
+ for (Instruction &I : instructions(F)) {
+ if (I.getType()->isIntegerTy() &&
+ !DB.getDemandedBits(&I).getBoolValue()) {
+ // For live instructions that have all dead bits, first make them dead by
+ // replacing all uses with something else. Then, if they don't need to
+ // remain live (because they have side effects, etc.) we can remove them.
+ DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+ // FIXME: In theory we could substitute undef here instead of zero.
+ // This should be reconsidered once we settle on the semantics of
+ // undef, poison, etc.
+ Value *Zero = ConstantInt::get(I.getType(), 0);
+ ++NumSimplified;
+ I.replaceAllUsesWith(Zero);
+ Changed = true;
}
-
- if (isAlwaysLive(&I))
+ if (!DB.isInstructionDead(&I))
continue;
Worklist.push_back(&I);
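
The deleted determineLiveOperandBits() hard-coded per-opcode rules such as "adds, and thus subtracts, ripple only to the left", which the DemandedBits analysis now supplies. A standalone brute-force check of that particular rule (a sketch, not LLVM code):

    #include <cstdint>
    #include <cstdio>

    // If only the low K bits of an add are demanded, flipping operand bits at or
    // above position K can never change them, because carries in an add only
    // propagate toward higher bits.
    int main() {
      const unsigned K = 4;                 // demanded bits: the low nibble
      const uint8_t DemandedMask = (1u << K) - 1;
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B)
          for (unsigned Bit = K; Bit < 8; ++Bit) {
            uint8_t Flipped = uint8_t(A ^ (1u << Bit)); // touch a non-demanded input bit
            uint8_t Lo1 = uint8_t(A + B) & DemandedMask;
            uint8_t Lo2 = uint8_t(Flipped + B) & DemandedMask;
            if (Lo1 != Lo2) {
              std::puts("counterexample found");
              return 1;
            }
          }
      std::puts("low bits of an add depend only on low bits of its operands");
      return 0;
    }
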
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 4288742..84f7f5f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -223,10 +223,10 @@ Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
}
// The simple and common case. This also includes constant expressions.
- if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst))
+ if (!isa<PHINode>(Inst) && !Inst->isEHPad())
return Inst;
- // We can't insert directly before a phi node or landing pad. Insert before
+ // We can't insert directly before a phi node or an eh pad. Insert before
// the terminator of the incoming or dominating block.
assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!");
if (Idx != ~0U && isa<PHINode>(Inst))
@@ -365,9 +365,9 @@ void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
/// into an instruction itself.
void ConstantHoisting::collectConstantCandidates(Function &Fn) {
ConstCandMapType ConstCandMap;
- for (Function::iterator BB : Fn)
- for (BasicBlock::iterator Inst : *BB)
- collectConstantCandidates(ConstCandMap, Inst);
+ for (BasicBlock &BB : Fn)
+ for (Instruction &Inst : BB)
+ collectConstantCandidates(ConstCandMap, &Inst);
}
/// \brief Find the base constant within the given range and rebase all other
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 79624b2..686bd40 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/CFG.h"
@@ -32,6 +33,7 @@ STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
+STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
namespace {
@@ -43,6 +45,11 @@ namespace {
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
bool processSwitch(SwitchInst *SI);
+ bool processCallSite(CallSite CS);
+
+ /// Return a constant value for V usable at At and everything it
+ /// dominates. If no such Constant can be found, return nullptr.
+ Constant *getConstantAt(Value *V, Instruction *At);
public:
static char ID;
@@ -54,6 +61,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
@@ -178,44 +186,33 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
return true;
}
-/// processCmp - If the value of this comparison could be determined locally,
-/// constant propagation would already have figured it out. Instead, walk
-/// the predecessors and statically evaluate the comparison based on information
-/// available on that edge. If a given static evaluation is true on ALL
-/// incoming edges, then it's true universally and we can simplify the compare.
+/// processCmp - See if LazyValueInfo's ability to exploit edge conditions,
+/// or range information is sufficient to prove this comparison. Even for
+/// local conditions, this can sometimes prove conditions instcombine can't by
+/// exploiting range information.
bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
Value *Op0 = C->getOperand(0);
- if (isa<Instruction>(Op0) &&
- cast<Instruction>(Op0)->getParent() == C->getParent())
- return false;
-
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return false;
- pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
- if (PI == PE) return false;
+ // As a policy choice, we choose not to waste compile time on anything where
+ // the comparison is testing local values. While LVI can sometimes reason
+ // about such cases, it's not its primary purpose. We do make sure to do
+ // the block local query for uses from terminator instructions, but that's
+ // handled in the code for each terminator.
+ auto *I = dyn_cast<Instruction>(Op0);
+ if (I && I->getParent() == C->getParent())
+ return false;
- LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
- C->getOperand(0), Op1, *PI,
- C->getParent(), C);
+ LazyValueInfo::Tristate Result =
+ LVI->getPredicateAt(C->getPredicate(), Op0, Op1, C);
if (Result == LazyValueInfo::Unknown) return false;
- ++PI;
- while (PI != PE) {
- LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
- C->getOperand(0), Op1, *PI,
- C->getParent(), C);
- if (Res != Result) return false;
- ++PI;
- }
-
++NumCmps;
-
if (Result == LazyValueInfo::True)
C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
else
C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
-
C->eraseFromParent();
return true;
@@ -307,6 +304,59 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
return Changed;
}
+/// processCallSite - Infer nonnull attributes for the arguments at the
+/// specified callsite.
+bool CorrelatedValuePropagation::processCallSite(CallSite CS) {
+ SmallVector<unsigned, 4> Indices;
+ unsigned ArgNo = 0;
+
+ for (Value *V : CS.args()) {
+ PointerType *Type = dyn_cast<PointerType>(V->getType());
+
+ if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+ LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
+ ConstantPointerNull::get(Type),
+ CS.getInstruction()) == LazyValueInfo::False)
+ Indices.push_back(ArgNo + 1);
+ ArgNo++;
+ }
+
+ assert(ArgNo == CS.arg_size() && "sanity check");
+
+ if (Indices.empty())
+ return false;
+
+ AttributeSet AS = CS.getAttributes();
+ LLVMContext &Ctx = CS.getInstruction()->getContext();
+ AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
+ CS.setAttributes(AS);
+
+ return true;
+}
+
+Constant *CorrelatedValuePropagation::getConstantAt(Value *V, Instruction *At) {
+ if (Constant *C = LVI->getConstant(V, At->getParent(), At))
+ return C;
+
+ // TODO: The following really should be sunk inside LVI's core algorithm, or
+ // at least the outer shims around such.
+ auto *C = dyn_cast<CmpInst>(V);
+ if (!C) return nullptr;
+
+ Value *Op0 = C->getOperand(0);
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return nullptr;
+
+ LazyValueInfo::Tristate Result =
+ LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
+ if (Result == LazyValueInfo::Unknown)
+ return nullptr;
+
+ return (Result == LazyValueInfo::True) ?
+ ConstantInt::getTrue(C->getContext()) :
+ ConstantInt::getFalse(C->getContext());
+}
+
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
@@ -318,7 +368,7 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
bool BBChanged = false;
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
- Instruction *II = BI++;
+ Instruction *II = &*BI++;
switch (II->getOpcode()) {
case Instruction::Select:
BBChanged |= processSelect(cast<SelectInst>(II));
@@ -334,6 +384,10 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
case Instruction::Store:
BBChanged |= processMemAccess(II);
break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ BBChanged |= processCallSite(CallSite(II));
+ break;
}
}
@@ -342,7 +396,21 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
case Instruction::Switch:
BBChanged |= processSwitch(cast<SwitchInst>(Term));
break;
+ case Instruction::Ret: {
+ auto *RI = cast<ReturnInst>(Term);
+ // Try to determine the return value if we can. This is mainly here to
+ // simplify the writing of unit tests, but also helps to enable IPO by
+ // constant folding the return values of callees.
+ auto *RetVal = RI->getReturnValue();
+ if (!RetVal) break; // handle "ret void"
+ if (isa<Constant>(RetVal)) break; // nothing to do
+ if (auto *C = getConstantAt(RetVal, RI)) {
+ ++NumReturns;
+ RI->replaceUsesOfWith(RetVal, C);
+ BBChanged = true;
+ }
}
+ };
FnChanged |= BBChanged;
}
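
processCmp() now issues a single LazyValueInfo::getPredicateAt() query instead of walking predecessor edges by hand. The kind of range reasoning being delegated looks roughly like the following toy; Range and foldULT are illustrative names, not LLVM API.

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // If a value is known to lie in the half-open range [Lo, Hi), some unsigned
    // comparisons against a constant fold to a known boolean.
    struct Range {
      uint64_t Lo, Hi;
    };

    static std::optional<bool> foldULT(Range R, uint64_t C) {
      if (R.Hi <= C)
        return true;        // every possible value is < C
      if (R.Lo >= C)
        return false;       // no possible value is < C
      return std::nullopt;  // the analogue of LazyValueInfo::Unknown
    }

    int main() {
      Range R{0, 10};                     // e.g. a counter proven to be in [0, 10)
      assert(foldULT(R, 16) == true);     // "x < 16" folds to true
      assert(foldULT(R, 10) == true);
      assert(!foldULT(R, 5).has_value()); // cannot be decided from the range alone
      return 0;
    }
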
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 3b262a2..b67c3c7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
@@ -46,7 +47,7 @@ namespace {
TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
- Instruction *Inst = DI++;
+ Instruction *Inst = &*DI++;
if (isInstructionTriviallyDead(Inst, TLI)) {
Inst->eraseFromParent();
Changed = true;
@@ -92,6 +93,34 @@ namespace {
char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
+static bool DCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+ if (isInstructionTriviallyDead(OpI, TLI))
+ WorkList.insert(OpI);
+ }
+
+ I->eraseFromParent();
+ ++DCEEliminated;
+ return true;
+ }
+ return false;
+}
+
bool DCE::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
@@ -99,39 +128,24 @@ bool DCE::runOnFunction(Function &F) {
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
- // Start out with all of the instructions in the worklist...
- std::vector<Instruction*> WorkList;
- for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
- WorkList.push_back(&*i);
-
- // Loop over the worklist finding instructions that are dead. If they are
- // dead make them drop all of their uses, making other instructions
- // potentially dead, and work until the worklist is empty.
- //
bool MadeChange = false;
+ SmallSetVector<Instruction *, 16> WorkList;
+ // Iterate over the original function, only adding insts to the worklist
+ // if they actually need to be revisited. This avoids having to pre-init
+ // the worklist with the entire function's worth of instructions.
+ for (inst_iterator FI = inst_begin(F), FE = inst_end(F); FI != FE;) {
+ Instruction *I = &*FI;
+ ++FI;
+
+ // We're visiting this instruction now, so make sure it's not in the
+ // worklist from an earlier visit.
+ if (!WorkList.count(I))
+ MadeChange |= DCEInstruction(I, WorkList, TLI);
+ }
+
while (!WorkList.empty()) {
- Instruction *I = WorkList.back();
- WorkList.pop_back();
-
- if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead.
- // Loop over all of the values that the instruction uses, if there are
- // instructions being used, add them to the worklist, because they might
- // go dead after this one is removed.
- //
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *Used = dyn_cast<Instruction>(*OI))
- WorkList.push_back(Used);
-
- // Remove the instruction.
- I->eraseFromParent();
-
- // Remove the instruction from the worklist if it still exists in it.
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
- WorkList.end());
-
- MadeChange = true;
- ++DCEEliminated;
- }
+ Instruction *I = WorkList.pop_back_val();
+ MadeChange |= DCEInstruction(I, WorkList, TLI);
}
return MadeChange;
}
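
The new DCEInstruction() helper deletes a trivially dead instruction, nulls out its operands, and queues any operand that becomes dead as a result, so the worklist only ever holds instructions that actually need revisiting. A toy standalone mirror of that cascade, with explicit use counts in place of LLVM's use lists (all names hypothetical):

    #include <cstddef>
    #include <set>
    #include <vector>

    struct ToyValue {
      std::vector<size_t> Operands;
      unsigned Uses = 0;
      bool SideEffects = false;
      bool Deleted = false;
    };

    static bool triviallyDead(const ToyValue &V) {
      return !V.Deleted && !V.SideEffects && V.Uses == 0;
    }

    int main() {
      // %0 = load; %1 = add %0, %0; %2 = store (side effects); nothing uses %1.
      std::vector<ToyValue> Vals(3);
      Vals[1].Operands = {0, 0};
      Vals[0].Uses = 2;
      Vals[2].SideEffects = true;

      std::set<size_t> Worklist; // plays the role of the SmallSetVector
      for (size_t I = 0; I < Vals.size(); ++I)
        if (triviallyDead(Vals[I]))
          Worklist.insert(I);
      while (!Worklist.empty()) {
        size_t I = *Worklist.begin();
        Worklist.erase(Worklist.begin());
        if (!triviallyDead(Vals[I]))
          continue;
        Vals[I].Deleted = true;
        for (size_t Op : Vals[I].Operands)
          if (--Vals[Op].Uses == 0 && triviallyDead(Vals[Op]))
            Worklist.insert(Op); // became dead once its only user went away
      }
      return (Vals[0].Deleted && Vals[1].Deleted && !Vals[2].Deleted) ? 0 : 1;
    }
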
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c505584..36ad0a5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -40,6 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "dse"
+STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
@@ -59,23 +61,24 @@ namespace {
if (skipOptnoneFunction(F))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = AA->getTargetLibraryInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (BasicBlock &I : F)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
- if (DT->isReachableFromEntry(I))
- Changed |= runOnBasicBlock(*I);
+ if (DT->isReachableFromEntry(&I))
+ Changed |= runOnBasicBlock(I);
AA = nullptr; MD = nullptr; DT = nullptr;
return Changed;
}
bool runOnBasicBlock(BasicBlock &BB);
+ bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI);
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
@@ -85,10 +88,11 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
};
@@ -97,8 +101,10 @@ namespace {
char DSE::ID = 0;
INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
@@ -115,7 +121,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
- const TargetLibraryInfo *TLI,
+ const TargetLibraryInfo &TLI,
SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -140,7 +146,7 @@ static void DeleteDeadInstruction(Instruction *I,
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI, TLI))
+ if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
@@ -153,7 +159,7 @@ static void DeleteDeadInstruction(Instruction *I,
/// hasMemoryWrite - Does this instruction write some memory? This only returns
/// true for things that we can analyze with other helpers below.
-static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -170,20 +176,20 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
}
if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
- if (TLI && TLI->has(LibFunc::strcpy) &&
- F->getName() == TLI->getName(LibFunc::strcpy)) {
+ if (TLI.has(LibFunc::strcpy) &&
+ F->getName() == TLI.getName(LibFunc::strcpy)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strncpy) &&
- F->getName() == TLI->getName(LibFunc::strncpy)) {
+ if (TLI.has(LibFunc::strncpy) &&
+ F->getName() == TLI.getName(LibFunc::strncpy)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strcat) &&
- F->getName() == TLI->getName(LibFunc::strcat)) {
+ if (TLI.has(LibFunc::strcat) &&
+ F->getName() == TLI.getName(LibFunc::strcat)) {
return true;
}
- if (TLI && TLI->has(LibFunc::strncat) &&
- F->getName() == TLI->getName(LibFunc::strncat)) {
+ if (TLI.has(LibFunc::strncat) &&
+ F->getName() == TLI.getName(LibFunc::strncat)) {
return true;
}
}
@@ -224,9 +230,9 @@ static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
/// instruction if any.
-static MemoryLocation getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
- assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
- "Unknown instruction case");
+static MemoryLocation getLocForRead(Instruction *Inst,
+ const TargetLibraryInfo &TLI) {
+ assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case");
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
@@ -313,9 +319,9 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo &TLI) {
uint64_t Size;
- if (getObjectSize(V, Size, DL, TLI))
+ if (getObjectSize(V, Size, DL, &TLI))
return Size;
return MemoryLocation::UnknownSize;
}
@@ -336,7 +342,7 @@ namespace {
static OverwriteResult isOverwrite(const MemoryLocation &Later,
const MemoryLocation &Earlier,
const DataLayout &DL,
- const TargetLibraryInfo *TLI,
+ const TargetLibraryInfo &TLI,
int64_t &EarlierOff, int64_t &LaterOff) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -442,10 +448,12 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
/// because the DSE inducing instruction may be a self-read.
static bool isPossibleSelfRead(Instruction *Inst,
const MemoryLocation &InstStoreLoc,
- Instruction *DepWrite, AliasAnalysis &AA) {
+ Instruction *DepWrite,
+ const TargetLibraryInfo &TLI,
+ AliasAnalysis &AA) {
// Self reads can only happen for instructions that read memory. Get the
// location read.
- MemoryLocation InstReadLoc = getLocForRead(Inst, AA);
+ MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
@@ -459,7 +467,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Here we don't know if A/B may alias, but we do know that B/B are must
// aliases, so removing the first memcpy is safe (assuming it writes <= #
// bytes as the second one.
- MemoryLocation DepReadLoc = getLocForRead(DepWrite, AA);
+ MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
return false;
@@ -475,11 +483,12 @@ static bool isPossibleSelfRead(Instruction *Inst,
//===----------------------------------------------------------------------===//
bool DSE::runOnBasicBlock(BasicBlock &BB) {
+ const DataLayout &DL = BB.getModule()->getDataLayout();
bool MadeChange = false;
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
- Instruction *Inst = BBI++;
+ Instruction *Inst = &*BBI++;
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(Inst, TLI)) {
@@ -488,42 +497,68 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
// If we find something that writes memory, get its memory dependence.
- if (!hasMemoryWrite(Inst, TLI))
- continue;
-
- MemDepResult InstDep = MD->getDependency(Inst);
-
- // Ignore any store where we can't find a local dependence.
- // FIXME: cross-block DSE would be fun. :)
- if (!InstDep.isDef() && !InstDep.isClobber())
+ if (!hasMemoryWrite(Inst, *TLI))
continue;
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+
+ auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(&*BBI);
+
+ DeleteDeadInstruction(DeadInst, *MD, *TLI);
+
+ if (!NextInst) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ ++NumRedundantStores;
+ MadeChange = true;
+ };
+
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad && isRemovable(SI)) {
+ isRemovable(SI) &&
+ MemoryIsNotModifiedBetween(DepLoad, SI)) {
+
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
<< "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
+ RemoveDeadInstAndUpdateBBI(SI);
+ continue;
+ }
+ }
- DeleteDeadInstruction(SI, *MD, TLI);
+ // Remove null stores into the calloc'ed objects
+ Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
- if (!NextInst) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
- ++NumFastStores;
- MadeChange = true;
+ if (StoredConstant && StoredConstant->isNullValue() &&
+ isRemovable(SI)) {
+ Instruction *UnderlyingPointer = dyn_cast<Instruction>(
+ GetUnderlyingObject(SI->getPointerOperand(), DL));
+
+ if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
+ MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) {
+ DEBUG(dbgs()
+ << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
+ << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
+
+ RemoveDeadInstAndUpdateBBI(SI);
continue;
}
}
}
+ MemDepResult InstDep = MD->getDependency(Inst);
+
+ // Ignore any store where we can't find a local dependence.
+ // FIXME: cross-block DSE would be fun. :)
+ if (!InstDep.isDef() && !InstDep.isClobber())
+ continue;
+
// Figure out what location is being stored to.
MemoryLocation Loc = getLocForWrite(Inst, *AA);
@@ -549,24 +584,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
if (isRemovable(DepWrite) &&
- !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
- const DataLayout &DL = BB.getModule()->getDataLayout();
OverwriteResult OR =
- isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(),
- DepWriteOffset, InstWriteOffset);
+ isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD, TLI);
+ DeleteDeadInstruction(DepWrite, *MD, *TLI);
++NumFastStores;
MadeChange = true;
// DeleteDeadInstruction can delete the current instruction in loop
// cases, reset BBI.
- BBI = Inst;
+ BBI = Inst->getIterator();
if (BBI != BB.begin())
--BBI;
break;
@@ -609,10 +642,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (DepWrite == &BB.front()) break;
// Can't look past this instruction if it might read 'Loc'.
- if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
break;
- InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
+ InstDep = MD->getPointerDependencyFrom(Loc, false,
+ DepWrite->getIterator(), &BB);
}
}
@@ -624,6 +658,64 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
return MadeChange;
}
+/// Returns true if the memory which is accessed by the second instruction is not
+/// modified between the first and the second instruction.
+/// Precondition: Second instruction must be dominated by the first
+/// instruction.
+bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI,
+ Instruction *SecondI) {
+ SmallVector<BasicBlock *, 16> WorkList;
+ SmallPtrSet<BasicBlock *, 8> Visited;
+ BasicBlock::iterator FirstBBI(FirstI);
+ ++FirstBBI;
+ BasicBlock::iterator SecondBBI(SecondI);
+ BasicBlock *FirstBB = FirstI->getParent();
+ BasicBlock *SecondBB = SecondI->getParent();
+ MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+
+ // Start checking the store-block.
+ WorkList.push_back(SecondBB);
+ bool isFirstBlock = true;
+
+ // Check all blocks going backward until we reach the load-block.
+ while (!WorkList.empty()) {
+ BasicBlock *B = WorkList.pop_back_val();
+
+ // Ignore instructions before LI if this is the FirstBB.
+ BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
+
+ BasicBlock::iterator EI;
+ if (isFirstBlock) {
+ // Ignore instructions after SI if this is the first visit of SecondBB.
+ assert(B == SecondBB && "first block is not the store block");
+ EI = SecondBBI;
+ isFirstBlock = false;
+ } else {
+ // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
+ // In this case we also have to look at instructions after SI.
+ EI = B->end();
+ }
+ for (; BI != EI; ++BI) {
+ Instruction *I = &*BI;
+ if (I->mayWriteToMemory() && I != SecondI) {
+ auto Res = AA->getModRefInfo(I, MemLoc);
+ if (Res != MRI_NoModRef)
+ return false;
+ }
+ }
+ if (B != FirstBB) {
+ assert(B != &FirstBB->getParent()->getEntryBlock() &&
+ "Should not hit the entry block because SI must be dominated by LI");
+ for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
+ if (!Visited.insert(*PredI).second)
+ continue;
+ WorkList.push_back(*PredI);
+ }
+ }
+ }
+ return true;
+}
+
/// Find all blocks that will unconditionally lead to the block BB and append
/// them to F.
static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
@@ -655,10 +747,11 @@ bool DSE::HandleFree(CallInst *F) {
Instruction *InstPt = BB->getTerminator();
if (BB == F->getParent()) InstPt = F;
- MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
+ MemDepResult Dep =
+ MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB);
while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
- if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
+ if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency))
break;
Value *DepPointer =
@@ -668,10 +761,10 @@ bool DSE::HandleFree(CallInst *F) {
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
break;
- Instruction *Next = std::next(BasicBlock::iterator(Dependency));
+ auto Next = ++Dependency->getIterator();
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD, TLI);
+ DeleteDeadInstruction(Dependency, *MD, *TLI);
++NumFastStores;
MadeChange = true;
@@ -704,23 +797,22 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
SmallSetVector<Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
- BasicBlock *Entry = BB.getParent()->begin();
- for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
- if (isa<AllocaInst>(I))
- DeadStackObjects.insert(I);
+ BasicBlock &Entry = BB.getParent()->front();
+ for (Instruction &I : Entry) {
+ if (isa<AllocaInst>(&I))
+ DeadStackObjects.insert(&I);
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
- DeadStackObjects.insert(I);
+ else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
+ DeadStackObjects.insert(&I);
}
// Treat byval or inalloca arguments the same, stores to them are dead at the
// end of the function.
- for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
- AE = BB.getParent()->arg_end(); AI != AE; ++AI)
- if (AI->hasByValOrInAllocaAttr())
- DeadStackObjects.insert(AI);
+ for (Argument &AI : BB.getParent()->args())
+ if (AI.hasByValOrInAllocaAttr())
+ DeadStackObjects.insert(&AI);
const DataLayout &DL = BB.getModule()->getDataLayout();
@@ -729,10 +821,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store, check to see if it points into a dead stack value.
- if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
+ if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
- GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL);
+ GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
@@ -744,7 +836,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
if (AllDead) {
- Instruction *Dead = BBI++;
+ Instruction *Dead = &*BBI++;
DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Objects: ";
@@ -757,7 +849,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
+ DeleteDeadInstruction(Dead, *MD, *TLI, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -765,9 +857,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
// Remove any dead non-memory-mutating instructions.
- if (isInstructionTriviallyDead(BBI, TLI)) {
- Instruction *Inst = BBI++;
- DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
+ if (isInstructionTriviallyDead(&*BBI, TLI)) {
+ Instruction *Inst = &*BBI++;
+ DeleteDeadInstruction(Inst, *MD, *TLI, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -776,15 +868,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (isa<AllocaInst>(BBI)) {
// Remove allocas from the list of dead stack objects; there can't be
// any references before the definition.
- DeadStackObjects.remove(BBI);
+ DeadStackObjects.remove(&*BBI);
continue;
}
- if (auto CS = CallSite(BBI)) {
+ if (auto CS = CallSite(&*BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
- if (isAllocLikeFn(BBI, TLI))
- DeadStackObjects.remove(BBI);
+ if (isAllocLikeFn(&*BBI, TLI))
+ DeadStackObjects.remove(&*BBI);
// If this call does not access memory, it can't be loading any of our
// pointers.
@@ -795,10 +887,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// the call is live.
DeadStackObjects.remove_if([&](Value *I) {
// See if the call site touches the value.
- AliasAnalysis::ModRefResult A = AA->getModRefInfo(
- CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
+ ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI));
- return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ return A == MRI_ModRef || A == MRI_Ref;
});
// If all of the allocas were clobbered by the call then we're not going
@@ -864,8 +955,7 @@ void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
// Remove objects that could alias LoadedLoc.
DeadStackObjects.remove_if([&](Value *I) {
// See if the loaded location could alias the stack location.
- MemoryLocation StackLoc(I,
- getPointerSize(I, DL, AA->getTargetLibraryInfo()));
+ MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
return !AA->isNoAlias(StackLoc, LoadedLoc);
});
}
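
MemoryIsNotModifiedBetween() walks the CFG backwards from the second access toward the first, scanning each block for instructions that might clobber the queried location and stopping at the first access's block. A minimal standalone model of that walk, with a toy CFG in place of LLVM's BasicBlock and AliasAnalysis machinery (all names hypothetical):

    #include <cstddef>
    #include <set>
    #include <vector>

    struct ToyBlock {
      std::vector<bool> MayModifyLoc; // one flag per instruction in the block
      std::vector<size_t> Preds;
    };

    static bool notModifiedBetween(const std::vector<ToyBlock> &CFG,
                                   size_t FirstBB, size_t FirstIdx,
                                   size_t SecondBB, size_t SecondIdx) {
      std::vector<size_t> Worklist{SecondBB};
      std::set<size_t> Visited;
      bool IsFirstVisit = true;
      while (!Worklist.empty()) {
        size_t B = Worklist.back();
        Worklist.pop_back();
        // Skip instructions at or before the first access in its block, and
        // instructions at or after the second access on the initial visit.
        size_t Begin = (B == FirstBB) ? FirstIdx + 1 : 0;
        size_t End = IsFirstVisit ? SecondIdx : CFG[B].MayModifyLoc.size();
        IsFirstVisit = false;
        for (size_t I = Begin; I < End; ++I)
          if (CFG[B].MayModifyLoc[I])
            return false; // a clobber sits between the two accesses
        if (B != FirstBB)
          for (size_t P : CFG[B].Preds)
            if (Visited.insert(P).second)
              Worklist.push_back(P);
      }
      return true;
    }

    int main() {
      // Block 0: [load, nop]   Block 1: [nop, store], predecessor = block 0.
      std::vector<ToyBlock> CFG = {{{false, false}, {}}, {{false, true}, {0}}};
      // Nothing between the load (block 0, idx 0) and the store (block 1, idx 1)
      // writes the location, so the walk answers "not modified".
      return notModifiedBetween(CFG, 0, 0, 1, 1) ? 0 : 1;
    }
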
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 029b44c..7ef062e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -263,7 +264,6 @@ namespace {
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
- Function &F;
const TargetLibraryInfo &TLI;
const TargetTransformInfo &TTI;
DominatorTree &DT;
@@ -281,20 +281,37 @@ public:
/// that dominated values can succeed in their lookup.
ScopedHTType AvailableValues;
- /// \brief A scoped hash table of the current values of loads.
+ /// A scoped hash table of the current values of previously encounted memory
+ /// locations.
///
- /// This allows us to get efficient access to dominating loads when we have
- /// a fully redundant load. In addition to the most recent load, we keep
- /// track of a generation count of the read, which is compared against the
- /// current generation count. The current generation count is incremented
+ /// This allows us to get efficient access to dominating loads or stores when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is incremented
/// after every possibly writing memory operation, which ensures that we only
- /// CSE loads with other loads that have no intervening store.
- typedef RecyclingAllocator<
- BumpPtrAllocator,
- ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+ /// CSE loads with other loads that have no intervening store. Ordering
+ /// events (such as fences or atomic instructions) increment the generation
+ /// count as well; essentially, we model these as writes to all possible
+ /// locations. Note that atomic and/or volatile loads and stores can be
+ /// present in the table; it is the responsibility of the consumer to inspect
+ /// the atomicity/volatility if needed.
+ struct LoadValue {
+ Value *Data;
+ unsigned Generation;
+ int MatchingId;
+ bool IsAtomic;
+ LoadValue()
+ : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+ LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+ bool IsAtomic)
+ : Data(Data), Generation(Generation), MatchingId(MatchingId),
+ IsAtomic(IsAtomic) {}
+ };
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value *, LoadValue>>
LoadMapAllocator;
- typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
- DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+ typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+ LoadMapAllocator> LoadHTType;
LoadHTType AvailableLoads;
/// \brief A scoped hash table of the current values of read-only call
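The LoadValue record above replaces the old (Value*, generation) pair so that the target matching id and the atomicity of the access travel with the cached value. The generation scheme itself is simple: every operation that may write memory bumps a counter, and a cached load is only reusable while the counter is unchanged, with an atomic load never replaced by a non-atomic cached value. A small standalone model of that bookkeeping (illustrative names only, not the ScopedHashTable API):

  #include <string>
  #include <unordered_map>

  struct CachedLoad {
    int Value;            // stand-in for the loaded SSA value
    unsigned Generation;  // generation at which the load was recorded
    bool IsAtomic;
  };

  struct AvailabilityMap {
    unsigned CurrentGeneration = 0;
    std::unordered_map<std::string, CachedLoad> Table; // keyed by pointer name

    void recordLoad(const std::string &Ptr, int V, bool Atomic) {
      Table[Ptr] = {V, CurrentGeneration, Atomic};
    }
    void noteWrite() { ++CurrentGeneration; } // any possibly-writing operation

    // A later load of Ptr may reuse the cached value only if nothing that
    // might write memory happened in between, and only if an atomic load is
    // not being satisfied by a non-atomic cached value.
    bool canReuse(const std::string &Ptr, bool LoadIsAtomic) const {
      auto It = Table.find(Ptr);
      return It != Table.end() && It->second.Generation == CurrentGeneration &&
             (It->second.IsAtomic || !LoadIsAtomic);
    }
  };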
@@ -308,10 +325,9 @@ public:
unsigned CurrentGeneration;
/// \brief Set up the EarlyCSE runner for a particular function.
- EarlyCSE(Function &F, const TargetLibraryInfo &TLI,
- const TargetTransformInfo &TTI, DominatorTree &DT,
- AssumptionCache &AC)
- : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
+ EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
+ DominatorTree &DT, AssumptionCache &AC)
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
bool run();
@@ -382,57 +398,91 @@ private:
class ParseMemoryInst {
public:
ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
- : Load(false), Store(false), Vol(false), MayReadFromMemory(false),
- MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) {
- MayReadFromMemory = Inst->mayReadFromMemory();
- MayWriteToMemory = Inst->mayWriteToMemory();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- MemIntrinsicInfo Info;
- if (!TTI.getTgtMemIntrinsic(II, Info))
- return;
- if (Info.NumMemRefs == 1) {
- Store = Info.WriteMem;
- Load = Info.ReadMem;
- MatchingId = Info.MatchingId;
- MayReadFromMemory = Info.ReadMem;
- MayWriteToMemory = Info.WriteMem;
- Vol = Info.Vol;
- Ptr = Info.PtrVal;
- }
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Load = true;
- Vol = !LI->isSimple();
- Ptr = LI->getPointerOperand();
+ : IsTargetMemInst(false), Inst(Inst) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
+ IsTargetMemInst = true;
+ }
+ bool isLoad() const {
+ if (IsTargetMemInst) return Info.ReadMem;
+ return isa<LoadInst>(Inst);
+ }
+ bool isStore() const {
+ if (IsTargetMemInst) return Info.WriteMem;
+ return isa<StoreInst>(Inst);
+ }
+ bool isAtomic() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ return Inst->isAtomic();
+ }
+ bool isUnordered() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return true;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isUnordered();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->isUnordered();
+ }
+ // Conservative answer
+ return !Inst->isAtomic();
+ }
+
+ bool isVolatile() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isVolatile();
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- Store = true;
- Vol = !SI->isSimple();
- Ptr = SI->getPointerOperand();
+ return SI->isVolatile();
}
+ // Conservative answer
+ return true;
}
- bool isLoad() { return Load; }
- bool isStore() { return Store; }
- bool isVolatile() { return Vol; }
- bool isMatchingMemLoc(const ParseMemoryInst &Inst) {
- return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId;
+
+
+ bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
+ return (getPointerOperand() == Inst.getPointerOperand() &&
+ getMatchingId() == Inst.getMatchingId());
}
- bool isValid() { return Ptr != nullptr; }
- int getMatchingId() { return MatchingId; }
- Value *getPtr() { return Ptr; }
- bool mayReadFromMemory() { return MayReadFromMemory; }
- bool mayWriteToMemory() { return MayWriteToMemory; }
+ bool isValid() const { return getPointerOperand() != nullptr; }
- private:
- bool Load;
- bool Store;
- bool Vol;
- bool MayReadFromMemory;
- bool MayWriteToMemory;
// For regular (non-intrinsic) loads/stores, this is set to -1. For
// intrinsic loads/stores, the id is retrieved from the corresponding
// field in the MemIntrinsicInfo structure. That field contains
// non-negative values only.
- int MatchingId;
- Value *Ptr;
+ int getMatchingId() const {
+ if (IsTargetMemInst) return Info.MatchingId;
+ return -1;
+ }
+ Value *getPointerOperand() const {
+ if (IsTargetMemInst) return Info.PtrVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->getPointerOperand();
+ }
+ return nullptr;
+ }
+ bool mayReadFromMemory() const {
+ if (IsTargetMemInst) return Info.ReadMem;
+ return Inst->mayReadFromMemory();
+ }
+ bool mayWriteToMemory() const {
+ if (IsTargetMemInst) return Info.WriteMem;
+ return Inst->mayWriteToMemory();
+ }
+
+ private:
+ bool IsTargetMemInst;
+ MemIntrinsicInfo Info;
+ Instruction *Inst;
};
bool processNode(DomTreeNode *Node);
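ParseMemoryInst now answers every query on demand from either the wrapped instruction or the target's MemIntrinsicInfo, instead of caching flags in the constructor. The shape is a small adapter that gives plain loads/stores and target memory intrinsics one common query surface; a condensed standalone sketch of that idea (hypothetical types, not the LLVM classes):

  #include <string>

  // Two hypothetical memory-operation descriptions: a plain load/store and a
  // target-specific memory intrinsic carrying its own metadata.
  struct PlainMemOp  { bool IsLoad; std::string Ptr; }; // load if true, else store
  struct TargetMemOp { bool ReadMem; bool WriteMem; int MatchingId; std::string Ptr; };

  class MemOpView {
    const PlainMemOp *Plain = nullptr;
    const TargetMemOp *Target = nullptr;
  public:
    explicit MemOpView(const PlainMemOp &P) : Plain(&P) {}
    explicit MemOpView(const TargetMemOp &T) : Target(&T) {}

    bool isLoad() const  { return Target ? Target->ReadMem  : Plain->IsLoad; }
    bool isStore() const { return Target ? Target->WriteMem : !Plain->IsLoad; }
    // Plain loads/stores report -1; intrinsics report their own id, so two
    // accesses only match if they agree on both the pointer and the id.
    int getMatchingId() const { return Target ? Target->MatchingId : -1; }
    std::string getPointerOperand() const { return Target ? Target->Ptr : Plain->Ptr; }
    bool isMatchingMemLoc(const MemOpView &Other) const {
      return getPointerOperand() == Other.getPointerOperand() &&
             getMatchingId() == Other.getMatchingId();
    }
  };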
@@ -497,7 +547,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(Inst, &TLI)) {
@@ -548,24 +598,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
ParseMemoryInst MemInst(Inst, TTI);
// If this is a non-volatile load, process it.
if (MemInst.isValid() && MemInst.isLoad()) {
- // Ignore volatile loads.
- if (MemInst.isVolatile()) {
+ // (Conservatively) we can't peek past the ordering implied by this
+ // operation, but we can add this load to our set of available values.
+ if (MemInst.isVolatile() || !MemInst.isUnordered()) {
LastStore = nullptr;
- // Don't CSE across synchronization boundaries.
- if (Inst->mayWriteToMemory())
- ++CurrentGeneration;
- continue;
+ ++CurrentGeneration;
}
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
- std::pair<Value *, unsigned> InVal =
- AvailableLoads.lookup(MemInst.getPtr());
- if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+ if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing loads with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered() &&
+ // We can't replace an atomic load with one which isn't also atomic.
+ InVal.IsAtomic >= MemInst.isAtomic()) {
+ Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
- << " to: " << *InVal.first << '\n');
+ << " to: " << *InVal.Data << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
Inst->eraseFromParent();
@@ -576,8 +628,10 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, remember that we have this instruction.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
+ AvailableLoads.insert(
+ MemInst.getPointerOperand(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
LastStore = nullptr;
continue;
}
@@ -613,6 +667,44 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // A release fence requires that all stores complete before it, but does
+ // not prevent the reordering of following loads 'before' the fence. As a
+ // result, we don't need to consider it as writing to memory and don't need
+ // to advance the generation. We do need to prevent DSE across the fence,
+ // but that's handled above.
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ if (FI->getOrdering() == Release) {
+ assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
+ continue;
+ }
+
+ // Write-back DSE - If we write back the same value we just loaded from
+ // the same location and haven't passed any intervening writes or ordering
+ // operations, we can remove the write. The primary benefit is in allowing
+ // the available load table to remain valid and value forward past where
+ // the store originally was.
+ if (MemInst.isValid() && MemInst.isStore()) {
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+ if (InVal.Data &&
+ InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+ InVal.Generation == CurrentGeneration &&
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing stores with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered()) {
+ assert((!LastStore ||
+ ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+ MemInst.getPointerOperand()) &&
+ "can't have an intervening store!");
+ DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ // We can avoid incrementing the generation count since we were able
+ // to eliminate this store.
+ continue;
+ }
+ }
+
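The write-back block above removes a store that writes back exactly the value it just loaded from the same address, provided the generation has not moved (no intervening write or ordering operation) and the store itself is plain. A hedged sketch of that decision with stand-in types:

  #include <string>

  struct AvailableLoad {
    std::string Ptr;     // address the cached value was loaded from
    int Value;           // stand-in for the loaded SSA value
    unsigned Generation; // generation at the time of the load
  };

  // True if storing StoredValue to Ptr merely writes back what is already
  // there, so the store can be dropped without changing behaviour.
  bool isRedundantWriteBack(const AvailableLoad &InVal, const std::string &Ptr,
                            int StoredValue, unsigned CurrentGeneration,
                            bool StoreIsVolatileOrOrdered) {
    return !StoreIsVolatileOrOrdered &&           // only plain stores are removed
           InVal.Ptr == Ptr &&                    // same location
           InVal.Value == StoredValue &&          // same value as the earlier load
           InVal.Generation == CurrentGeneration; // no intervening write
  }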
// Okay, this isn't something we can CSE at all. Check to see if it is
// something that could modify memory. If so, our available memory values
// cannot be used so bump the generation count.
@@ -622,8 +714,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
+ // At the moment, we don't remove ordered stores, but do remove
+ // unordered atomic stores. There's no special requirement (for
+ // unordered atomics) about removing atomic stores only in favor of
+ // other atomic stores since we're going to execute the non-atomic
+ // one anyway and the atomic one might never have become visible.
if (LastStore) {
ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+ assert(LastStoreMemInst.isUnordered() &&
+ !LastStoreMemInst.isVolatile() &&
+ "Violated invariant");
if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << *Inst << '\n');
@@ -640,12 +740,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
-
- // Remember that this was the last store we saw for DSE.
- if (!MemInst.isVolatile())
+ AvailableLoads.insert(
+ MemInst.getPointerOperand(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
+
+ // Remember that this was the last unordered store we saw for DSE. We
+ // don't yet handle DSE on ordered or volatile stores since we don't
+ // have a good way to model the ordering requirement for following
+ // passes once the store is removed. We could insert a fence, but
+ // since fences are slightly stronger than stores in their ordering,
+ // it's not clear this is a profitable transform. Another option would
+ // be to merge the ordering with that of the post dominating store.
+ if (MemInst.isUnordered() && !MemInst.isVolatile())
LastStore = Inst;
+ else
+ LastStore = nullptr;
}
}
}
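The store path above does two related things: it deletes the previously remembered store when a newer store hits the same location with nothing in between, and it only remembers the new store as LastStore if it is unordered and non-volatile, resetting the tracker otherwise. A compact standalone model of that bookkeeping, using indices instead of Instruction pointers:

  #include <string>
  #include <vector>

  struct EmittedStore { std::string Ptr; bool Deleted; };

  // LastStore is the index of the most recent plain store, or -1. A load or
  // an ordering operation would also reset it to -1 elsewhere, which is what
  // keeps the "no intervening load" invariant.
  void recordStore(std::vector<EmittedStore> &Stores, int &LastStore,
                   const std::string &Ptr, bool IsPlain) {
    if (LastStore >= 0 && Stores[LastStore].Ptr == Ptr)
      Stores[LastStore].Deleted = true; // trivial DSE of the earlier plain store
    Stores.push_back({Ptr, false});
    LastStore = IsPlain ? int(Stores.size()) - 1 : -1;
  }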
@@ -714,7 +824,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
auto &AC = AM->getResult<AssumptionAnalysis>(F);
- EarlyCSE CSE(F, TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -751,7 +861,7 @@ public:
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- EarlyCSE CSE(F, TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC);
return CSE.run();
}
@@ -761,6 +871,7 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
};
diff --git a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 0430c18..185cdbd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -30,7 +30,7 @@ public:
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
}
private:
@@ -41,7 +41,7 @@ private:
char FlattenCFGPass::ID = 0;
INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
false)
@@ -59,7 +59,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end();) {
- if (FlattenCFG(BBIt++, AA)) {
+ if (FlattenCFG(&*BBIt++, AA)) {
LocalChange = true;
}
}
@@ -69,7 +69,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
}
bool FlattenCFGPass::runOnFunction(Function &F) {
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverChanged = false;
// iterativelyFlattenCFG can make some blocks dead.
while (iterativelyFlattenCFG(F, AA)) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
index c931422..7f5d786 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
@@ -41,7 +43,7 @@ using namespace llvm;
// integer domain inputs, produce an integer output; fadd, for example.
//
// If a non-mappable instruction is seen, this entire def-use graph is marked
-// as non-transformable. If we see an instruction that converts from the
+// as non-transformable. If we see an instruction that converts from the
// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
/// The largest integer type worth dealing with.
@@ -60,6 +62,7 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
@@ -82,7 +85,9 @@ namespace {
}
char Float2Int::ID = 0;
-INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
+INITIALIZE_PASS_BEGIN(Float2Int, "float2int", "Float to int", false, false)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(Float2Int, "float2int", "Float to int", false, false)
// Given a FCmp predicate, return a matching ICmp predicate if one
// exists, otherwise return BAD_ICMP_PREDICATE.
@@ -125,7 +130,9 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
// Find the roots - instructions that convert from the FP domain to
// integer domain.
void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
- for (auto &I : inst_range(F)) {
+ for (auto &I : instructions(F)) {
+ if (isa<VectorType>(I.getType()))
+ continue;
switch (I.getOpcode()) {
default: break;
case Instruction::FPToUI:
@@ -133,7 +140,7 @@ void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
Roots.insert(&I);
break;
case Instruction::FCmp:
- if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+ if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
CmpInst::BAD_ICMP_PREDICATE)
Roots.insert(&I);
break;
@@ -176,7 +183,7 @@ ConstantRange Float2Int::validateRange(ConstantRange R) {
// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
// defs before their uses. Calculate the real range info.
-// Breadth-first walk of the use-def graph; determine the set of nodes
+// Breadth-first walk of the use-def graph; determine the set of nodes
// we care about and eagerly determine if some of them are poisonous.
void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
@@ -222,14 +229,14 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
seen(I, unknownRange());
break;
}
-
+
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
// Unify def-use chains if they interfere.
ECs.unionSets(I, OI);
- if (SeenInsts.find(I)->second != badRange())
+ if (SeenInsts.find(I)->second != badRange())
Worklist.push_back(OI);
- } else if (!isa<ConstantFP>(O)) {
+ } else if (!isa<ConstantFP>(O)) {
// Not an instruction or ConstantFP? we can't do anything.
seen(I, badRange());
}
@@ -240,11 +247,11 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
// Walk forwards down the list of seen instructions, so we visit defs before
// uses.
void Float2Int::walkForwards() {
- for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
- if (It->second != unknownRange())
+ for (auto &It : make_range(SeenInsts.rbegin(), SeenInsts.rend())) {
+ if (It.second != unknownRange())
continue;
- Instruction *I = It->first;
+ Instruction *I = It.first;
std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
switch (I->getOpcode()) {
// FIXME: Handle select and phi nodes.
@@ -299,7 +306,7 @@ void Float2Int::walkForwards() {
for (Value *O : I->operands()) {
if (Instruction *OI = dyn_cast<Instruction>(O)) {
assert(SeenInsts.find(OI) != SeenInsts.end() &&
- "def not seen before use!");
+ "def not seen before use!");
OpRanges.push_back(SeenInsts.find(OI)->second);
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
// Work out if the floating point number can be losslessly represented
@@ -314,11 +321,11 @@ void Float2Int::walkForwards() {
APFloat F = CF->getValueAPF();
// First, weed out obviously incorrect values. Non-finite numbers
- // can't be represented and neither can negative zero, unless
+ // can't be represented and neither can negative zero, unless
// we're in fast math mode.
if (!F.isFinite() ||
(F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
- !I->hasNoSignedZeros())) {
+ !I->hasNoSignedZeros())) {
seen(I, badRange());
Abort = true;
break;
@@ -345,7 +352,7 @@ void Float2Int::walkForwards() {
// Reduce the operands' ranges to a single range and return.
if (!Abort)
- seen(I, Op(OpRanges));
+ seen(I, Op(OpRanges));
}
}
@@ -395,7 +402,7 @@ bool Float2Int::validateAndTransform() {
R.isFullSet() || R.isSignWrappedSet())
continue;
assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
-
+
// The number of bits required is the maximum of the upper and
// lower limits, plus one so it can be signed.
unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
@@ -505,9 +512,8 @@ Value *Float2Int::convert(Instruction *I, Type *ToTy) {
// Perform dead code elimination on the instructions we just modified.
void Float2Int::cleanup() {
- for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend();
- I != E; ++I)
- I->first->eraseFromParent();
+ for (auto &I : make_range(ConvertedInsts.rbegin(), ConvertedInsts.rend()))
+ I.first->eraseFromParent();
}
bool Float2Int::runOnFunction(Function &F) {
@@ -534,7 +540,4 @@ bool Float2Int::runOnFunction(Function &F) {
return Modified;
}
-FunctionPass *llvm::createFloat2IntPass() {
- return new Float2Int();
-}
-
+FunctionPass *llvm::createFloat2IntPass() { return new Float2Int(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 89a0d0a..a028b8c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -128,6 +129,7 @@ namespace {
uint32_t lookup(Value *V) const;
uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
Value *LHS, Value *RHS);
+ bool exists(Value *V) const;
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
@@ -388,6 +390,9 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
}
}
+/// Returns true if a value number exists for the specified value.
+bool ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
+
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
@@ -608,6 +613,10 @@ namespace {
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
BumpPtrAllocator TableAllocator;
+ // Block-local map of equivalent values to their leader; it does not
+ // propagate to any successors. Entries added mid-block are applied
+ // to the remaining instructions in the block.
+ SmallMapVector<llvm::Value *, llvm::Constant *, 4> ReplaceWithConstMap;
SmallVector<Instruction*, 8> InstrsToErase;
typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
@@ -689,16 +698,17 @@ namespace {
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
- // Helper fuctions of redundant load elimination
+ // Helper functions of redundant load elimination
bool processLoad(LoadInst *L);
bool processNonLocalLoad(LoadInst *L);
+ bool processAssumeIntrinsic(IntrinsicInst *II);
void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks);
@@ -719,7 +729,9 @@ namespace {
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
- bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
+ bool replaceOperandsWithConsts(Instruction *I) const;
+ bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
+ bool DominatesByEdge);
bool processFoldableCondBr(BranchInst *BI);
void addDeadBlock(BasicBlock *BB);
void assignValNumForDeadCode();
@@ -738,7 +750,8 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1290,8 +1303,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ for (const AvailableValueInBlock &AV : ValuesPerBlock) {
BasicBlock *BB = AV.BB;
if (SSAUpdate.HasValueForBlock(BB))
@@ -1301,24 +1313,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
}
// Perform PHI construction.
- Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
-
- // If new PHI nodes were created, notify alias analysis.
- if (V->getType()->getScalarType()->isPointerTy()) {
- AliasAnalysis *AA = gvn.getAliasAnalysis();
-
- // Scan the new PHIs and inform alias analysis that we've added potentially
- // escaping uses to any values that are operands to these PHIs.
- for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
- PHINode *P = NewPHIs[i];
- for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
- unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
- AA->addEscapingUse(P->getOperandUse(jj));
- }
- }
- }
-
- return V;
+ return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
}
Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
@@ -1518,9 +1513,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// that we only have to insert *one* load (which means we're basically moving
// the load, not inserting a new one).
- SmallPtrSet<BasicBlock *, 4> Blockers;
- for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
- Blockers.insert(UnavailableBlocks[i]);
+ SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
+ UnavailableBlocks.end());
// Let's find the first basic block with more than one predecessor. Walk
// backwards through predecessors if needed.
@@ -1550,15 +1544,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// available.
MapVector<BasicBlock *, Value *> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
- for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
- FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+ for (const AvailableValueInBlock &AV : ValuesPerBlock)
+ FullyAvailableBlocks[AV.BB] = true;
+ for (BasicBlock *UnavailableBB : UnavailableBlocks)
+ FullyAvailableBlocks[UnavailableBB] = false;
SmallVector<BasicBlock *, 4> CriticalEdgePred;
- for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
- PI != E; ++PI) {
- BasicBlock *Pred = *PI;
+ for (BasicBlock *Pred : predecessors(LoadBB)) {
+ // If any predecessor block is an EH pad that does not allow non-PHI
+ // instructions before the terminator, we can't PRE the load.
+ if (Pred->getTerminator()->isEHPad()) {
+ DEBUG(dbgs()
+ << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
continue;
}
@@ -1570,9 +1571,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- if (LoadBB->isLandingPad()) {
+ if (LoadBB->isEHPad()) {
DEBUG(dbgs()
- << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '"
+ << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
<< Pred->getName() << "': " << *LI << '\n');
return false;
}
@@ -1655,12 +1656,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
<< *NewInsts.back() << '\n');
// Assign value numbers to the new instructions.
- for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+ for (Instruction *I : NewInsts) {
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
// ordering issues. If a block hasn't been processed yet, we would be
// marking a value as AVAIL-IN, which isn't what we intend.
- VN.lookup_or_add(NewInsts[i]);
+ VN.lookup_or_add(I);
}
for (const auto &PredLoad : PredLoads) {
@@ -1677,6 +1678,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (Tags)
NewLoad->setAAMetadata(Tags);
+ if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load))
+ NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
+ if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group))
+ NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
+
// Transfer DebugLoc.
NewLoad->setDebugLoc(LI->getDebugLoc());
@@ -1704,6 +1710,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Non-local speculations are not allowed under ASan.
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
+ return false;
+
// Step 1: Find the non-local dependencies of the load.
LoadDepVect Deps;
MD->getNonLocalPointerDependency(LI, Deps);
@@ -1777,6 +1787,63 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
}
+bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) {
+ assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume &&
+ "This function can only be called with llvm.assume intrinsic");
+ Value *V = IntrinsicI->getArgOperand(0);
+
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
+ if (Cond->isZero()) {
+ Type *Int8Ty = Type::getInt8Ty(V->getContext());
+ // Insert a store to null before the assume intrinsic to indicate that
+ // this code is not reachable. FIXME: We could insert an unreachable
+ // instruction directly because we can modify the CFG.
+ new StoreInst(UndefValue::get(Int8Ty),
+ Constant::getNullValue(Int8Ty->getPointerTo()),
+ IntrinsicI);
+ }
+ markInstructionForDeletion(IntrinsicI);
+ return false;
+ }
+
+ Constant *True = ConstantInt::getTrue(V->getContext());
+ bool Changed = false;
+
+ for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
+ BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
+
+ // This property is only true in dominated successors; propagateEquality
+ // will check dominance for us.
+ Changed |= propagateEquality(V, True, Edge, false);
+ }
+
+ // We can replace the assume value with true, which covers cases like this:
+ // call void @llvm.assume(i1 %cmp)
+ // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
+ ReplaceWithConstMap[V] = True;
+
+ // If one *cmp *eq operand is a constant, adding it to the map will cover this:
+ // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
+ // call void @llvm.assume(i1 %cmp)
+ // ret float %0 ; will change it to ret float 3.000000e+00
+ if (auto *CmpI = dyn_cast<CmpInst>(V)) {
+ if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ ||
+ CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
+ (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
+ CmpI->getFastMathFlags().noNaNs())) {
+ Value *CmpLHS = CmpI->getOperand(0);
+ Value *CmpRHS = CmpI->getOperand(1);
+ if (isa<Constant>(CmpLHS))
+ std::swap(CmpLHS, CmpRHS);
+ auto *RHSConst = dyn_cast<Constant>(CmpRHS);
+
+ // If only one operand is constant.
+ if (RHSConst != nullptr && !isa<Constant>(CmpLHS))
+ ReplaceWithConstMap[CmpLHS] = RHSConst;
+ }
+ }
+ return Changed;
+}
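processAssumeIntrinsic records two kinds of facts: the assumed condition becomes true for the rest of the block (and, via propagateEquality, in dominated successors), and an equality compare against a single constant lets the non-constant side be folded to that constant. A minimal standalone model of what ends up in the block-local map (names are illustrative only):

  #include <map>
  #include <string>

  using ConstMap = std::map<std::string, std::string>; // value name -> constant text

  struct EqualityCmp { std::string LHS; std::string RHS; bool RHSIsConstant; };

  // assume(V) lets later instructions in the block treat V as true; if V is an
  // equality compare with exactly one constant side, the other side can be
  // treated as that constant as well.
  void learnFromAssume(const std::string &V, const EqualityCmp *CmpForV,
                       ConstMap &ReplaceWithConstMap) {
    ReplaceWithConstMap[V] = "true";
    if (CmpForV && CmpForV->RHSIsConstant)
      ReplaceWithConstMap[CmpForV->LHS] = CmpForV->RHS;
  }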
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
@@ -1789,7 +1856,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
// FIXME: If both the original and replacement value are part of the
// same control-flow region (meaning that the execution of one
- // guarentees the executation of the other), then we can combine the
+ // guarantees the execution of the other), then we can combine the
// noalias scopes here and do better than the general conservative
// answer used in combineMetadata().
@@ -1797,13 +1864,10 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// regions, and so we need a conservative combination of the noalias
// scopes.
static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- };
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group};
combineMetadata(ReplInst, I, KnownIDs);
}
}
@@ -1890,10 +1954,8 @@ bool GVN::processLoad(LoadInst *L) {
++NumGVNLoad;
return true;
}
- }
- // If the value isn't available, don't do anything!
- if (Dep.isClobber()) {
+ // If the value isn't available, don't do anything!
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
@@ -2049,11 +2111,31 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
+// Tries to replace an instruction's operands with constants, using
+// information from ReplaceWithConstMap.
+bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
+ bool Changed = false;
+ for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
+ Value *Operand = Instr->getOperand(OpNum);
+ auto it = ReplaceWithConstMap.find(Operand);
+ if (it != ReplaceWithConstMap.end()) {
+ assert(!isa<Constant>(Operand) &&
+ "Replacing constants with constants is invalid");
+ DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second
+ << " in instruction " << *Instr << '\n');
+ Instr->setOperand(OpNum, it->second);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
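replaceOperandsWithConsts is the consumer of that map: any operand with a known constant equivalent is rewritten in place before the instruction is value-numbered, so the remaining instructions in the block see the constant. The same loop in a standalone form, with string operands standing in for Value operands:

  #include <map>
  #include <string>
  #include <vector>

  bool replaceOperands(std::vector<std::string> &Operands,
                       const std::map<std::string, std::string> &ReplaceWithConstMap) {
    bool Changed = false;
    for (std::string &Op : Operands) {
      auto It = ReplaceWithConstMap.find(Op);
      if (It != ReplaceWithConstMap.end()) {
        Op = It->second; // later uses in this block now see the constant
        Changed = true;
      }
    }
    return Changed;
  }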
/// The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-bool GVN::propagateEquality(Value *LHS, Value *RHS,
- const BasicBlockEdge &Root) {
+/// If DominatesByEdge is false, dominance is checked against the block Root.End
+/// rather than against the edge itself.
+bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
+ bool DominatesByEdge) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
@@ -2065,11 +2147,13 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
LHS = Item.first; RHS = Item.second;
- if (LHS == RHS) continue;
+ if (LHS == RHS)
+ continue;
assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
// Don't try to propagate equalities between constants.
- if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
+ if (isa<Constant>(LHS) && isa<Constant>(RHS))
+ continue;
// Prefer a constant on the right-hand side, or an Argument if no constants.
if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
@@ -2108,7 +2192,11 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
// LHS always has at least one use that is not dominated by Root, this will
// never do anything if LHS has only one use.
if (!LHS->hasOneUse()) {
- unsigned NumReplacements = replaceDominatedUsesWith(LHS, RHS, *DT, Root);
+ unsigned NumReplacements =
+ DominatesByEdge
+ ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
+ : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getEnd());
+
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2180,7 +2268,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
- replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root);
+ DominatesByEdge
+ ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
+ : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
+ Root.getEnd());
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2220,6 +2311,10 @@ bool GVN::processInstruction(Instruction *I) {
return true;
}
+ if (IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(I))
+ if (IntrinsicI->getIntrinsicID() == Intrinsic::assume)
+ return processAssumeIntrinsic(IntrinsicI);
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (processLoad(LI))
return true;
@@ -2250,11 +2345,11 @@ bool GVN::processInstruction(Instruction *I) {
Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
BasicBlockEdge TrueE(Parent, TrueSucc);
- Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
BasicBlockEdge FalseE(Parent, FalseSucc);
- Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
return Changed;
}
@@ -2276,7 +2371,7 @@ bool GVN::processInstruction(Instruction *I) {
// If there is only a single edge, propagate the case value into it.
if (SwitchEdges.lookup(Dst) == 1) {
BasicBlockEdge E(Parent, Dst);
- Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E, true);
}
}
return Changed;
@@ -2284,7 +2379,8 @@ bool GVN::processInstruction(Instruction *I) {
// Instructions with void type don't return a value, so there's
// no point in trying to find redundancies in them.
- if (I->getType()->isVoidTy()) return false;
+ if (I->getType()->isVoidTy())
+ return false;
uint32_t NextNum = VN.getNextUnusedValueNumber();
unsigned Num = VN.lookup_or_add(I);
@@ -2306,17 +2402,21 @@ bool GVN::processInstruction(Instruction *I) {
// Perform fast-path value-number based elimination of values inherited from
// dominators.
- Value *repl = findLeader(I->getParent(), Num);
- if (!repl) {
+ Value *Repl = findLeader(I->getParent(), Num);
+ if (!Repl) {
// Failure, just remember this instance for future use.
addToLeaderTable(Num, I, I->getParent());
return false;
+ } else if (Repl == I) {
+ // If I was the result of a shortcut PRE, it might already be in the table
+ // and the best replacement for itself. Nothing to do.
+ return false;
}
// Remove it!
- patchAndReplaceAllUsesWith(I, repl);
- if (MD && repl->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(repl);
+ patchAndReplaceAllUsesWith(I, Repl);
+ if (MD && Repl->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(Repl);
markInstructionForDeletion(I);
return true;
}
@@ -2331,7 +2431,7 @@ bool GVN::runOnFunction(Function& F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+ VN.setAliasAnalysis(&getAnalysis<AAResultsWrapperPass>().getAAResults());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2341,10 +2441,10 @@ bool GVN::runOnFunction(Function& F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = FI++;
+ BasicBlock *BB = &*FI++;
- bool removedBlock = MergeBlockIntoPredecessor(
- BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD);
+ bool removedBlock =
+ MergeBlockIntoPredecessor(BB, DT, /* LoopInfo */ nullptr, MD);
if (removedBlock) ++NumGVNBlocks;
Changed |= removedBlock;
@@ -2382,7 +2482,6 @@ bool GVN::runOnFunction(Function& F) {
return Changed;
}
-
bool GVN::processBlock(BasicBlock *BB) {
// FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
// (and incrementing BI before processing an instruction).
@@ -2391,11 +2490,16 @@ bool GVN::processBlock(BasicBlock *BB) {
if (DeadBlocks.count(BB))
return false;
+ // Clear the map before every BB because it is only valid within a single BB.
+ ReplaceWithConstMap.clear();
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE;) {
- ChangedFunction |= processInstruction(BI);
+ if (!ReplaceWithConstMap.empty())
+ ChangedFunction |= replaceOperandsWithConsts(&*BI);
+ ChangedFunction |= processInstruction(&*BI);
+
if (InstrsToErase.empty()) {
++BI;
continue;
@@ -2439,7 +2543,14 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Value *Op = Instr->getOperand(i);
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
continue;
-
+ // This could be a newly inserted instruction, in which case we won't
+ // find a value number, and should give up before we hurt ourselves.
+ // FIXME: Rewrite the infrastructure to make it easier to value number
+ // and process newly inserted instructions.
+ if (!VN.exists(Op)) {
+ success = false;
+ break;
+ }
if (Value *V = findLeader(Pred, VN.lookup(Op))) {
Instr->setOperand(i, V);
} else {
@@ -2499,9 +2610,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
BasicBlock *CurrentBlock = CurInst->getParent();
predMap.clear();
- for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(CurrentBlock)) {
// We're not interested in PRE where the block is its
// own predecessor, or in blocks with predecessors
// that are not reachable.
@@ -2570,7 +2679,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// Create a PHI to make the value available in this block.
PHINode *Phi =
PHINode::Create(CurInst->getType(), predMap.size(),
- CurInst->getName() + ".pre-phi", CurrentBlock->begin());
+ CurInst->getName() + ".pre-phi", &CurrentBlock->front());
for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
if (Value *V = predMap[i].first)
Phi->addIncoming(V, predMap[i].second);
@@ -2582,18 +2691,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
addToLeaderTable(ValNo, Phi, CurrentBlock);
Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
- if (Phi->getType()->getScalarType()->isPointerTy()) {
- // Because we have added a PHI-use of the pointer value, it has now
- // "escaped" from alias analysis' perspective. We need to inform
- // AA of this.
- for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) {
- unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
- VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
- }
-
- if (MD)
- MD->invalidateCachedPointerInfo(Phi);
- }
+ if (MD && Phi->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
@@ -2616,15 +2715,15 @@ bool GVN::performPRE(Function &F) {
if (CurrentBlock == &F.getEntryBlock())
continue;
- // Don't perform PRE on a landing pad.
- if (CurrentBlock->isLandingPad())
+ // Don't perform PRE on an EH pad.
+ if (CurrentBlock->isEHPad())
continue;
for (BasicBlock::iterator BI = CurrentBlock->begin(),
BE = CurrentBlock->end();
BI != BE;) {
- Instruction *CurInst = BI++;
- Changed = performScalarPRE(CurInst);
+ Instruction *CurInst = &*BI++;
+ Changed |= performScalarPRE(CurInst);
}
}
@@ -2637,8 +2736,8 @@ bool GVN::performPRE(Function &F) {
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted to the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
- BasicBlock *BB = SplitCriticalEdge(
- Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
+ BasicBlock *BB =
+ SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT));
if (MD)
MD->invalidateCachedPredecessors();
return BB;
@@ -2652,7 +2751,7 @@ bool GVN::splitCriticalEdges() {
do {
std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
SplitCriticalEdge(Edge.first, Edge.second,
- CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
+ CriticalEdgeSplittingOptions(DT));
} while (!toSplit.empty());
if (MD) MD->invalidateCachedPredecessors();
return true;
@@ -2728,17 +2827,14 @@ void GVN::addDeadBlock(BasicBlock *BB) {
DeadBlocks.insert(Dom.begin(), Dom.end());
// Figure out the dominance-frontier(D).
- for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
- E = Dom.end(); I != E; I++) {
- BasicBlock *B = *I;
- for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
- BasicBlock *S = *SI;
+ for (BasicBlock *B : Dom) {
+ for (BasicBlock *S : successors(B)) {
if (DeadBlocks.count(S))
continue;
bool AllPredDead = true;
- for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
- if (!DeadBlocks.count(*PI)) {
+ for (BasicBlock *P : predecessors(S))
+ if (!DeadBlocks.count(P)) {
AllPredDead = false;
break;
}
@@ -2766,10 +2862,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
continue;
SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
- for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
- PE = Preds.end(); PI != PE; PI++) {
- BasicBlock *P = *PI;
-
+ for (BasicBlock *P : Preds) {
if (!DeadBlocks.count(P))
continue;
@@ -2794,7 +2887,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// R be the target of the dead out-coming edge.
// 1) Identify the set of dead blocks implied by the branch's dead outcoming
// edge. The result of this step will be {X| X is dominated by R}
-// 2) Identify those blocks which haves at least one dead prodecessor. The
+// 2) Identify those blocks which have at least one dead predecessor. The
// result of this step will be dominance-frontier(R).
// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
// dead blocks with "UndefVal" in the hope that these PHIs will be optimized away.
@@ -2829,14 +2922,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
// instructions, it makes more sense just to "fabricate" a val-number for the
// dead code than checking if instruction involved is dead or not.
void GVN::assignValNumForDeadCode() {
- for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
- E = DeadBlocks.end(); I != E; I++) {
- BasicBlock *BB = *I;
- for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
- II != EE; II++) {
- Instruction *Inst = &*II;
- unsigned ValNum = VN.lookup_or_add(Inst);
- addToLeaderTable(ValNum, Inst, BB);
+ for (BasicBlock *BB : DeadBlocks) {
+ for (Instruction &Inst : *BB) {
+ unsigned ValNum = VN.lookup_or_add(&Inst);
+ addToLeaderTable(ValNum, &Inst, BB);
}
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 2a954d9..ec5e15f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -28,9 +28,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -48,6 +50,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
@@ -83,64 +86,62 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
namespace {
struct RewritePhi;
-}
-namespace {
- class IndVarSimplify : public LoopPass {
- LoopInfo *LI;
- ScalarEvolution *SE;
- DominatorTree *DT;
- TargetLibraryInfo *TLI;
- const TargetTransformInfo *TTI;
-
- SmallVector<WeakVH, 16> DeadInsts;
- bool Changed;
- public:
-
- static char ID; // Pass identification, replacement for typeid
- IndVarSimplify()
- : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
- initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
- }
+class IndVarSimplify : public LoopPass {
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreservedID(LCSSAID);
- AU.setPreservesCFG();
- }
+ SmallVector<WeakVH, 16> DeadInsts;
+ bool Changed;
+public:
- private:
- void releaseMemory() override {
- DeadInsts.clear();
- }
+ static char ID; // Pass identification, replacement for typeid
+ IndVarSimplify()
+ : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
- bool isValidRewrite(Value *FromVal, Value *ToVal);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.setPreservesCFG();
+ }
- void HandleFloatingPointIV(Loop *L, PHINode *PH);
- void RewriteNonIntegerIVs(Loop *L);
+private:
+ void releaseMemory() override {
+ DeadInsts.clear();
+ }
- void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
+ bool isValidRewrite(Value *FromVal, Value *ToVal);
- bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+ void handleFloatingPointIV(Loop *L, PHINode *PH);
+ void rewriteNonIntegerIVs(Loop *L);
- Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar, SCEVExpander &Rewriter);
+ void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
- void SinkUnusedInvariants(Loop *L);
+ bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
+ void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
- Instruction *InsertPt, Type *Ty,
- bool &IsHighCostExpansion);
- };
+ Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
+
+ void sinkUnusedInvariants(Loop *L);
+
+ Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
+ Instruction *InsertPt, Type *Ty);
+};
}
char IndVarSimplify::ID = 0;
@@ -148,7 +149,7 @@ INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
@@ -158,10 +159,10 @@ Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
}
-/// isValidRewrite - Return true if the SCEV expansion generated by the
-/// rewriter can replace the original value. SCEV guarantees that it
-/// produces the same value, but the way it is produced may be illegal IR.
-/// Ideally, this function will only be called for verification.
+/// Return true if the SCEV expansion generated by the rewriter can replace the
+/// original value. SCEV guarantees that it produces the same value, but the way
+/// it is produced may be illegal IR. Ideally, this function will only be
+/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// If an SCEV expression subsumed multiple pointers, its expansion could
// reassociate the GEP changing the base pointer. This is illegal because the
@@ -175,10 +176,10 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// because it understands lcssa phis while SCEV does not.
Value *FromPtr = FromVal;
Value *ToPtr = ToVal;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
+ if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
FromPtr = GEP->getPointerOperand();
}
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
+ if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
ToPtr = GEP->getPointerOperand();
}
if (FromPtr != FromVal || ToPtr != ToVal) {
@@ -215,7 +216,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
- DominatorTree *DT) {
+ DominatorTree *DT, LoopInfo *LI) {
PHINode *PHI = dyn_cast<PHINode>(User);
if (!PHI)
return User;
@@ -234,17 +235,28 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
InsertPt = InsertBB->getTerminator();
}
assert(InsertPt && "Missing phi operand");
- assert((!isa<Instruction>(Def) ||
- DT->dominates(cast<Instruction>(Def), InsertPt)) &&
- "def does not dominate all uses");
- return InsertPt;
+
+ auto *DefI = dyn_cast<Instruction>(Def);
+ if (!DefI)
+ return InsertPt;
+
+ assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
+
+ auto *L = LI->getLoopFor(DefI->getParent());
+ assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
+
+ for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
+ if (LI->getLoopFor(DTN->getBlock()) == L)
+ return DTN->getBlock()->getTerminator();
+
+ llvm_unreachable("DefI dominates InsertPt!");
}
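The new tail of getInsertPointForUses walks up the immediate-dominator chain from the block the PHI selected until it reaches a block that belongs to the same loop as the definition, and uses that block's terminator instead. A standalone sketch of the walk (hypothetical node type, not the LLVM DomTreeNode):

  #include <cassert>

  // Each block knows its immediate dominator and the loop it belongs to
  // (0 meaning "not inside any loop").
  struct Block {
    const Block *IDom = nullptr;
    int LoopId = 0;
  };

  const Block *findInsertBlock(const Block *InsertBB, int DefLoopId) {
    for (const Block *B = InsertBB; B; B = B->IDom)
      if (B->LoopId == DefLoopId)
        return B; // first dominating block in the definition's loop
    assert(false && "the definition's block dominates the insertion point");
    return nullptr;
  }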
//===----------------------------------------------------------------------===//
-// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//
-/// ConvertToSInt - Convert APF to an integer, if possible.
+/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
bool isExact = false;
// See if we can convert this to an int64_t
@@ -256,8 +268,8 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
return true;
}
-/// HandleFloatingPointIV - If the loop has floating induction variable
-/// then insert corresponding integer induction variable if possible.
+/// If the loop has a floating-point induction variable, insert a corresponding
+/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
/// bar(i)
@@ -265,13 +277,12 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
/// for(int i = 0; i < 10000; ++i)
/// bar((double)i);
///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
unsigned BackEdge = IncomingEdge^1;
// Check incoming value.
- ConstantFP *InitValueVal =
- dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
+ auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
int64_t InitValue;
if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
@@ -279,8 +290,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Check IV increment. Reject this PN if increment operation is not
// an add or increment value can not be represented by an integer.
- BinaryOperator *Incr =
- dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
// If this is not an add of the PHI with a constantfp, or if the constant fp
@@ -456,14 +466,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// platforms.
if (WeakPH) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstInsertionPt());
+ &*PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
Changed = true;
}
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
// First step. Check to see if there are any floating-point recurrences.
// If there are, change them into integer recurrences, permitting analysis by
// the SCEV routines.
@@ -477,7 +487,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
- HandleFloatingPointIV(L, PN);
+ handleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
// may not have been able to compute a trip count. Now that we've done some
@@ -488,7 +498,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
namespace {
// Collect information about PHI nodes which can be transformed in
-// RewriteLoopExitValues.
+// rewriteLoopExitValues.
struct RewritePhi {
PHINode *PN;
unsigned Ith; // Ith incoming value.
@@ -501,70 +511,37 @@ struct RewritePhi {
};
}
-Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
+Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
Loop *L, Instruction *InsertPt,
- Type *ResultTy,
- bool &IsHighCostExpansion) {
- using namespace llvm::PatternMatch;
-
- if (!Rewriter.isHighCostExpansion(S, L)) {
- IsHighCostExpansion = false;
- return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
- }
-
+ Type *ResultTy) {
// Before expanding S into an expensive LLVM expression, see if we can use an
- // already existing value as the expansion for S. There is potential to make
- // this significantly smarter, but this simple heuristic already gets some
- // interesting cases.
-
- SmallVector<BasicBlock *, 4> Latches;
- L->getLoopLatches(Latches);
-
- for (BasicBlock *BB : Latches) {
- ICmpInst::Predicate Pred;
- Instruction *LHS, *RHS;
- BasicBlock *TrueBB, *FalseBB;
-
- if (!match(BB->getTerminator(),
- m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
- TrueBB, FalseBB)))
- continue;
-
- if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) {
- IsHighCostExpansion = false;
- return LHS;
- }
-
- if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) {
- IsHighCostExpansion = false;
- return RHS;
- }
- }
+ // already existing value as the expansion for S.
+ if (Value *ExistingValue = Rewriter.findExistingExpansion(S, InsertPt, L))
+ if (ExistingValue->getType() == ResultTy)
+ return ExistingValue;
// We didn't find anything, fall back to using SCEVExpander.
- assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!");
- IsHighCostExpansion = true;
return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}
//===----------------------------------------------------------------------===//
-// RewriteLoopExitValues - Optimize IV users outside the loop.
+// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//
-/// RewriteLoopExitValues - Check to see if this loop has a computable
-/// loop-invariant execution count. If so, this means that we can compute the
-/// final value of any expressions that are recurrent in the loop, and
-/// substitute the exit values from the loop into any instructions outside of
-/// the loop that use the final values of the current expressions.
+/// Check to see if this loop has a computable loop-invariant execution count.
+/// If so, this means that we can compute the final value of any expressions
+/// that are recurrent in the loop, and substitute the exit values from the loop
+/// into any instructions outside of the loop that use the final values of the
+/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
-void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
- // Verify the input to the pass in already in LCSSA form.
- assert(L->isLCSSAForm(*DT));
+void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
+ // Check a pre-condition.
+ assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -679,9 +656,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
continue;
}
- bool HighCost = false;
- Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst,
- PN->getType(), HighCost);
+ bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
+ Value *ExitVal =
+ expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n");
@@ -698,7 +675,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
}
}
- bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet);
+ bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
// Transformation.
for (const RewritePhi &Phi : RewritePhiSet) {
@@ -735,10 +712,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
Rewriter.clearInsertPoint();
}
-/// CanLoopBeDeleted - Check whether it is possible to delete the loop after
-/// rewriting exit value. If it is possible, ignore ReplaceExitValue and
-/// do rewriting aggressively.
-bool IndVarSimplify::CanLoopBeDeleted(
+/// Check whether it is possible to delete the loop after rewriting the exit
+/// value. If it is possible, ignore ReplaceExitValue and do rewriting
+/// aggressively.
+bool IndVarSimplify::canLoopBeDeleted(
Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
BasicBlock *Preheader = L->getLoopPreheader();
@@ -782,14 +759,9 @@ bool IndVarSimplify::CanLoopBeDeleted(
++BI;
}
- for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
- LI != LE; ++LI) {
- for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE;
- ++BI) {
- if (BI->mayHaveSideEffects())
- return false;
- }
- }
+ for (auto *BB : L->blocks())
+ if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
+ return false;
return true;
}
@@ -799,22 +771,19 @@ bool IndVarSimplify::CanLoopBeDeleted(
//===----------------------------------------------------------------------===//
namespace {
- // Collect information about induction variables that are used by sign/zero
- // extend operations. This information is recorded by CollectExtend and
- // provides the input to WidenIV.
- struct WideIVInfo {
- PHINode *NarrowIV;
- Type *WidestNativeType; // Widest integer type created [sz]ext
- bool IsSigned; // Was a sext user seen before a zext?
-
- WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
- IsSigned(false) {}
- };
+// Collect information about induction variables that are used by sign/zero
+// extend operations. This information is recorded by CollectExtend and provides
+// the input to WidenIV.
+struct WideIVInfo {
+ PHINode *NarrowIV = nullptr;
+ Type *WidestNativeType = nullptr; // Widest integer type created [sz]ext
+ bool IsSigned = false; // Was a sext user seen before a zext?
+};
}
-/// visitCast - Update information about the induction variable that is
-/// extended by this sign or zero extend operation. This is used to determine
-/// the final width of the IV before actually widening it.
+/// Update information about the induction variable that is extended by this
+/// sign or zero extend operation. This is used to determine the final width of
+/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
@@ -855,24 +824,29 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
namespace {
-/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
-/// WideIV that computes the same value as the Narrow IV def. This avoids
-/// caching Use* pointers.
+/// Record a link in the Narrow IV def-use chain along with the WideIV that
+/// computes the same value as the Narrow IV def. This avoids caching Use*
+/// pointers.
struct NarrowIVDefUse {
- Instruction *NarrowDef;
- Instruction *NarrowUse;
- Instruction *WideDef;
-
- NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
-
- NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
- NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
+ Instruction *NarrowDef = nullptr;
+ Instruction *NarrowUse = nullptr;
+ Instruction *WideDef = nullptr;
+
+ // True if the narrow def is never negative. Tracking this information lets
+ // us use a sign extension instead of a zero extension or vice versa, when
+ // profitable and legal.
+ bool NeverNegative = false;
+
+ NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
+ bool NeverNegative)
+ : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
+ NeverNegative(NeverNegative) {}
};
-/// WidenIV - The goal of this transform is to remove sign and zero extends
-/// without creating any new induction variables. To do this, it creates a new
-/// phi of the wider type and redirects all users, either removing extends or
-/// inserting truncs whenever we stop propagating the type.
+/// The goal of this transform is to remove sign and zero extends without
+/// creating any new induction variables. To do this, it creates a new phi of
+/// the wider type and redirects all users, either removing extends or inserting
+/// truncs whenever we stop propagating the type.
///
class WidenIV {
// Parameters
@@ -913,32 +887,35 @@ public:
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
- PHINode *CreateWideIV(SCEVExpander &Rewriter);
+ PHINode *createWideIV(SCEVExpander &Rewriter);
protected:
- Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
- Instruction *Use);
+ Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
+ Instruction *Use);
- Instruction *CloneIVUser(NarrowIVDefUse DU);
+ Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
+ Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR);
+ Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
- const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+ const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
- const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
+ const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
- const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const;
- Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
+ Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
- bool WidenLoopCompare(NarrowIVDefUse DU);
+ bool widenLoopCompare(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
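At the source level, the widening that WidenIV performs corresponds roughly to replacing a narrow induction variable plus per-iteration extensions with a single wide induction variable. A hand-written before/after sketch with a hypothetical loop (not taken from the patch):

#include <cstdint>

// Before: 32-bit IV, sign-extended on every use inside the loop.
int64_t sumBefore(const int64_t *A, int32_t N) {
  int64_t S = 0;
  for (int32_t I = 0; I < N; ++I)
    S += A[static_cast<int64_t>(I)];      // sext i32 -> i64 each iteration
  return S;
}

// After widening: one 64-bit IV, the per-iteration extension disappears and
// the old narrow IV can be deleted once all its users are rewritten.
int64_t sumAfter(const int64_t *A, int32_t N) {
  int64_t S = 0;
  for (int64_t I = 0, E = N; I < E; ++I)  // E is sext(N), computed once
    S += A[I];
  return S;
}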
-/// isLoopInvariant - Perform a quick domtree based check for loop invariance
-/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
-/// gratuitous for this purpose.
+/// Perform a quick domtree based check for loop invariance assuming that V is
+/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
+/// purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst)
@@ -947,8 +924,8 @@ static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
return DT->properlyDominates(Inst->getParent(), L->getHeader());
}
-Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
- Instruction *Use) {
+Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
+ bool IsSigned, Instruction *Use) {
// Set the debug location and conservative insertion point.
IRBuilder<> Builder(Use);
// Hoist the insertion point into loop preheaders as far as possible.
@@ -961,10 +938,11 @@ Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
Builder.CreateZExt(NarrowOper, WideType);
}
-/// CloneIVUser - Instantiate a wide operation to replace a narrow
-/// operation. This only needs to handle operations that can evaluation to
-/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
-Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
+/// Instantiate a wide operation to replace a narrow operation. This only needs
+/// to handle operations that can evaluate to SCEVAddRec. It can safely return
+/// 0 for any operation we decide not to clone.
+Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
@@ -973,40 +951,140 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::Sub:
+ return cloneArithmeticIVUser(DU, WideAR);
+
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
-
- // Replace NarrowDef operands with WideDef. Otherwise, we don't know
- // anything about the narrow operand yet so must insert a [sz]ext. It is
- // probably loop invariant and will be folded or hoisted. If it actually
- // comes from a widened IV, it should be removed during a future call to
- // WidenIVUse.
- Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
- getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
- Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
- getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
-
- BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
- BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
- LHS, RHS,
- NarrowBO->getName());
- IRBuilder<> Builder(DU.NarrowUse);
- Builder.Insert(WideBO);
- if (const OverflowingBinaryOperator *OBO =
- dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
- if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
- if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ return cloneBitwiseIVUser(DU);
+ }
+}
+
+Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
+
+ // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
+ // about the narrow operand yet so must insert a [sz]ext. It is probably loop
+ // invariant and will be folded or hoisted. If it actually comes from a
+ // widened IV, it should be removed during a future call to widenIVUse.
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ IsSigned, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ IsSigned, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
+}
+
+Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
+ const SCEVAddRecExpr *WideAR) {
+ Instruction *NarrowUse = DU.NarrowUse;
+ Instruction *NarrowDef = DU.NarrowDef;
+ Instruction *WideDef = DU.WideDef;
+
+ DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
+
+ unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
+
+ // We're trying to find X such that
+ //
+ // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
+ //
+ // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
+ // and check using SCEV if any of them are correct.
+
+ // Returns true if extending NonIVNarrowDef according to `SignExt` is a
+ // correct solution to X.
+ auto GuessNonIVOperand = [&](bool SignExt) {
+ const SCEV *WideLHS;
+ const SCEV *WideRHS;
+
+ auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
+ if (SignExt)
+ return SE->getSignExtendExpr(S, Ty);
+ return SE->getZeroExtendExpr(S, Ty);
+ };
+
+ if (IVOpIdx == 0) {
+ WideLHS = SE->getSCEV(WideDef);
+ const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
+ WideRHS = GetExtend(NarrowRHS, WideType);
+ } else {
+ const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
+ WideLHS = GetExtend(NarrowLHS, WideType);
+ WideRHS = SE->getSCEV(WideDef);
+ }
+
+ // WideUse is "WideDef `op.wide` X" as described in the comment.
+ const SCEV *WideUse = nullptr;
+
+ switch (NarrowUse->getOpcode()) {
+ default:
+ llvm_unreachable("No other possibility!");
+
+ case Instruction::Add:
+ WideUse = SE->getAddExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::Mul:
+ WideUse = SE->getMulExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::UDiv:
+ WideUse = SE->getUDivExpr(WideLHS, WideRHS);
+ break;
+
+ case Instruction::Sub:
+ WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
+ break;
}
- return WideBO;
+
+ return WideUse == WideAR;
+ };
+
+ bool SignExtend = IsSigned;
+ if (!GuessNonIVOperand(SignExtend)) {
+ SignExtend = !SignExtend;
+ if (!GuessNonIVOperand(SignExtend))
+ return nullptr;
}
+
+ Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ SignExtend, NarrowUse);
+ Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ SignExtend, NarrowUse);
+
+ auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
+ auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
+ NarrowBO->getName());
+
+ IRBuilder<> Builder(NarrowUse);
+ Builder.Insert(WideBO);
+ WideBO->copyIRFlags(NarrowBO);
+ return WideBO;
}
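cloneArithmeticIVUser has to decide whether the non-IV operand should be sign- or zero-extended when the narrow operation is widened; it guesses both and keeps the one whose SCEV matches the known wide recurrence. The two guesses can only disagree when the operand is negative, which this small standalone illustration (hypothetical helpers, not the pass's code) makes concrete:

#include <cstdint>

// The two candidate widenings of the non-IV operand C: sign extension and
// zero extension. cloneArithmeticIVUser tries each and keeps the one whose
// SCEV equals the wide AddRec (WideAR).
int64_t widenAddWithSext(int64_t WideDef, int32_t C) {
  return WideDef + static_cast<int64_t>(C);                         // sext(C)
}
int64_t widenAddWithZext(int64_t WideDef, int32_t C) {
  return WideDef + static_cast<int64_t>(static_cast<uint32_t>(C));  // zext(C)
}
// For C = -1: the sext version computes WideDef - 1, the zext version computes
// WideDef + 0xffffffff, so at most one of them can match the wide recurrence.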
-const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const {
if (OpCode == Instruction::Add)
return SE->getAddExpr(LHS, RHS);
@@ -1022,7 +1100,7 @@ const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.
-const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
const unsigned OpCode = DU.NarrowUse->getOpcode();
@@ -1062,19 +1140,18 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
if (ExtendOperIdx == 0)
std::swap(lhs, rhs);
const SCEVAddRecExpr *AddRec =
- dyn_cast<SCEVAddRecExpr>(GetSCEVByOpCode(lhs, rhs, OpCode));
+ dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
if (!AddRec || AddRec->getLoop() != L)
return nullptr;
return AddRec;
}
-/// GetWideRecurrence - Is this instruction potentially interesting for further
-/// simplification after widening it's type? In other words, can the
-/// extend be safely hoisted out of the loop with SCEV reducing the value to a
-/// recurrence on the same loop. If so, return the sign or zero extended
-/// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+/// Is this instruction potentially interesting for further simplification after
+/// widening its type? In other words, can the extend be safely hoisted out of
+/// the loop with SCEV reducing the value to a recurrence on the same loop. If
+/// so, return the sign or zero extended recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
return nullptr;
@@ -1097,10 +1174,11 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
-static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
+static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
<< " for user " << *DU.NarrowUse << "\n");
- IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ IRBuilder<> Builder(
+ getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
@@ -1108,13 +1186,27 @@ static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
/// If the narrow use is a compare instruction, then widen the compare
// (and possibly the other operand). The extend operation is hoisted into the
// loop preheader as far as possible.
-bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
+bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
if (!Cmp)
return false;
- // Sign of IV user and compare must match.
- if (IsSigned != CmpInst::isSigned(Cmp->getPredicate()))
+ // We can legally widen the comparison in the following two cases:
+ //
+ // - The signedness of the IV extension and comparison match
+ //
+ // - The narrow IV is always positive (and thus its sign extension is equal
+ // to its zero extension). For instance, let's say we're zero extending
+ // %narrow for the following use
+ //
+ // icmp slt i32 %narrow, %val ... (A)
+ //
+ // and %narrow is always positive. Then
+ //
+ // (A) == icmp slt i32 sext(%narrow), sext(%val)
+ // == icmp slt i32 zext(%narrow), sext(%val)
+
+ if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
return false;
Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
@@ -1123,20 +1215,21 @@ bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
// Widen the compare instruction.
- IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ IRBuilder<> Builder(
+ getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
// Widen the other operand of the compare, if necessary.
if (CastWidth < IVWidth) {
- Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
+ Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
}
return true;
}
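The legality argument in the comment above — a known-non-negative narrow value has identical sign and zero extensions, so a signed compare stays correct after widening — can be checked directly. A minimal standalone illustration:

#include <cassert>
#include <cstdint>

// For a non-negative 32-bit value, sext and zext to 64 bits produce the same
// result, which is what lets a signed compare survive a zero-extended widening
// when NeverNegative is known.
void checkExtensionsAgree(int32_t Narrow) {
  assert(Narrow >= 0 && "the property tracked as NeverNegative");
  int64_t Sext = Narrow;                                              // sext
  int64_t Zext = static_cast<int64_t>(static_cast<uint32_t>(Narrow)); // zext
  assert(Sext == Zext);
}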
-/// WidenIVUse - Determine whether an individual user of the narrow IV can be
-/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+/// Determine whether an individual user of the narrow IV can be widened. If so,
+/// return the wide clone of the user.
+Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
@@ -1145,13 +1238,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// After SimplifyCFG most loop exit targets have a single predecessor.
// Otherwise fall back to a truncate within the loop.
if (UsePhi->getNumOperands() != 1)
- truncateIVUse(DU, DT);
+ truncateIVUse(DU, DT, LI);
else {
PHINode *WidePhi =
PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
UsePhi);
WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
- IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
+ IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
UsePhi->replaceAllUsesWith(Trunc);
DeadInsts.emplace_back(UsePhi);
@@ -1200,20 +1293,20 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
}
// Does this user itself evaluate to a recurrence after widening?
- const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
+ const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse);
if (!WideAddRec)
- WideAddRec = GetExtendedOperandRecurrence(DU);
+ WideAddRec = getExtendedOperandRecurrence(DU);
if (!WideAddRec) {
// If use is a loop condition, try to promote the condition instead of
// truncating the IV first.
- if (WidenLoopCompare(DU))
+ if (widenLoopCompare(DU))
return nullptr;
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- truncateIVUse(DU, DT);
+ truncateIVUse(DU, DT, LI);
return nullptr;
}
// Assume block terminators cannot evaluate to a recurrence. We can't to
@@ -1228,7 +1321,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
- WideUse = CloneIVUser(DU);
+ WideUse = cloneIVUser(DU, WideAddRec);
if (!WideUse)
return nullptr;
}
@@ -1248,9 +1341,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
return WideUse;
}
-/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+/// Add eligible users of NarrowDef to NarrowIVUsers.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+ bool NeverNegative =
+ SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
+ SE->getConstant(NarrowSCEV->getType(), 0));
for (User *U : NarrowDef->users()) {
Instruction *NarrowUser = cast<Instruction>(U);
@@ -1258,21 +1355,21 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
if (!Widened.insert(NarrowUser).second)
continue;
- NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
+ NarrowIVUsers.push_back(
+ NarrowIVDefUse(NarrowDef, NarrowUser, WideDef, NeverNegative));
}
}
-/// CreateWideIV - Process a single induction variable. First use the
-/// SCEVExpander to create a wide induction variable that evaluates to the same
-/// recurrence as the original narrow IV. Then use a worklist to forward
-/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
-/// interesting IV users, the narrow IV will be isolated for removal by
-/// DeleteDeadPHIs.
+/// Process a single induction variable. First use the SCEVExpander to create a
+/// wide induction variable that evaluates to the same recurrence as the
+/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
+/// def-use chain. After widenIVUse has processed all interesting IV users, the
+/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
-PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
@@ -1302,11 +1399,11 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// either find an existing phi or materialize a new one. Either way, we
// expect a well-formed cyclic phi-with-increments. i.e. any operand not part
// of the phi-SCC dominates the loop entry.
- Instruction *InsertPt = L->getHeader()->begin();
+ Instruction *InsertPt = &L->getHeader()->front();
WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
// Remembering the WideIV increment generated by SCEVExpander allows
- // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
+ // widenIVUse to reuse it when widening the narrow IV's increment. We don't
// employ a general reuse mechanism because the call above is the only call to
// SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
@@ -1329,13 +1426,13 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *WideUse = WidenIVUse(DU, Rewriter);
+ Instruction *WideUse = widenIVUse(DU, Rewriter);
// Follow all def-use edges from the previous narrow use.
if (WideUse)
pushNarrowIVUsers(DU.NarrowUse, WideUse);
- // WidenIVUse may have removed the def-use edge.
+ // widenIVUse may have removed the def-use edge.
if (DU.NarrowDef->use_empty())
DeadInsts.emplace_back(DU.NarrowDef);
}
@@ -1352,38 +1449,38 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
//===----------------------------------------------------------------------===//
namespace {
- class IndVarSimplifyVisitor : public IVVisitor {
- ScalarEvolution *SE;
- const TargetTransformInfo *TTI;
- PHINode *IVPhi;
-
- public:
- WideIVInfo WI;
-
- IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
- const TargetTransformInfo *TTI,
- const DominatorTree *DTree)
- : SE(SCEV), TTI(TTI), IVPhi(IV) {
- DT = DTree;
- WI.NarrowIV = IVPhi;
- if (ReduceLiveIVs)
- setSplitOverflowIntrinsics();
- }
+class IndVarSimplifyVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const TargetTransformInfo *TTI;
+ PHINode *IVPhi;
- // Implement the interface used by simplifyUsersOfIV.
- void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
- };
+public:
+ WideIVInfo WI;
+
+ IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
+ const TargetTransformInfo *TTI,
+ const DominatorTree *DTree)
+ : SE(SCEV), TTI(TTI), IVPhi(IV) {
+ DT = DTree;
+ WI.NarrowIV = IVPhi;
+ if (ReduceLiveIVs)
+ setSplitOverflowIntrinsics();
+ }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
+};
}
-/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
-/// users. Each successive simplification may push more users which may
-/// themselves be candidates for simplification.
+/// Iteratively perform simplification on a worklist of IV users. Each
+/// successive simplification may push more users which may themselves be
+/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
///
-void IndVarSimplify::SimplifyAndExtend(Loop *L,
+void IndVarSimplify::simplifyAndExtend(Loop *L,
SCEVExpander &Rewriter,
- LPPassManager &LPM) {
+ LoopInfo *LI) {
SmallVector<WideIVInfo, 8> WideIVs;
SmallVector<PHINode*, 8> LoopPhis;
@@ -1400,14 +1497,14 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
// extension. The first time SCEV attempts to normalize sign/zero extension,
// the result becomes final. So for the most predictable results, we delay
// evaluation of sign/zero extend evaluation until needed, and avoid running
- // other SCEV based analysis prior to SimplifyAndExtend.
+ // other SCEV based analysis prior to simplifyAndExtend.
do {
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
- Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
+ Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor);
if (Visitor.WI.WidestNativeType) {
WideIVs.push_back(Visitor.WI);
@@ -1416,7 +1513,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
for (; !WideIVs.empty(); WideIVs.pop_back()) {
WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
- if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
}
@@ -1425,12 +1522,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
}
//===----------------------------------------------------------------------===//
-// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
+/// Return true if this loop's backedge taken count expression can be safely and
+/// cheaply expanded into an instruction sequence that can be used by
+/// linearFunctionTestReplace.
///
/// TODO: This fails for pointer-type loop counters with greater than one byte
/// strides, consequently preventing LFTR from running. For the purpose of LFTR
@@ -1461,8 +1558,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
return true;
}
-/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
-/// invariant value to the phi.
+/// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
@@ -1513,8 +1609,8 @@ static ICmpInst *getLoopTest(Loop *L) {
return dyn_cast<ICmpInst>(BI->getCondition());
}
-/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
-/// that the current exit test is already sufficiently canonical.
+/// linearFunctionTestReplace policy. Return true unless we can show that the
+/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Do LFTR to simplify the exit condition to an ICMP.
ICmpInst *Cond = getLoopTest(L);
@@ -1574,10 +1670,10 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
return false;
// Optimistically handle other instructions.
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
- if (!Visited.insert(*OI).second)
+ for (Value *Op : I->operands()) {
+ if (!Visited.insert(Op).second)
continue;
- if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
+ if (!hasConcreteDefImpl(Op, Visited, Depth+1))
return false;
}
return true;
@@ -1594,8 +1690,8 @@ static bool hasConcreteDef(Value *V) {
return hasConcreteDefImpl(V, Visited, 0);
}
-/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
-/// be rewritten) loop exit test.
+/// Return true if this IV has any uses other than the (soon to be rewritten)
+/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
@@ -1608,7 +1704,7 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
return true;
}
-/// FindLoopCounter - Find an affine IV in canonical form.
+/// Find an affine IV in canonical form.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
@@ -1702,8 +1798,8 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
return BestPhi;
}
-/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
-/// holds the RHS of the new loop test.
+/// Help linearFunctionTestReplace by generating a value that holds the RHS of
+/// the new loop test.
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
SCEVExpander &Rewriter, ScalarEvolution *SE) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
@@ -1785,13 +1881,13 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
}
}
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable. This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
+/// This method rewrites the exit condition of the loop to be a canonical !=
+/// comparison against the incremented loop induction variable. This pass is
+/// able to rewrite the exit tests of any loop where the SCEV analysis can
+/// determine a loop-invariant trip count of the loop, which is actually a much
+/// broader range than just linear tests.
Value *IndVarSimplify::
-LinearFunctionTestReplace(Loop *L,
+linearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
@@ -1809,7 +1905,7 @@ LinearFunctionTestReplace(Loop *L,
// This addition may overflow, which is valid as long as the comparison is
// truncated to BackedgeTakenCount->getType().
IVCount = SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
+ SE->getOne(BackedgeTakenCount->getType()));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
@@ -1847,8 +1943,8 @@ LinearFunctionTestReplace(Loop *L,
const SCEV *ARStep = AR->getStepRecurrence(*SE);
// For constant IVCount, avoid truncation.
if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
- const APInt &Start = cast<SCEVConstant>(ARStart)->getValue()->getValue();
- APInt Count = cast<SCEVConstant>(IVCount)->getValue()->getValue();
+ const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
+ APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
// Note that the post-inc value of BackedgeTakenCount may have overflowed
// above such that IVCount is now zero.
if (IVCount != BackedgeTakenCount && Count == 0) {
@@ -1886,21 +1982,21 @@ LinearFunctionTestReplace(Loop *L,
}
//===----------------------------------------------------------------------===//
-// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
- Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
- BasicBlock::iterator I = Preheader->getTerminator();
+ Instruction *InsertPt = &*ExitBlock->getFirstInsertionPt();
+ BasicBlock::iterator I(Preheader->getTerminator());
while (I != Preheader->begin()) {
--I;
// New instructions were inserted at the end of the preheader.
@@ -1920,8 +2016,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
if (isa<DbgInfoIntrinsic>(I))
continue;
- // Skip landingpad instructions.
- if (isa<LandingPadInst>(I))
+ // Skip eh pad instructions.
+ if (I->isEHPad())
continue;
// Don't sink alloca: we never want to sink static alloca's out of the
@@ -1953,7 +2049,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
continue;
// Otherwise, sink it to the exit block.
- Instruction *ToMove = I;
+ Instruction *ToMove = &*I;
bool Done = false;
if (I != Preheader->begin()) {
@@ -1994,7 +2090,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
@@ -2007,7 +2103,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If there are any floating-point recurrences, attempt to
// transform them to use integer recurrences.
- RewriteNonIntegerIVs(L);
+ rewriteNonIntegerIVs(L);
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
@@ -2024,7 +2120,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
Rewriter.disableCanonicalMode();
- SimplifyAndExtend(L, Rewriter, LPM);
+ simplifyAndExtend(L, Rewriter, LI);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -2034,7 +2130,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
//
if (ReplaceExitValue != NeverRepl &&
!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- RewriteLoopExitValues(L, Rewriter);
+ rewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
@@ -2054,7 +2150,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// explicitly check any assumptions made by SCEV. Brittle.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
if (!AR || AR->getLoop()->getLoopPreheader())
- (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
Rewriter);
}
}
@@ -2074,13 +2170,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Loop-invariant instructions in the preheader that aren't used in the
// loop may be sunk below the loop to reduce register pressure.
- SinkUnusedInvariants(L);
+ sinkUnusedInvariants(L);
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
+
// Check a post-condition.
- assert(L->isLCSSAForm(*DT) &&
- "Indvars did not leave the loop in lcssa form!");
+ assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
// Verify that LFTR, and any other change have not interfered with SCEV's
// ability to compute trip count.
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index cbdacad..dea61f6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -214,8 +214,8 @@ public:
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<BranchProbabilityInfo>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -224,8 +224,15 @@ public:
char InductiveRangeCheckElimination::ID = 0;
}
-INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
- "Inductive range check elimination", false, false)
+INITIALIZE_PASS_BEGIN(InductiveRangeCheckElimination, "irce",
+ "Inductive range check elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(InductiveRangeCheckElimination, "irce",
+ "Inductive range check elimination", false, false)
const char *InductiveRangeCheck::rangeCheckKindToStr(
InductiveRangeCheck::RangeCheckKind RCK) {
@@ -1044,9 +1051,9 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
- &F, BBInsertLocation);
+ &F, &*BBInsertLocation);
RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
- BBInsertLocation);
+ &*BBInsertLocation);
BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
bool Increasing = LS.IndVarIncreasing;
@@ -1399,8 +1406,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
LLVMContext &Context = Preheader->getContext();
InductiveRangeCheck::AllocatorTy IRCAlloc;
SmallVector<InductiveRangeCheck *, 16> RangeChecks;
- ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
- BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ BranchProbabilityInfo &BPI =
+ getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
for (auto BBI : L->getBlocks())
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 1130d22..087ce8a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,15 +18,22 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -36,6 +43,8 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <memory>
using namespace llvm;
#define DEBUG_TYPE "jump-threading"
@@ -49,6 +58,13 @@ BBDuplicateThreshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
+static cl::opt<unsigned>
+ImplicationSearchThreshold(
+ "jump-threading-implication-search-threshold",
+ cl::desc("The number of predecessors to search for a stronger "
+ "condition to use to thread over a weaker condition"),
+ cl::init(3), cl::Hidden);
+
namespace {
// These are at global scope so static functions can use them too.
typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
@@ -80,6 +96,9 @@ namespace {
class JumpThreading : public FunctionPass {
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ bool HasProfileData;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
@@ -114,9 +133,15 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
+ void releaseMemory() override {
+ BFI.reset();
+ BPI.reset();
+ }
+
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
@@ -134,9 +159,16 @@ namespace {
bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
+ bool ProcessImpliedCondition(BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+
+ private:
+ BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
+ const char *Suffix);
+ void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
+ BasicBlock *NewBB, BasicBlock *SuccBB);
};
}
@@ -160,11 +192,21 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
LVI = &getAnalysis<LazyValueInfo>();
+ BFI.reset();
+ BPI.reset();
+ // When profile data is available, we need to update edge weights after
+ // successful jump threading, which requires both BPI and BFI being available.
+ HasProfileData = F.getEntryCount().hasValue();
+ if (HasProfileData) {
+ LoopInfo LI{DominatorTree(F)};
+ BPI.reset(new BranchProbabilityInfo(F, LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ }
// Remove unreachable blocks from function as they may result in infinite
// loop. We do threading if we found something profitable. Jump threading a
// branch can create other opportunities. If these opportunities form a cycle
- // i.e. if any jump treading is undoing previous threading in the path, then
+ // i.e. if any jump threading is undoing previous threading in the path, then
// we will loop forever. We take care of this issue by not jump threading for
// back edges. This works for normal cases but not for unreachable blocks as
// they may have cycle with no back edge.
@@ -176,7 +218,7 @@ bool JumpThreading::runOnFunction(Function &F) {
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
- BasicBlock *BB = I;
+ BasicBlock *BB = &*I;
// Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
@@ -239,11 +281,26 @@ bool JumpThreading::runOnFunction(Function &F) {
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
unsigned Threshold) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
- BasicBlock::const_iterator I = BB->getFirstNonPHI();
+ BasicBlock::const_iterator I(BB->getFirstNonPHI());
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
+ unsigned Bonus = 0;
+ const TerminatorInst *BBTerm = BB->getTerminator();
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to happen.
+ if (isa<SwitchInst>(BBTerm))
+ Bonus = 6;
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(BBTerm))
+ Bonus = 8;
+
+ // Bump the threshold up so the early exit from the loop doesn't skip the
+ // terminator-based Size adjustment at the end.
+ Threshold += Bonus;
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
@@ -260,6 +317,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
+ // Bail out if this instruction gives back a token type; it is not possible
+ // to duplicate it if it is used outside this BB.
+ if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
+ return ~0U;
+
// All other instructions count for at least one unit.
++Size;
@@ -268,7 +330,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->cannotDuplicate())
+ if (CI->cannotDuplicate() || CI->isConvergent())
// Blocks with NoDuplicate are modelled as having infinite cost, so they
// are never duplicated.
return ~0U;
@@ -279,16 +341,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
}
}
- // Threading through a switch statement is particularly profitable. If this
- // block ends in a switch, decrease its cost to make it more likely to happen.
- if (isa<SwitchInst>(I))
- Size = Size > 6 ? Size-6 : 0;
-
- // The same holds for indirect branches, but slightly more so.
- if (isa<IndirectBrInst>(I))
- Size = Size > 8 ? Size-8 : 0;
-
- return Size;
+ return Size > Bonus ? Size - Bonus : 0;
}
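The reorganized cost computation applies the switch/indirect-branch bonus to the threshold up front, so the early size-based bail-out cannot trigger before the bonus is accounted for, and only subtracts the bonus at the end. A small model of that arithmetic (instruction costs and the single Bonus value are made up for illustration; the real function also special-cases calls, GEPs, and token values):

#include <vector>

// Model of the new cost shape: the Bonus raises the early-exit threshold so a
// switch-terminated block is not rejected before the final Size - Bonus
// adjustment, matching the "bump the threshold up" comment in the patch.
unsigned duplicationCost(const std::vector<unsigned> &InstCosts,
                         unsigned Threshold, bool EndsInSwitch) {
  unsigned Bonus = EndsInSwitch ? 6 : 0; // the patch also uses 8 for indirectbr
  Threshold += Bonus;
  unsigned Size = 0;
  for (unsigned Cost : InstCosts) {
    if (Size > Threshold)                // early exit for clearly huge blocks
      return Size;
    Size += Cost;
  }
  return Size > Bonus ? Size - Bonus : 0;
}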
/// FindLoopHeaders - We do not want jump threading to turn proper loop
@@ -669,7 +722,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
- if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
+ const TerminatorInst *TI = SinglePred->getTerminator();
+ if (!TI->isExceptional() && TI->getNumSuccessors() == 1 &&
SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
@@ -761,7 +815,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// If we're branching on a conditional, LVI might be able to determine
// its value at the branch instruction. We only handle comparisons
// against a constant at this time.
- // TODO: This should be extended to handle switches as well.
+ // TODO: This should be extended to handle switches as well.
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
if (CondBr && CondConst && CondBr->isConditional()) {
@@ -829,9 +883,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
+ // Search for a stronger dominating condition that can be used to simplify a
+ // conditional branch leaving BB.
+ if (ProcessImpliedCondition(BB))
+ return true;
+
+ return false;
+}
+
+bool JumpThreading::ProcessImpliedCondition(BasicBlock *BB) {
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ Value *Cond = BI->getCondition();
+ BasicBlock *CurrentBB = BB;
+ BasicBlock *CurrentPred = BB->getSinglePredecessor();
+ unsigned Iter = 0;
+
+ auto &DL = BB->getModule()->getDataLayout();
+
+ while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
+ auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
+ if (!PBI || !PBI->isConditional() || PBI->getSuccessor(0) != CurrentBB)
+ return false;
- // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)", thread through this block.
+ if (isImpliedCondition(PBI->getCondition(), Cond, DL)) {
+ BI->getSuccessor(1)->removePredecessor(BB);
+ BranchInst::Create(BI->getSuccessor(0), BI);
+ BI->eraseFromParent();
+ return true;
+ }
+ CurrentBB = CurrentPred;
+ CurrentPred = CurrentBB->getSinglePredecessor();
+ }
return false;
}
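ProcessImpliedCondition walks up through single predecessors, and when a dominating branch's condition implies the current block's weaker condition along the taken edge, the weaker conditional branch is folded into an unconditional jump. A toy version of the implication test for one simple pattern, standing in for the real isImpliedCondition (which handles far more cases):

#include <cstdint>

// Hypothetical miniature of the implication check, restricted to signed
// greater-than comparisons of one variable against constants: along the true
// edge of "x > CA", the weaker test "x > CB" is known true whenever CA >= CB.
bool sgtImpliesSgt(int64_t CA, int64_t CB) {
  return CA >= CB;
}
// Example: a predecessor's true edge ("x > 10") leads into a block that tests
// "x > 0"; sgtImpliesSgt(10, 0) holds, so that block's conditional branch can
// be replaced by an unconditional jump to its true successor.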
@@ -850,10 +935,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (LoadBB->getSinglePredecessor())
return false;
- // If the load is defined in a landing pad, it can't be partially redundant,
- // because the edges between the invoke and the landing pad cannot have other
+ // If the load is defined in an EH pad, it can't be partially redundant,
+ // because the edges between the invoke and the EH pad cannot have other
// instructions between them.
- if (LoadBB->isLandingPad())
+ if (LoadBB->isEHPad())
return false;
Value *LoadedPtr = LI->getOperand(0);
@@ -866,11 +951,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
- BasicBlock::iterator BBIt = LI;
+ BasicBlock::iterator BBIt(LI);
if (Value *AvailableVal =
- FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
- // If the value if the load is locally available within the block, just use
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, DefMaxInstsToScan)) {
+ // If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
@@ -914,7 +999,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
AAMDNodes ThisAATags;
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt,
+ DefMaxInstsToScan,
nullptr, &ThisAATags);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
@@ -968,8 +1054,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
}
// Split them out to their own block.
- UnavailablePred =
- SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split");
+ UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");
}
// If the value isn't available in all predecessors, then there will be
@@ -995,7 +1080,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Create a PHI node at the start of the block for the PRE'd load value.
pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
- LoadBB->begin());
+ &LoadBB->front());
PN->takeName(LI);
PN->setDebugLoc(LI->getDebugLoc());
@@ -1262,7 +1347,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Into:
// BB':
// %Y = icmp ne i32 %A, %B
- // br i1 %Z, ...
+ // br i1 %Y, ...
PredValueInfoTy XorOpValues;
bool isLHS = true;
@@ -1387,14 +1472,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
return false;
}
- // And finally, do it! Start by factoring the predecessors is needed.
+ // And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
+ PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
}
// And finally, do it!
@@ -1415,6 +1500,13 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
BB->getParent(), BB);
NewBB->moveAfter(PredBB);
+ // Set the block frequency of NewBB.
+ if (HasProfileData) {
+ auto NewBBFreq =
+ BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
+ BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ }
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
@@ -1425,7 +1517,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
Instruction *New = BI->clone();
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
- ValueMapping[BI] = New;
+ ValueMapping[&*BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
@@ -1438,7 +1530,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
- BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
+ BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
@@ -1475,8 +1567,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
+ SSAUpdate.AddAvailableValue(BB, &*I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1499,11 +1591,98 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// frequently happens because of phi translation.
SimplifyInstructionsInBlock(NewBB, TLI);
+ // Update the edge weight from BB to SuccBB, which should be less than before.
+ UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
+
// Threaded an edge!
++NumThreads;
return true;
}
+/// Create a new basic block that will be the predecessor of BB and successor of
+/// all blocks in Preds. When profile data is available, update the frequency of
+/// this new block.
+BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB,
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix) {
+ // Collect the frequencies of all predecessors of BB, which will be used to
+ // update the edge weight on BB->SuccBB.
+ BlockFrequency PredBBFreq(0);
+ if (HasProfileData)
+ for (auto Pred : Preds)
+ PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB);
+
+ BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix);
+
+ // Set the block frequency of the newly created PredBB, which is the sum of
+ // frequencies of Preds.
+ if (HasProfileData)
+ BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency());
+ return PredBB;
+}
+
+/// Update the block frequency of BB and the branch weight metadata on the
+/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
+/// Freq(PredBB->BB) / Freq(BB->SuccBB).
+void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
+ BasicBlock *BB,
+ BasicBlock *NewBB,
+ BasicBlock *SuccBB) {
+ if (!HasProfileData)
+ return;
+
+ assert(BFI && BPI && "BFI & BPI should have been created here");
+
+ // As the edge from PredBB to BB is deleted, we have to update the block
+ // frequency of BB.
+ auto BBOrigFreq = BFI->getBlockFreq(BB);
+ auto NewBBFreq = BFI->getBlockFreq(NewBB);
+ auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);
+ auto BBNewFreq = BBOrigFreq - NewBBFreq;
+ BFI->setBlockFreq(BB, BBNewFreq.getFrequency());
+
+ // Collect updated outgoing edges' frequencies from BB and use them to update
+ // edge probabilities.
+ SmallVector<uint64_t, 4> BBSuccFreq;
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ auto SuccFreq = (*I == SuccBB)
+ ? BB2SuccBBFreq - NewBBFreq
+ : BBOrigFreq * BPI->getEdgeProbability(BB, *I);
+ BBSuccFreq.push_back(SuccFreq.getFrequency());
+ }
+
+ uint64_t MaxBBSuccFreq =
+ *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
+
+ SmallVector<BranchProbability, 4> BBSuccProbs;
+ if (MaxBBSuccFreq == 0)
+ BBSuccProbs.assign(BBSuccFreq.size(),
+ {1, static_cast<unsigned>(BBSuccFreq.size())});
+ else {
+ for (uint64_t Freq : BBSuccFreq)
+ BBSuccProbs.push_back(
+ BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));
+ // Normalize edge probabilities so that they sum up to one.
+ BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),
+ BBSuccProbs.end());
+ }
+
+ // Update edge probabilities in BPI.
+ for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
+ BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
+
+ if (BBSuccProbs.size() >= 2) {
+ SmallVector<uint32_t, 4> Weights;
+ for (auto Prob : BBSuccProbs)
+ Weights.push_back(Prob.getNumerator());
+
+ auto TI = BB->getTerminator();
+ TI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights));
+ }
+}
+
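The arithmetic in UpdateBlockFreqAndEdgeWeight can be summarized with a small self-contained sketch. Everything below is simplified for illustration: the double probabilities, the 0-100 weight scale, and the function name are invented and are not how BlockFrequencyInfo or BranchProbability represent things.

#include <algorithm>
#include <cstdint>
#include <vector>

// Rough model: after the edge PredBB->BB has been redirected through the
// clone NewBB, the threaded successor of BB loses the NewBBFreq portion of
// its incoming frequency, and the remaining outgoing frequencies are turned
// back into integer branch weights.
std::vector<uint32_t> recomputeBranchWeights(uint64_t BBOrigFreq,
                                             uint64_t NewBBFreq,
                                             const std::vector<double> &SuccProb,
                                             unsigned ThreadedSuccIdx) {
  if (SuccProb.empty())
    return {};
  std::vector<uint64_t> SuccFreq;
  for (unsigned I = 0; I < SuccProb.size(); ++I) {
    uint64_t F = static_cast<uint64_t>(BBOrigFreq * SuccProb[I]);
    if (I == ThreadedSuccIdx)
      F -= std::min(F, NewBBFreq); // that portion now flows through NewBB
    SuccFreq.push_back(F);
  }
  uint64_t MaxFreq = *std::max_element(SuccFreq.begin(), SuccFreq.end());
  std::vector<uint32_t> Weights;
  for (uint64_t F : SuccFreq)
    Weights.push_back(MaxFreq == 0
                          ? 1 // no profile mass left: fall back to uniform
                          : static_cast<uint32_t>(100.0 * F / MaxFreq));
  return Weights;
}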
/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
@@ -1530,14 +1709,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
return false;
}
- // And finally, do it! Start by factoring the predecessors is needed.
+ // And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
+ PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm");
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
@@ -1581,12 +1760,12 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (Value *IV =
SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
delete New;
- ValueMapping[BI] = IV;
+ ValueMapping[&*BI] = IV;
} else {
// Otherwise, insert the new instruction into the block.
New->setName(BI->getName());
- PredBB->getInstList().insert(OldPredBranch, New);
- ValueMapping[BI] = New;
+ PredBB->getInstList().insert(OldPredBranch->getIterator(), New);
+ ValueMapping[&*BI] = New;
}
}
@@ -1628,8 +1807,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
+ SSAUpdate.AddAvailableValue(BB, &*I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 43fc50e..6d70cdc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -34,10 +34,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -118,9 +121,12 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -164,9 +170,12 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -183,7 +192,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -264,9 +273,10 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// FIXME: This is really heavy handed. It would be a bit better to use an
// SSAUpdater strategy during promotion that was LCSSA aware and reformed
// it as it went.
- if (Changed)
- formLCSSARecursively(*L, *DT, LI,
- getAnalysisIfAvailable<ScalarEvolution>());
+ if (Changed) {
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ formLCSSARecursively(*L, *DT, LI, SEWP ? &SEWP->getSE() : nullptr);
+ }
}
// Check that neither this loop nor its parent have had LCSSA broken. LICM is
@@ -402,7 +412,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
}
/// Computes loop safety information, checks loop body & header
-/// for the possiblity of may throw exception.
+/// for the possibility of a may-throw exception.
///
void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
assert(CurLoop != nullptr && "CurLoop cant be null");
@@ -410,7 +420,7 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
// Setting default safety values.
SafetyInfo->MayThrow = false;
SafetyInfo->HeaderMayThrow = false;
- // Iterate over header and compute dafety info.
+ // Iterate over header and compute safety info.
for (BasicBlock::iterator I = Header->begin(), E = Header->end();
(I != E) && !SafetyInfo->HeaderMayThrow; ++I)
SafetyInfo->HeaderMayThrow |= I->mayThrow();
@@ -445,7 +455,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
- Size = AA->getTypeStoreSize(LI->getType());
+ Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType());
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
@@ -457,10 +467,21 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
return false;
// Handle simple cases by querying alias analysis.
- AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
- if (Behavior == AliasAnalysis::DoesNotAccessMemory)
+ FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
+ if (Behavior == FMRB_DoesNotAccessMemory)
return true;
if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+ // A readonly argmemonly function only reads from memory pointed to by
+ // its arguments with arbitrary offsets. If we can prove there are no
+ // writes to this memory in the loop, we can hoist or sink.
+ if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) {
+ for (Value *Op : CI->arg_operands())
+ if (Op->getType()->isPointerTy() &&
+ pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize,
+ AAMDNodes(), CurAST))
+ return false;
+ return true;
+ }
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
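The new argmemonly case can be pictured with a small C++ example; the callee name and shapes below are invented, and the pass of course reasons about IR attributes and alias sets rather than source code.

// 'sumTable' stands for any call known to only read the memory its pointer
// argument points to (readonly + argmemonly in LLVM terms).
static int sumTable(const int *T, int N) { // reads *T, never writes memory
  int S = 0;
  for (int I = 0; I < N; ++I)
    S += T[I];
  return S;
}

int caller(const int *Table, int N, int Reps) {
  int Acc = 0;
  // Nothing in this loop writes memory, so the loop-invariant call can be
  // hoisted to the preheader once the pass has proven that the callee only
  // reads memory reachable from its arguments and that memory is not
  // invalidated inside the loop.
  for (int R = 0; R < Reps; ++R)
    Acc += sumTable(Table, N);
  return Acc;
}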
@@ -566,7 +587,7 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I,
if (!OLoop->contains(&PN)) {
PHINode *OpPN =
PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
- OInst->getName() + ".lcssa", ExitBlock.begin());
+ OInst->getName() + ".lcssa", &ExitBlock.front());
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
*OI = OpPN;
@@ -651,6 +672,10 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) {
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
+ // Metadata can be dependent on the condition we are hoisting above.
+ // Conservatively strip all metadata on the instruction.
+ I.dropUnknownNonDebugMetadata();
+
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumHoisted;
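The reason hoist() now strips metadata can be seen from a guarded load; the example is illustrative C++ with an invented shape, and the annotation mentioned is just one instance of condition-dependent metadata.

// Metadata attached to an instruction may encode a fact that was only
// established by a condition the instruction sits under.
int guarded(int **Table, int N, bool TableChecked) {
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    if (TableChecked)   // under this guard, Table[0] is known non-null...
      Sum += *Table[0]; // ...so the load of Table[0] could carry !nonnull
  // If that load is hoisted into the preheader it no longer sits under the
  // guard, executes even when TableChecked is false, and the
  // condition-dependent annotation must be dropped.
  return Sum;
}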
@@ -730,9 +755,9 @@ namespace {
if (!L->contains(BB)) {
// We need to create an LCSSA PHI node for the incoming value and
// store that.
- PHINode *PN = PHINode::Create(
- I->getType(), PredCache.size(BB),
- I->getName() + ".lcssa", BB->begin());
+ PHINode *PN =
+ PHINode::Create(I->getType(), PredCache.size(BB),
+ I->getName() + ".lcssa", &BB->front());
for (BasicBlock *Pred : PredCache.get(BB))
PN->addIncoming(I, Pred);
return PN;
@@ -942,7 +967,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
CurLoop->getUniqueExitBlocks(ExitBlocks);
InsertPts.resize(ExitBlocks.size());
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
+ InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt();
}
// We use the SSAUpdater interface to insert phi nodes as required.
@@ -973,7 +998,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
return Changed;
}
-/// Simple Analysis hook. Clone alias set info.
+/// Simple analysis hook. Clone alias set info.
///
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
index c19cd19..1648878 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -56,7 +57,7 @@ class LoadCombine : public BasicBlockPass {
public:
LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
- initializeSROAPass(*PassRegistry::getPassRegistry());
+ initializeLoadCombinePass(*PassRegistry::getPassRegistry());
}
using llvm::Pass::doInitialization;
@@ -223,7 +224,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
if (skipOptnoneFunction(BB))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
IRBuilder<true, TargetFolder> TheBuilder(
BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
@@ -262,8 +263,8 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
char LoadCombine::ID = 0;
@@ -274,7 +275,8 @@ BasicBlockPass *llvm::createLoadCombinePass() {
INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads",
false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads",
false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 98b068e..bc00ff3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
@@ -35,18 +36,19 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
@@ -64,7 +66,7 @@ INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
@@ -130,7 +132,7 @@ bool LoopDeletion::isLoopDead(Loop *L,
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
@@ -169,7 +171,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
@@ -242,9 +244,8 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
for (BasicBlock *BB : blocks)
loopInfo.removeBlock(BB);
- // The last step is to inform the loop pass manager that we've
- // eliminated this loop.
- LPM.deleteLoopFromQueue(L);
+ // The last step is to update LoopInfo now that we've eliminated this loop.
+ loopInfo.updateUnloop(L);
Changed = true;
++NumDeleted;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 1b9859b..3d3cf3e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <list>
@@ -54,6 +55,11 @@ static cl::opt<bool> DistributeNonIfConvertible(
"if-convertible by the loop vectorizer"),
cl::init(false));
+static cl::opt<unsigned> DistributeSCEVCheckThreshold(
+ "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed for Loop "
+ "Distribution"));
+
STATISTIC(NumLoopsDistributed, "Number of loops distributed");
namespace {
@@ -164,9 +170,7 @@ public:
// Delete the instructions backwards, as it has a reduced likelihood of
// having to update as many def-use and use-def chains.
- for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) {
- auto *Inst = *I;
-
+ for (auto *Inst : make_range(Unused.rbegin(), Unused.rend())) {
if (!Inst->use_empty())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
Inst->eraseFromParent();
@@ -373,7 +377,7 @@ public:
/// \brief This performs the main chunk of the work of cloning the loops for
/// the partitions.
- void cloneLoops(Pass *P) {
+ void cloneLoops() {
BasicBlock *OrigPH = L->getLoopPreheader();
// At this point the predecessor of the preheader is either the memcheck
// block or the top part of the original preheader.
@@ -547,11 +551,11 @@ public:
MemoryInstructionDependences(
const SmallVectorImpl<Instruction *> &Instructions,
- const SmallVectorImpl<Dependence> &InterestingDependences) {
+ const SmallVectorImpl<Dependence> &Dependences) {
Accesses.append(Instructions.begin(), Instructions.end());
DEBUG(dbgs() << "Backward dependences:\n");
- for (auto &Dep : InterestingDependences)
+ for (auto &Dep : Dependences)
if (Dep.isPossiblyBackward()) {
// Note that the designations source and destination follow the program
// order, i.e. source is always first. (The direction is given by the
@@ -567,25 +571,6 @@ private:
AccessesType Accesses;
};
-/// \brief Returns the instructions that use values defined in the loop.
-static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) {
- SmallVector<Instruction *, 8> UsedOutside;
-
- for (auto *Block : L->getBlocks())
- // FIXME: I believe that this could use copy_if if the Inst reference could
- // be adapted into a pointer.
- for (auto &Inst : *Block) {
- auto Users = Inst.users();
- if (std::any_of(Users.begin(), Users.end(), [&](User *U) {
- auto *Use = cast<Instruction>(U);
- return !L->contains(Use->getParent());
- }))
- UsedOutside.push_back(&Inst);
- }
-
- return UsedOutside;
-}
-
/// \brief The pass class.
class LoopDistribute : public FunctionPass {
public:
@@ -597,6 +582,7 @@ public:
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LAA = &getAnalysis<LoopAccessAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Build up a worklist of inner-loops to vectorize. This is necessary as the
// act of distributing a loop creates new loops and can invalidate iterators
@@ -619,6 +605,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<LoopAccessAnalysis>();
@@ -629,6 +616,45 @@ public:
static char ID;
private:
+ /// \brief Filter out checks between pointers from the same partition.
+ ///
+ /// \p PtrToPartition contains the partition number for pointers. Partition
+ /// number -1 means that the pointer is used in multiple partitions. In this
+ /// case we can't safely omit the check.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4>
+ includeOnlyCrossPartitionChecks(
+ const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &AllChecks,
+ const SmallVectorImpl<int> &PtrToPartition,
+ const RuntimePointerChecking *RtPtrChecking) {
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+ std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (unsigned PtrIdx1 : Check.first->Members)
+ for (unsigned PtrIdx2 : Check.second->Members)
+ // Only include this check if there is a pair of pointers
+ // that require checking and the pointers fall into
+ // separate partitions.
+ //
+ // (Note that we already know at this point that the two
+ // pointer groups need checking but it doesn't follow
+ // that each pair of pointers within the two groups need
+ // checking as well.
+ //
+ // In other words we don't want to include a check just
+ // because there is a pair of pointers between the two
+ // pointer groups that require checks and a different
+ // pair whose pointers fall into different partitions.)
+ if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
+ !RuntimePointerChecking::arePointersInSamePartition(
+ PtrToPartition, PtrIdx1, PtrIdx2))
+ return true;
+ return false;
+ });
+
+ return Checks;
+ }
+
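Stripped of the pointer-group detail, the filtering above keeps a check only when its two pointers can end up in different partitions. A flat-index sketch, assuming invented types but the same -1 convention for "used in multiple partitions", looks like this:

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

using Check = std::pair<unsigned, unsigned>; // indices of two pointers

std::vector<Check>
crossPartitionChecks(const std::vector<Check> &AllChecks,
                     const std::vector<int> &PtrToPartition) {
  std::vector<Check> Kept;
  std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Kept),
               [&](const Check &C) {
                 int P1 = PtrToPartition[C.first];
                 int P2 = PtrToPartition[C.second];
                 // -1 means "used by several partitions": always keep.
                 return P1 == -1 || P2 == -1 || P1 != P2;
               });
  return Kept;
}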
/// \brief Try to distribute an inner-most loop.
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
@@ -655,9 +681,8 @@ private:
DEBUG(dbgs() << "Skipping; memory operations are safe for vectorization");
return false;
}
- auto *InterestingDependences =
- LAI.getDepChecker().getInterestingDependences();
- if (!InterestingDependences || InterestingDependences->empty()) {
+ auto *Dependences = LAI.getDepChecker().getDependences();
+ if (!Dependences || Dependences->empty()) {
DEBUG(dbgs() << "Skipping; No unsafe dependences to isolate");
return false;
}
@@ -685,7 +710,7 @@ private:
// NumUnsafeDependencesActive reaches 0.
const MemoryDepChecker &DepChecker = LAI.getDepChecker();
MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
- *InterestingDependences);
+ *Dependences);
int NumUnsafeDependencesActive = 0;
for (auto &InstDep : MID) {
@@ -735,6 +760,13 @@ private:
return false;
}
+ // Don't distribute the loop if we need too many SCEV run-time checks.
+ const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate();
+ if (Pred.getComplexity() > DistributeSCEVCheckThreshold) {
+ DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
+ return false;
+ }
+
DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
// We're done forming the partitions set up the reverse mapping from
// instructions to partitions.
@@ -746,20 +778,25 @@ private:
if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
SplitBlock(PH, PH->getTerminator(), DT, LI);
- // If we need run-time checks to disambiguate pointers are run-time, version
- // the loop now.
+ // If we need run-time checks, version the loop now.
auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
- LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition);
- if (LVer.needsRuntimeChecks()) {
+ const auto *RtPtrChecking = LAI.getRuntimePointerChecking();
+ const auto &AllChecks = RtPtrChecking->getChecks();
+ auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
+ RtPtrChecking);
+
+ if (!Pred.isAlwaysTrue() || !Checks.empty()) {
DEBUG(dbgs() << "\nPointers:\n");
- DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition));
- LVer.versionLoop(this);
- LVer.addPHINodes(DefsUsedOutside);
+ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+ LoopVersioning LVer(LAI, L, LI, DT, SE, false);
+ LVer.setAliasChecks(std::move(Checks));
+ LVer.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LVer.versionLoop(DefsUsedOutside);
}
// Create identical copies of the original loop for each partition and hook
// them up sequentially.
- Partitions.cloneLoops(this);
+ Partitions.cloneLoops();
// Now, we remove the instructions from each loop that don't belong to that
// partition.
@@ -780,6 +817,7 @@ private:
LoopInfo *LI;
LoopAccessAnalysis *LAA;
DominatorTree *DT;
+ ScalarEvolution *SE;
};
} // anonymous namespace
@@ -790,6 +828,7 @@ INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
namespace llvm {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a21ca24..2d577de 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -31,11 +31,6 @@
// void foo(_Complex float *P)
// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
//
-// We should enhance this to handle negative strides through memory.
-// Alternatively (and perhaps better) we could rely on an earlier pass to force
-// forward iteration through memory, which is generally better for cache
-// behavior. Negative strides *do* happen for memset/memcpy loops.
-//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
@@ -44,7 +39,10 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -67,149 +65,85 @@ STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
namespace {
- class LoopIdiomRecognize;
+class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
+ const DataLayout *DL;
+
+public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
- /// This class defines some utility functions for loop idiom recognization.
- class LIRUtil {
- public:
- /// Return true iff the block contains nothing but an uncondition branch
- /// (aka goto instruction).
- static bool isAlmostEmpty(BasicBlock *);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
- static BranchInst *getBranch(BasicBlock *BB) {
- return dyn_cast<BranchInst>(BB->getTerminator());
- }
+private:
+ typedef SmallVector<StoreInst *, 8> StoreList;
+ StoreList StoreRefs;
- /// Derive the precondition block (i.e the block that guards the loop
- /// preheader) from the given preheader.
- static BasicBlock *getPrecondBb(BasicBlock *PreHead);
- };
-
- /// This class is to recoginize idioms of population-count conducted in
- /// a noncountable loop. Currently it only recognizes this pattern:
- /// \code
- /// while(x) {cnt++; ...; x &= x - 1; ...}
- /// \endcode
- class NclPopcountRecognize {
- LoopIdiomRecognize &LIR;
- Loop *CurLoop;
- BasicBlock *PreCondBB;
-
- typedef IRBuilder<> IRBuilderTy;
-
- public:
- explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
- bool recognize();
-
- private:
- /// Take a glimpse of the loop to see if we need to go ahead recoginizing
- /// the idiom.
- bool preliminaryScreen();
-
- /// Check if the given conditional branch is based on the comparison
- /// between a variable and zero, and if the variable is non-zero, the
- /// control yields to the loop entry. If the branch matches the behavior,
- /// the variable involved in the comparion is returned. This function will
- /// be called to see if the precondition and postcondition of the loop
- /// are in desirable form.
- Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const;
-
- /// Return true iff the idiom is detected in the loop. and 1) \p CntInst
- /// is set to the instruction counting the population bit. 2) \p CntPhi
- /// is set to the corresponding phi node. 3) \p Var is set to the value
- /// whose population bits are being counted.
- bool detectIdiom
- (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
-
- /// Insert ctpop intrinsic function and some obviously dead instructions.
- void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var);
-
- /// Create llvm.ctpop.* intrinsic function.
- CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
- };
-
- class LoopIdiomRecognize : public LoopPass {
- Loop *CurLoop;
- DominatorTree *DT;
- ScalarEvolution *SE;
- TargetLibraryInfo *TLI;
- const TargetTransformInfo *TTI;
- public:
- static char ID;
- explicit LoopIdiomRecognize() : LoopPass(ID) {
- initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DT = nullptr;
- SE = nullptr;
- TLI = nullptr;
- TTI = nullptr;
- }
+ /// \name Countable Loop Idiom Handling
+ /// @{
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
- SmallVectorImpl<BasicBlock*> &ExitBlocks);
-
- bool processLoopStore(StoreInst *SI, const SCEV *BECount);
- bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
-
- bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment,
- Value *SplatValue, Instruction *TheStore,
- const SCEVAddRecExpr *Ev,
- const SCEV *BECount);
- bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
- const SCEVAddRecExpr *StoreEv,
- const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG.
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
+ bool runOnCountableLoop();
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks);
- DominatorTree *getDominatorTree() {
- return DT ? DT
- : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree());
- }
+ void collectStores(BasicBlock *BB);
+ bool isLegalStore(StoreInst *SI);
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
- ScalarEvolution *getScalarEvolution() {
- return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
- }
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *SplatValue,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount, bool NegStride);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEV *BECount, bool NegStride);
- TargetLibraryInfo *getTargetLibraryInfo() {
- if (!TLI)
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ /// @}
+ /// \name Noncountable Loop Idiom Handling
+ /// @{
- return TLI;
- }
+ bool runOnNoncountableLoop();
- const TargetTransformInfo *getTargetTransformInfo() {
- return TTI ? TTI
- : (TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *CurLoop->getHeader()->getParent()));
- }
+ bool recognizePopcount();
+ void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
+ PHINode *CntPhi, Value *Var);
- Loop *getLoop() const { return CurLoop; }
+ /// @}
+};
- private:
- bool runOnNoncountableLoop();
- bool runOnCountableLoop();
- };
-}
+} // End anonymous namespace.
char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
@@ -218,9 +152,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
@@ -242,406 +179,64 @@ static void deleteDeadInstruction(Instruction *I,
//===----------------------------------------------------------------------===//
//
-// Implementation of LIRUtil
-//
-//===----------------------------------------------------------------------===//
-
-// This function will return true iff the given block contains nothing but goto.
-// A typical usage of this function is to check if the preheader function is
-// "almost" empty such that generated intrinsic functions can be moved across
-// the preheader and be placed at the end of the precondition block without
-// the concern of breaking data dependence.
-bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
- if (BranchInst *Br = getBranch(BB)) {
- return Br->isUnconditional() && Br == BB->begin();
- }
- return false;
-}
-
-BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
- if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
- BranchInst *Br = getBranch(BB);
- return Br && Br->isConditional() ? BB : nullptr;
- }
- return nullptr;
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Implementation of NclPopcountRecognize
+// Implementation of LoopIdiomRecognize
//
//===----------------------------------------------------------------------===//
-NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
- LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) {
-}
-
-bool NclPopcountRecognize::preliminaryScreen() {
- const TargetTransformInfo *TTI = LIR.getTargetTransformInfo();
- if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
- return false;
-
- // Counting population are usually conducted by few arithmetic instructions.
- // Such instructions can be easilly "absorbed" by vacant slots in a
- // non-compact loop. Therefore, recognizing popcount idiom only makes sense
- // in a compact loop.
-
- // Give up if the loop has multiple blocks or multiple backedges.
- if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
- return false;
-
- BasicBlock *LoopBody = *(CurLoop->block_begin());
- if (LoopBody->size() >= 20) {
- // The loop is too big, bail out.
- return false;
- }
-
- // It should have a preheader containing nothing but a goto instruction.
- BasicBlock *PreHead = CurLoop->getLoopPreheader();
- if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
return false;
- // It should have a precondition block where the generated popcount instrinsic
- // function will be inserted.
- PreCondBB = LIRUtil::getPrecondBb(PreHead);
- if (!PreCondBB)
+ CurLoop = L;
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it, just give up.
+ if (!L->getLoopPreheader())
return false;
- return true;
-}
-
-Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
- BasicBlock *LoopEntry) const {
- if (!Br || !Br->isConditional())
- return nullptr;
-
- ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
- if (!Cond)
- return nullptr;
-
- ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
- if (!CmpZero || !CmpZero->isZero())
- return nullptr;
-
- ICmpInst::Predicate Pred = Cond->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
- (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
- return Cond->getOperand(0);
-
- return nullptr;
-}
-
-bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
- PHINode *&CntPhi,
- Value *&Var) const {
- // Following code tries to detect this idiom:
- //
- // if (x0 != 0)
- // goto loop-exit // the precondition of the loop
- // cnt0 = init-val;
- // do {
- // x1 = phi (x0, x2);
- // cnt1 = phi(cnt0, cnt2);
- //
- // cnt2 = cnt1 + 1;
- // ...
- // x2 = x1 & (x1 - 1);
- // ...
- // } while(x != 0);
- //
- // loop-exit:
- //
-
- // step 1: Check to see if the look-back branch match this pattern:
- // "if (a!=0) goto loop-entry".
- BasicBlock *LoopEntry;
- Instruction *DefX2, *CountInst;
- Value *VarX1, *VarX0;
- PHINode *PhiX, *CountPhi;
-
- DefX2 = CountInst = nullptr;
- VarX1 = VarX0 = nullptr;
- PhiX = CountPhi = nullptr;
- LoopEntry = *(CurLoop->block_begin());
-
- // step 1: Check if the loop-back branch is in desirable form.
- {
- if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
- DefX2 = dyn_cast<Instruction>(T);
- else
- return false;
- }
-
- // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
- {
- if (!DefX2 || DefX2->getOpcode() != Instruction::And)
- return false;
-
- BinaryOperator *SubOneOp;
-
- if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
- VarX1 = DefX2->getOperand(1);
- else {
- VarX1 = DefX2->getOperand(0);
- SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
- }
- if (!SubOneOp)
- return false;
-
- Instruction *SubInst = cast<Instruction>(SubOneOp);
- ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
- if (!Dec ||
- !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
- (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
- return false;
- }
- }
-
- // step 3: Check the recurrence of variable X
- {
- PhiX = dyn_cast<PHINode>(VarX1);
- if (!PhiX ||
- (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
- return false;
- }
- }
-
- // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
- {
- CountInst = nullptr;
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
- IterE = LoopEntry->end(); Iter != IterE; Iter++) {
- Instruction *Inst = Iter;
- if (Inst->getOpcode() != Instruction::Add)
- continue;
-
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
- if (!Inc || !Inc->isOne())
- continue;
-
- PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
- if (!Phi || Phi->getParent() != LoopEntry)
- continue;
-
- // Check if the result of the instruction is live of the loop.
- bool LiveOutLoop = false;
- for (User *U : Inst->users()) {
- if ((cast<Instruction>(U))->getParent() != LoopEntry) {
- LiveOutLoop = true; break;
- }
- }
-
- if (LiveOutLoop) {
- CountInst = Inst;
- CountPhi = Phi;
- break;
- }
- }
-
- if (!CountInst)
- return false;
- }
-
- // step 5: check if the precondition is in this form:
- // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
- {
- BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
- Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
- if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
- return false;
-
- CntInst = CountInst;
- CntPhi = CountPhi;
- Var = T;
- }
-
- return true;
-}
-
-void NclPopcountRecognize::transform(Instruction *CntInst,
- PHINode *CntPhi, Value *Var) {
-
- ScalarEvolution *SE = LIR.getScalarEvolution();
- TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
- BasicBlock *PreHead = CurLoop->getLoopPreheader();
- BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
- const DebugLoc DL = CntInst->getDebugLoc();
-
- // Assuming before transformation, the loop is following:
- // if (x) // the precondition
- // do { cnt++; x &= x - 1; } while(x);
-
- // Step 1: Insert the ctpop instruction at the end of the precondition block
- IRBuilderTy Builder(PreCondBr);
- Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
- {
- PopCnt = createPopcntIntrinsic(Builder, Var, DL);
- NewCount = PopCntZext =
- Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
-
- if (NewCount != PopCnt)
- (cast<Instruction>(NewCount))->setDebugLoc(DL);
-
- // TripCnt is exactly the number of iterations the loop has
- TripCnt = NewCount;
-
- // If the population counter's initial value is not zero, insert Add Inst.
- Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
- ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
- if (!InitConst || !InitConst->isZero()) {
- NewCount = Builder.CreateAdd(NewCount, CntInitVal);
- (cast<Instruction>(NewCount))->setDebugLoc(DL);
- }
- }
-
- // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to
- // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic
- // function would be partial dead code, and downstream passes will drag
- // it back from the precondition block to the preheader.
- {
- ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
-
- Value *Opnd0 = PopCntZext;
- Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
- if (PreCond->getOperand(0) != Var)
- std::swap(Opnd0, Opnd1);
-
- ICmpInst *NewPreCond =
- cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
- PreCondBr->setCondition(NewPreCond);
-
- RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
- }
-
- // Step 3: Note that the population count is exactly the trip count of the
- // loop in question, which enble us to to convert the loop from noncountable
- // loop into a countable one. The benefit is twofold:
- //
- // - If the loop only counts population, the entire loop become dead after
- // the transformation. It is lots easier to prove a countable loop dead
- // than to prove a noncountable one. (In some C dialects, a infite loop
- // isn't dead even if it computes nothing useful. In general, DCE needs
- // to prove a noncountable loop finite before safely delete it.)
- //
- // - If the loop also performs something else, it remains alive.
- // Since it is transformed to countable form, it can be aggressively
- // optimized by some optimizations which are in general not applicable
- // to a noncountable loop.
- //
- // After this step, this loop (conceptually) would look like following:
- // newcnt = __builtin_ctpop(x);
- // t = newcnt;
- // if (x)
- // do { cnt++; x &= x-1; t--) } while (t > 0);
- BasicBlock *Body = *(CurLoop->block_begin());
- {
- BranchInst *LbBr = LIRUtil::getBranch(Body);
- ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
- Type *Ty = TripCnt->getType();
-
- PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
-
- Builder.SetInsertPoint(LbCond);
- Value *Opnd1 = cast<Value>(TcPhi);
- Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
- Instruction *TcDec =
- cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
-
- TcPhi->addIncoming(TripCnt, PreHead);
- TcPhi->addIncoming(TcDec, Body);
-
- CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
- CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
- LbCond->setPredicate(Pred);
- LbCond->setOperand(0, TcDec);
- LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
- }
-
- // Step 4: All the references to the original population counter outside
- // the loop are replaced with the NewCount -- the value returned from
- // __builtin_ctpop().
- CntInst->replaceUsesOutsideBlock(NewCount, Body);
-
- // step 5: Forget the "non-computable" trip-count SCEV associated with the
- // loop. The loop would otherwise not be deleted even if it becomes empty.
- SE->forgetLoop(CurLoop);
-}
-
-CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
- Value *Val, DebugLoc DL) {
- Value *Ops[] = { Val };
- Type *Tys[] = { Val->getType() };
-
- Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
- CallInst *CI = IRBuilder.CreateCall(Func, Ops);
- CI->setDebugLoc(DL);
-
- return CI;
-}
-
-/// recognize - detect population count idiom in a non-countable loop. If
-/// detected, transform the relevant code to popcount intrinsic function
-/// call, and return true; otherwise, return false.
-bool NclPopcountRecognize::recognize() {
-
- if (!LIR.getTargetTransformInfo())
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
return false;
- LIR.getScalarEvolution();
-
- if (!preliminaryScreen())
- return false;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *CurLoop->getHeader()->getParent());
+ DL = &CurLoop->getHeader()->getModule()->getDataLayout();
- Instruction *CntInst;
- PHINode *CntPhi;
- Value *Val;
- if (!detectIdiom(CntInst, CntPhi, Val))
- return false;
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
- transform(CntInst, CntPhi, Val);
- return true;
+ return runOnNoncountableLoop();
}
-//===----------------------------------------------------------------------===//
-//
-// Implementation of LoopIdiomRecognize
-//
-//===----------------------------------------------------------------------===//
-
bool LoopIdiomRecognize::runOnCountableLoop() {
const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
assert(!isa<SCEVCouldNotCompute>(BECount) &&
- "runOnCountableLoop() called on a loop without a predictable"
- "backedge-taken count");
+ "runOnCountableLoop() called on a loop without a predictable"
+ "backedge-taken count");
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- if (BECst->getValue()->getValue() == 0)
+ if (BECst->getAPInt() == 0)
return false;
- // set DT
- (void)getDominatorTree();
-
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
- // set TLI
- (void)getTargetLibraryInfo();
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
+ SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
DEBUG(dbgs() << "loop-idiom Scanning: F["
- << CurLoop->getHeader()->getParent()->getName()
- << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+ << CurLoop->getHeader()->getParent()->getName() << "] Loop %"
+ << CurLoop->getHeader()->getName() << "\n");
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
- if (LI.getLoopFor(BB) != CurLoop)
+ if (LI->getLoopFor(BB) != CurLoop)
continue;
MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
@@ -649,41 +244,109 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
return MadeChange;
}
-bool LoopIdiomRecognize::runOnNoncountableLoop() {
- NclPopcountRecognize Popcount(*this);
- if (Popcount.recognize())
- return true;
+static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) {
+ uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
+ assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
+ "Don't overflow unsigned.");
+ return (unsigned)SizeInBits >> 3;
+}
- return false;
+static unsigned getStoreStride(const SCEVAddRecExpr *StoreEv) {
+ const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
+ return ConstStride->getAPInt().getZExtValue();
}
-bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return nullptr;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = DL->getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
+ return nullptr;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (DL->isBigEndian())
+ return nullptr;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16)
+ return nullptr;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16)
+ return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16 / Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
+}
+
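The replication rule in getMemSetPatternValue boils down to tiling a small, power-of-two-sized, little-endian constant across 16 bytes. A byte-level sketch of that rule, using an invented helper rather than LLVM's Constant machinery:

#include <cstddef>
#include <cstdint>
#include <vector>

// Tile Bytes (the little-endian image of the stored constant) across a
// 16-byte memset_pattern16 pattern. Returns false for sizes the transform
// rejects: zero, larger than 16, or not a power of two.
bool makePattern16(const std::vector<uint8_t> &Bytes, uint8_t Pattern[16]) {
  std::size_t Size = Bytes.size();
  if (Size == 0 || Size > 16 || (Size & (Size - 1)) != 0)
    return false;
  for (std::size_t I = 0; I < 16; ++I)
    Pattern[I] = Bytes[I % Size]; // e.g. a 4-byte value is repeated 4 times
  return true;
}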
+bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+ // Don't touch volatile stores.
+ if (!SI->isSimple())
return false;
- CurLoop = L;
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
- // If the loop could not be converted to canonical form, it must have an
- // indirectbr in it, just give up.
- if (!L->getLoopPreheader())
+ // Reject stores that are so large that they overflow an unsigned.
+ uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
- // Disable loop idiom recognition if the function's name is a common idiom.
- StringRef Name = L->getHeader()->getParent()->getName();
- if (Name == "memset" || Name == "memcpy")
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
- SE = &getAnalysis<ScalarEvolution>();
- if (SE->hasLoopInvariantBackedgeTakenCount(L))
- return runOnCountableLoop();
- return runOnNoncountableLoop();
+ // Check to see if we have a constant stride.
+ if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
+ return false;
+
+ return true;
+}
+
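The "AddRec like {base,+,1}" shape that isLegalStore looks for corresponds to a source loop whose store address advances by a constant amount each iteration. A minimal C++ example of such a loop (the function name is made up for illustration):

    // Each iteration stores to P + i * sizeof(int), so ScalarEvolution models
    // the address as the affine recurrence {P,+,4}: start P, constant stride 4.
    void zeroFill(int *P, unsigned N) {
      for (unsigned i = 0; i != N; ++i)
        P[i] = 0; // a candidate for the memset idiom
    }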
+void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
+ StoreRefs.clear();
+ for (Instruction &I : *BB) {
+ StoreInst *SI = dyn_cast<StoreInst>(&I);
+ if (!SI)
+ continue;
+
+ // Make sure this is a strided store with a constant stride.
+ if (!isLegalStore(SI))
+ continue;
+
+ // Save the store locations.
+ StoreRefs.push_back(SI);
+ }
}
/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count. This block is known to be in the current
/// loop and not in any subloops.
-bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
- SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+bool LoopIdiomRecognize::runOnLoopBlock(
+ BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks) {
// We can only promote stores in this block if they are unconditionally
// executed in the loop. For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop. Verify this now.
@@ -692,25 +355,18 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
return false;
bool MadeChange = false;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = I++;
- // Look for store instructions, which may be optimized to memset/memcpy.
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- WeakVH InstPtr(I);
- if (!processLoopStore(SI, BECount)) continue;
- MadeChange = true;
-
- // If processing the store invalidated our iterator, start over from the
- // top of the block.
- if (!InstPtr)
- I = BB->begin();
- continue;
- }
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ collectStores(BB);
+ for (auto &SI : StoreRefs)
+ MadeChange |= processLoopStore(SI, BECount);
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *Inst = &*I++;
// Look for memset instructions, which may be optimized to a larger memset.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
- WeakVH InstPtr(I);
- if (!processLoopMemSet(MSI, BECount)) continue;
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(&*I);
+ if (!processLoopMemSet(MSI, BECount))
+ continue;
MadeChange = true;
// If processing the memset invalidated our iterator, start over from the
@@ -724,71 +380,38 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
return MadeChange;
}
-
/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
- if (!SI->isSimple()) return false;
+ assert(SI->isSimple() && "Expected only non-volatile stores.");
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
- // Reject stores that are so large that they overflow an unsigned.
- auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
- uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType());
- if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
- return false;
-
- // See if the pointer expression is an AddRec like {base,+,1} on the current
- // loop, which indicates a strided store. If we have something else, it's a
- // random store we can't handle.
- const SCEVAddRecExpr *StoreEv =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
- return false;
-
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
- unsigned StoreSize = (unsigned)SizeInBits >> 3;
- const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
-
- if (!Stride || StoreSize != Stride->getValue()->getValue()) {
- // TODO: Could also handle negative stride here someday, that will require
- // the validity check in mayLoopAccessLocation to be updated though.
- // Enable this to print exact negative strides.
- if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
- dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
- dbgs() << "BB: " << *SI->getParent();
- }
-
+ const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getStoreStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI, DL);
+ if (StoreSize != Stride && StoreSize != -Stride)
return false;
- }
+
+ bool NegStride = StoreSize == -Stride;
// See if we can optimize just this store in isolation.
if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
- StoredVal, SI, StoreEv, BECount))
+ StoredVal, SI, StoreEv, BECount, NegStride))
return true;
- // If the stored value is a strided load in the same loop with the same stride
- // this this may be transformable into a memcpy. This kicks in for stuff like
- // for (i) A[i] = B[i];
- if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
- const SCEVAddRecExpr *LoadEv =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
- if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
- StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
- if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
- return true;
- }
- //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
-
- return false;
+  // Optimize the store into a memcpy, if it feeds a similarly strided load.
+ return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
-bool LoopIdiomRecognize::
-processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
+ const SCEV *BECount) {
// We can only handle non-volatile memsets with a constant size.
- if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+ return false;
// If we're not allowed to hack on memset, we fail.
if (!TLI->has(LibFunc::memset))
@@ -818,17 +441,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
return false;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), MSI->getValue(),
- MSI, Ev, BECount);
+ MSI->getAlignment(), MSI->getValue(), MSI, Ev,
+ BECount, /*NegStride=*/false);
}
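For context, the memset promotion that processLoopMemSet performs corresponds to collapsing a loop of fixed-size memsets, whose stride equals their length, into one call. A hedged C++ before/after sketch (hypothetical names; the legality checks above are what make the rewrite valid):

    #include <cstring>

    // Before: one 16-byte memset per iteration, strided by exactly 16 bytes.
    void clearRows(char *Buf, unsigned Rows) {
      for (unsigned i = 0; i != Rows; ++i)
        std::memset(Buf + i * 16, 0, 16);
    }

    // After (conceptually): a single call covering (BECount + 1) * 16 bytes.
    void clearRowsWide(char *Buf, unsigned Rows) {
      std::memset(Buf, 0, (std::size_t)Rows * 16);
    }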
-
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
-static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
- Loop *L, const SCEV *BECount,
- unsigned StoreSize, AliasAnalysis &AA,
+static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+ const SCEV *BECount, unsigned StoreSize,
+ AliasAnalysis &AA,
Instruction *IgnoredStore) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
@@ -838,7 +460,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
- AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
+ AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -849,59 +471,31 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
- if (&*I != IgnoredStore &&
- (AA.getModRefInfo(I, StoreLoc) & Access))
+ if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
return true;
return false;
}
-/// getMemSetPatternValue - If a strided store of the specified value is safe to
-/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
-/// be passed in. Otherwise, return null.
-///
-/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
-/// just replicate their input array and then pass on to memset_pattern16.
-static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
- // If the value isn't a constant, we can't promote it to being in a constant
- // array. We could theoretically do a store to an alloca or something, but
- // that doesn't seem worthwhile.
- Constant *C = dyn_cast<Constant>(V);
- if (!C) return nullptr;
-
- // Only handle simple values that are a power of two bytes in size.
- uint64_t Size = DL.getTypeSizeInBits(V->getType());
- if (Size == 0 || (Size & 7) || (Size & (Size-1)))
- return nullptr;
-
- // Don't care enough about darwin/ppc to implement this.
- if (DL.isBigEndian())
- return nullptr;
-
- // Convert to size in bytes.
- Size /= 8;
-
- // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
- // if the top and bottom are the same (e.g. for vectors and large integers).
- if (Size > 16) return nullptr;
-
- // If the constant is exactly 16 bytes, just use it.
- if (Size == 16) return C;
-
- // Otherwise, we'll use an array of the constants.
- unsigned ArraySize = 16/Size;
- ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
- return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+// If we have a negative stride, Start refers to the end of the memory location
+// we're trying to memset. Therefore, we need to recompute the base pointer,
+// which is just Start - BECount*Size.
+static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
+ Type *IntPtr, unsigned StoreSize,
+ ScalarEvolution *SE) {
+ const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+ if (StoreSize != 1)
+ Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
+ SCEV::FlagNUW);
+ return SE->getMinusSCEV(Start, Index);
}
-
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
-bool LoopIdiomRecognize::
-processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
- unsigned StoreAlignment, Value *StoredVal,
- Instruction *TheStore, const SCEVAddRecExpr *Ev,
- const SCEV *BECount) {
+bool LoopIdiomRecognize::processLoopStridedStore(
+ Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
+ Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount, bool NegStride) {
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
@@ -909,7 +503,6 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
- auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
// If we're allowed to form a memset, and the stored value would be acceptable
@@ -936,9 +529,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, DL, "loop-idiom");
+ SCEVExpander Expander(*SE, *DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+ Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
+
+ const SCEV *Start = Ev->getStart();
+ // Handle negative strided loops.
+ if (NegStride)
+ Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
@@ -946,12 +545,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
Value *BasePtr =
- Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
- Preheader->getTerminator());
-
- if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
- CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
+ Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
+ if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
+ *AA, TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
@@ -962,36 +558,30 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
- SCEV::FlagNUW);
+ const SCEV *NumBytesS =
+ SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW);
if (StoreSize != 1) {
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
}
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
CallInst *NewCall;
if (SplatValue) {
- NewCall = Builder.CreateMemSet(BasePtr,
- SplatValue,
- NumBytes,
- StoreAlignment);
+ NewCall =
+ Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
- Module *M = TheStore->getParent()->getParent()->getParent();
- Value *MSP = M->getOrInsertFunction("memset_pattern16",
- Builder.getVoidTy(),
- Int8PtrTy,
- Int8PtrTy,
- IntPtr,
- (void*)nullptr);
+ Module *M = TheStore->getModule();
+ Value *MSP =
+ M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
+ Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
  // a constant array of 16 bytes. Plop the value into a mergeable global.
@@ -1015,26 +605,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
return true;
}
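The byte count expanded in processLoopStridedStore follows the comment above, NumBytes = (BECount + 1) * StoreSize, since a loop whose backedge is taken BECount times executes BECount + 1 iterations. A quick standalone check of that arithmetic (illustrative only):

    #include <cassert>
    #include <cstdint>

    std::uint64_t numBytes(std::uint64_t BECount, std::uint64_t StoreSize) {
      return (BECount + 1) * StoreSize; // trip count is BECount + 1
    }

    int main() {
      // for (unsigned i = 0; i != 100; ++i) A[i] = 0; with 4-byte stores:
      // the backedge is taken 99 times, so 100 * 4 = 400 bytes are written.
      assert(numBytes(99, 4) == 400);
      return 0;
    }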
-/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
-/// same-strided load.
-bool LoopIdiomRecognize::
-processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
- const SCEVAddRecExpr *StoreEv,
- const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount) {
+/// If the stored value is a strided load in the same loop with the same stride
+/// this may be transformable into a memcpy. This kicks in for stuff like
+/// for (i) A[i] = B[i];
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
+ StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
+ const SCEV *BECount, bool NegStride) {
// If we're not allowed to form memcpy, we fail.
if (!TLI->has(LibFunc::memcpy))
return false;
- LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+ // The store must be feeding a non-volatile load.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ if (!LI || !LI->isSimple())
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+
+ // The store and load must share the same stride.
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
+ return false;
  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate the
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- const DataLayout &DL = Preheader->getModule()->getDataLayout();
- SCEVExpander Expander(*SE, DL, "loop-idiom");
+ SCEVExpander Expander(*SE, *DL, "loop-idiom");
+
+ const SCEV *StrStart = StoreEv->getStart();
+ unsigned StrAS = SI->getPointerAddressSpace();
+ Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
+
+ // Handle negative strided loops.
+ if (NegStride)
+ StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1042,29 +653,31 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// or write the memory region we're storing to. This includes the load that
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
- Value *StoreBasePtr =
- Expander.expandCodeFor(StoreEv->getStart(),
- Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
- Preheader->getTerminator());
-
- if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
- CurLoop, BECount, StoreSize,
- getAnalysis<AliasAnalysis>(), SI)) {
+ Value *StoreBasePtr = Expander.expandCodeFor(
+ StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
+
+ if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
+ StoreSize, *AA, SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
return false;
}
+ const SCEV *LdStart = LoadEv->getStart();
+ unsigned LdAS = LI->getPointerAddressSpace();
+
+ // Handle negative strided loops.
+ if (NegStride)
+ LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
+
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
- Value *LoadBasePtr =
- Expander.expandCodeFor(LoadEv->getStart(),
- Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
- Preheader->getTerminator());
+ Value *LoadBasePtr = Expander.expandCodeFor(
+ LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
- if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
+ if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
+ *AA, SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
@@ -1074,34 +687,368 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, everything is safe, we can transform this!
-
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
- SCEV::FlagNUW);
+ const SCEV *NumBytesS =
+ SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
if (StoreSize != 1)
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
SCEV::FlagNUW);
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
CallInst *NewCall =
- Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
- std::min(SI->getAlignment(), LI->getAlignment()));
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
NewCall->setDebugLoc(SI->getDebugLoc());
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
-
- // Okay, the memset has been formed. Zap the original store and anything that
+ // Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
deleteDeadInstruction(SI, TLI);
++NumMemCpy;
return true;
}
+
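The pairing recognized by processLoopStoreOfLoopLoad is the element-by-element copy loop. A minimal C++ rendering of the source pattern and its intended replacement (hypothetical names; the rewrite is only valid when the regions do not overlap, which is what the mayLoopAccessLocation checks establish):

    #include <cstring>

    // Before: a load and a store with the same stride in every iteration.
    void copyLoop(int *A, const int *B, unsigned N) {
      for (unsigned i = 0; i != N; ++i)
        A[i] = B[i];
    }

    // After (conceptually): a single memcpy of (BECount + 1) * sizeof(int) bytes.
    void copyCall(int *A, const int *B, unsigned N) {
      std::memcpy(A, B, (std::size_t)N * sizeof(int));
    }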
+bool LoopIdiomRecognize::runOnNoncountableLoop() {
+ return recognizePopcount();
+}
+
+/// Check if the given conditional branch is based on the comparison between
+/// a variable and zero, and if the variable is non-zero, the control yields to
+/// the loop entry. If the branch matches the behavior, the variable involved
+/// in the comparion is returned. This function will be called to see if the
+/// precondition and postcondition of the loop are in desirable form.
+static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
+ if (!BI || !BI->isConditional())
+ return nullptr;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return nullptr;
+
+ ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ if (!CmpZero || !CmpZero->isZero())
+ return nullptr;
+
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) ||
+ (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry))
+ return Cond->getOperand(0);
+
+ return nullptr;
+}
+
+/// Return true iff the idiom is detected in the loop.
+///
+/// Additionally:
+/// 1) \p CntInst is set to the instruction counting the population bit.
+/// 2) \p CntPhi is set to the corresponding phi node.
+/// 3) \p Var is set to the value whose population bits are being counted.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// if (x0 != 0)
+/// goto loop-exit // the precondition of the loop
+/// cnt0 = init-val;
+/// do {
+/// x1 = phi (x0, x2);
+/// cnt1 = phi(cnt0, cnt2);
+///
+/// cnt2 = cnt1 + 1;
+/// ...
+/// x2 = x1 & (x1 - 1);
+/// ...
+/// } while(x != 0);
+///
+/// loop-exit:
+/// \endcode
+static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
+ Instruction *&CntInst, PHINode *&CntPhi,
+ Value *&Var) {
+  // step 1: Check to see if the loop-back branch matches this pattern:
+ // "if (a!=0) goto loop-entry".
+ BasicBlock *LoopEntry;
+ Instruction *DefX2, *CountInst;
+ Value *VarX1, *VarX0;
+ PHINode *PhiX, *CountPhi;
+
+ DefX2 = CountInst = nullptr;
+ VarX1 = VarX0 = nullptr;
+ PhiX = CountPhi = nullptr;
+ LoopEntry = *(CurLoop->block_begin());
+
+ // step 1: Check if the loop-back branch is in desirable form.
+ {
+ if (Value *T = matchCondition(
+ dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
+ DefX2 = dyn_cast<Instruction>(T);
+ else
+ return false;
+ }
+
+ // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
+ {
+ if (!DefX2 || DefX2->getOpcode() != Instruction::And)
+ return false;
+
+ BinaryOperator *SubOneOp;
+
+ if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
+ VarX1 = DefX2->getOperand(1);
+ else {
+ VarX1 = DefX2->getOperand(0);
+ SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
+ }
+ if (!SubOneOp)
+ return false;
+
+ Instruction *SubInst = cast<Instruction>(SubOneOp);
+ ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
+ if (!Dec ||
+ !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
+ (SubInst->getOpcode() == Instruction::Add &&
+ Dec->isAllOnesValue()))) {
+ return false;
+ }
+ }
+
+ // step 3: Check the recurrence of variable X
+ {
+ PhiX = dyn_cast<PHINode>(VarX1);
+ if (!PhiX ||
+ (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
+ return false;
+ }
+ }
+
+  // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
+ {
+ CountInst = nullptr;
+ for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
+ IterE = LoopEntry->end();
+ Iter != IterE; Iter++) {
+ Instruction *Inst = &*Iter;
+ if (Inst->getOpcode() != Instruction::Add)
+ continue;
+
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ if (!Inc || !Inc->isOne())
+ continue;
+
+ PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
+ if (!Phi || Phi->getParent() != LoopEntry)
+ continue;
+
+      // Check if the result of the instruction is live out of the loop.
+ bool LiveOutLoop = false;
+ for (User *U : Inst->users()) {
+ if ((cast<Instruction>(U))->getParent() != LoopEntry) {
+ LiveOutLoop = true;
+ break;
+ }
+ }
+
+ if (LiveOutLoop) {
+ CountInst = Inst;
+ CountPhi = Phi;
+ break;
+ }
+ }
+
+ if (!CountInst)
+ return false;
+ }
+
+ // step 5: check if the precondition is in this form:
+ // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
+ {
+ auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
+ if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
+ return false;
+
+ CntInst = CountInst;
+ CntPhi = CountPhi;
+ Var = T;
+ }
+
+ return true;
+}
+
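The loop detectPopcountIdiom matches is the classic Kernighan bit-counting loop sketched in the \code block above. A standalone C++ version, checked against the builtin it gets rewritten to (__builtin_popcount is a GCC/Clang builtin; the function name here is made up):

    #include <cassert>
    #include <cstdint>

    // x &= x - 1 clears the lowest set bit, so the body runs once per set bit.
    unsigned popcountLoop(std::uint32_t X) {
      unsigned Cnt = 0;
      while (X != 0) {
        ++Cnt;
        X &= X - 1;
      }
      return Cnt;
    }

    int main() {
      assert(popcountLoop(0xF0F0u) == 8);
      assert(popcountLoop(0xF0F0u) == (unsigned)__builtin_popcount(0xF0F0u));
      return 0;
    }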
+/// Recognizes a population count idiom in a non-countable loop.
+///
+/// If detected, transforms the relevant code to issue the popcount intrinsic
+/// function call, and returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizePopcount() {
+ if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
+ return false;
+
+  // Counting the population is usually done with a few arithmetic
+  // instructions. Such instructions can easily be "absorbed" by vacant slots
+  // in a non-compact loop. Therefore, recognizing the popcount idiom only
+  // makes sense in a compact loop.
+
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
+
+ BasicBlock *LoopBody = *(CurLoop->block_begin());
+ if (LoopBody->size() >= 20) {
+ // The loop is too big, bail out.
+ return false;
+ }
+
+ // It should have a preheader containing nothing but an unconditional branch.
+ BasicBlock *PH = CurLoop->getLoopPreheader();
+ if (!PH)
+ return false;
+ if (&PH->front() != PH->getTerminator())
+ return false;
+ auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
+ if (!EntryBI || EntryBI->isConditional())
+ return false;
+
+  // It should have a precondition block where the generated popcount intrinsic
+ // function can be inserted.
+ auto *PreCondBB = PH->getSinglePredecessor();
+ if (!PreCondBB)
+ return false;
+ auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ if (!PreCondBI || PreCondBI->isUnconditional())
+ return false;
+
+ Instruction *CntInst;
+ PHINode *CntPhi;
+ Value *Val;
+ if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
+ return false;
+
+ transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
+ return true;
+}
+
+static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
+ DebugLoc DL) {
+ Value *Ops[] = {Val};
+ Type *Tys[] = {Val->getType()};
+
+ Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
+ Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+ CallInst *CI = IRBuilder.CreateCall(Func, Ops);
+ CI->setDebugLoc(DL);
+
+ return CI;
+}
+
+void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
+ Instruction *CntInst,
+ PHINode *CntPhi, Value *Var) {
+ BasicBlock *PreHead = CurLoop->getLoopPreheader();
+ auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ const DebugLoc DL = CntInst->getDebugLoc();
+
+ // Assuming before transformation, the loop is following:
+ // if (x) // the precondition
+ // do { cnt++; x &= x - 1; } while(x);
+
+ // Step 1: Insert the ctpop instruction at the end of the precondition block
+ IRBuilder<> Builder(PreCondBr);
+ Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
+ {
+ PopCnt = createPopcntIntrinsic(Builder, Var, DL);
+ NewCount = PopCntZext =
+ Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
+
+ if (NewCount != PopCnt)
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+
+ // TripCnt is exactly the number of iterations the loop has
+ TripCnt = NewCount;
+
+ // If the population counter's initial value is not zero, insert Add Inst.
+ Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero()) {
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+ }
+ }
+
+ // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
+ // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
+ // function would be partial dead code, and downstream passes will drag
+ // it back from the precondition block to the preheader.
+ {
+ ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
+
+ Value *Opnd0 = PopCntZext;
+ Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
+ if (PreCond->getOperand(0) != Var)
+ std::swap(Opnd0, Opnd1);
+
+ ICmpInst *NewPreCond = cast<ICmpInst>(
+ Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
+ PreCondBr->setCondition(NewPreCond);
+
+ RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
+ }
+
+ // Step 3: Note that the population count is exactly the trip count of the
+  // loop in question, which enables us to convert the loop from a
+  // noncountable loop into a countable one. The benefit is twofold:
+ //
+ // - If the loop only counts population, the entire loop becomes dead after
+ // the transformation. It is a lot easier to prove a countable loop dead
+ // than to prove a noncountable one. (In some C dialects, an infinite loop
+ // isn't dead even if it computes nothing useful. In general, DCE needs
+  //    to prove a noncountable loop finite before it can safely delete it.)
+ //
+ // - If the loop also performs something else, it remains alive.
+ // Since it is transformed to countable form, it can be aggressively
+ // optimized by some optimizations which are in general not applicable
+ // to a noncountable loop.
+ //
+  // After this step, this loop (conceptually) would look like the following:
+ // newcnt = __builtin_ctpop(x);
+ // t = newcnt;
+ // if (x)
+  //     do { cnt++; x &= x-1; t--; } while (t > 0);
+ BasicBlock *Body = *(CurLoop->block_begin());
+ {
+ auto *LbBr = dyn_cast<BranchInst>(Body->getTerminator());
+ ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
+ Type *Ty = TripCnt->getType();
+
+ PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
+
+ Builder.SetInsertPoint(LbCond);
+ Instruction *TcDec = cast<Instruction>(
+ Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
+ "tcdec", false, true));
+
+ TcPhi->addIncoming(TripCnt, PreHead);
+ TcPhi->addIncoming(TcDec, Body);
+
+ CmpInst::Predicate Pred =
+ (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
+ LbCond->setPredicate(Pred);
+ LbCond->setOperand(0, TcDec);
+ LbCond->setOperand(1, ConstantInt::get(Ty, 0));
+ }
+
+ // Step 4: All the references to the original population counter outside
+ // the loop are replaced with the NewCount -- the value returned from
+ // __builtin_ctpop().
+ CntInst->replaceUsesOutsideBlock(NewCount, Body);
+
+ // step 5: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
+ SE->forgetLoop(CurLoop);
+}
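A plain C++ sketch of the shape the loop takes after transformLoopToPopcount, matching the conceptual comment in Step 3 (names are illustrative, and __builtin_popcount stands in for the ctpop intrinsic):

    #include <cassert>

    // The bit-clearing counter still runs, but the loop now exits on an
    // explicit trip count seeded from the popcount, making it countable.
    unsigned transformedShape(unsigned X) {
      unsigned NewCount = (unsigned)__builtin_popcount(X);
      unsigned Cnt = 0;
      if (X != 0) {
        unsigned T = NewCount;
        do {
          ++Cnt;
          X &= X - 1;
        } while (--T > 0);
      }
      return Cnt; // equals NewCount; uses outside the loop are rewritten to it
    }

    int main() {
      assert(transformedShape(0xBu) == 3); // 0xB = binary 1011, three set bits
      return 0;
    }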
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index e125026..b4102fe 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -48,7 +48,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -112,7 +112,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify instructions in the current basic block.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
// The first time through the loop ToSimplify is empty and we try to
// simplify all instructions. On later iterations ToSimplify is not
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 9d7e57f..4295235 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -99,7 +99,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
return false;
if (St && !St->isSimple())
return false;
- MemInstr.push_back(I);
+ MemInstr.push_back(&*I);
}
}
@@ -176,7 +176,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
}
}
- // We don't have a DepMatrix to check legality return false
+  // We don't have a DepMatrix to check legality; return false.
if (DepMatrix.size() == 0)
return false;
return true;
@@ -331,9 +331,9 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
class LoopInterchangeLegality {
public:
LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
- LoopInterchange *Pass)
- : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass),
- InnerLoopHasReduction(false) {}
+ LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
+ PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {}
/// Check if the loops can be interchanged.
bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
@@ -357,9 +357,10 @@ private:
Loop *OuterLoop;
Loop *InnerLoop;
- /// Scev analysis.
ScalarEvolution *SE;
- LoopInterchange *CurrentPass;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ bool PreserveLCSSA;
bool InnerLoopHasReduction;
};
@@ -371,7 +372,7 @@ public:
LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
- /// Check if the loop interchange is profitable
+ /// Check if the loop interchange is profitable.
bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
CharMatrix &DepMatrix);
@@ -385,12 +386,12 @@ private:
ScalarEvolution *SE;
};
-/// LoopInterchangeTransform interchanges the loop
+/// LoopInterchangeTransform interchanges the loop.
class LoopInterchangeTransform {
public:
LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
- LoopInterchange *Pass, BasicBlock *LoopNestExit,
+ BasicBlock *LoopNestExit,
bool InnerLoopContainsReductions)
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
LoopExit(LoopNestExit),
@@ -424,21 +425,22 @@ private:
bool InnerLoopHasReduction;
};
-// Main LoopInterchange Pass
+// Main LoopInterchange Pass.
struct LoopInterchange : public FunctionPass {
static char ID;
ScalarEvolution *SE;
LoopInfo *LI;
DependenceAnalysis *DA;
DominatorTree *DT;
+ bool PreserveLCSSA;
LoopInterchange()
: FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolution>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DependenceAnalysis>();
@@ -447,11 +449,13 @@ struct LoopInterchange : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DA = &getAnalysis<DependenceAnalysis>();
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
// Build up a worklist of loop pairs to analyze.
SmallVector<LoopVector, 8> Worklist;
@@ -489,7 +493,7 @@ struct LoopInterchange : public FunctionPass {
unsigned selectLoopForInterchange(LoopVector LoopList) {
// TODO: Add a better heuristic to select the loop to be interchanged based
- // on the dependece matrix. Currently we select the innermost loop.
+ // on the dependence matrix. Currently we select the innermost loop.
return LoopList.size() - 1;
}
@@ -544,7 +548,7 @@ struct LoopInterchange : public FunctionPass {
}
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
- // Move the selected loop outwards to the best posible position.
+ // Move the selected loop outwards to the best possible position.
for (unsigned i = SelecLoopId; i > 0; i--) {
bool Interchanged =
processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
@@ -574,7 +578,8 @@ struct LoopInterchange : public FunctionPass {
Loop *InnerLoop = LoopList[InnerLoopId];
Loop *OuterLoop = LoopList[OuterLoopId];
- LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this);
+ LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT,
+ PreserveLCSSA);
if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
return false;
@@ -586,7 +591,7 @@ struct LoopInterchange : public FunctionPass {
return false;
}
- LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this,
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT,
LoopNestExit, LIL.hasInnerLoopReduction());
LIT.transform();
DEBUG(dbgs() << "Loops interchanged\n");
@@ -655,7 +660,7 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n");
// We do not have any basic block in between now make sure the outer header
- // and outer loop latch doesnt contain any unsafe instructions.
+ // and outer loop latch doesn't contain any unsafe instructions.
if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
containsUnsafeInstructionsInLatch(OuterLoopLatch))
return false;
@@ -698,9 +703,9 @@ bool LoopInterchangeLegality::findInductionAndReductions(
return false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
RecurrenceDescriptor RD;
+ InductionDescriptor ID;
PHINode *PHI = cast<PHINode>(I);
- ConstantInt *StepValue = nullptr;
- if (isInductionPHI(PHI, SE, StepValue))
+ if (InductionDescriptor::isInductionPHI(PHI, SE, ID))
Inductions.push_back(PHI);
else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
Reductions.push_back(PHI);
@@ -836,7 +841,7 @@ bool LoopInterchangeLegality::currentLimitations() {
else
FoundInduction = true;
}
- // The loop latch ended and we didnt find the induction variable return as
+  // The loop latch ended and we didn't find the induction variable; return as a
// current limitation.
if (!FoundInduction)
return true;
@@ -867,12 +872,14 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
isa<PHINode>(OuterLoopPreHeader->begin()) ||
!OuterLoopPreHeader->getUniquePredecessor()) {
- OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass);
+ OuterLoopPreHeader =
+ InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA);
}
if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
InnerLoopPreHeader == OuterLoop->getHeader()) {
- InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass);
+ InnerLoopPreHeader =
+ InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA);
}
// TODO: The loops could not be interchanged due to current limitations in the
@@ -966,7 +973,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
unsigned OuterLoopId,
CharMatrix &DepMatrix) {
- // TODO: Add Better Profitibility checks.
+ // TODO: Add better profitability checks.
// e.g
// 1) Construct dependency matrix and move the one with no loop carried dep
// inside to enable vectorization.
@@ -980,7 +987,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
if (Cost < 0)
return true;
- // It is not profitable as per current cache profitibility model. But check if
+ // It is not profitable as per current cache profitability model. But check if
// we can move this loop outside to improve parallelism.
bool ImprovesPar =
isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
@@ -996,7 +1003,7 @@ void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
return;
}
}
- assert(false && "Couldn't find loop");
+ llvm_unreachable("Couldn't find loop");
}
void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
@@ -1045,7 +1052,7 @@ bool LoopInterchangeTransform::transform() {
splitInnerLoopLatch(InnerIndexVar);
DEBUG(dbgs() << "splitInnerLoopLatch Done\n");
- // Splits the inner loops phi nodes out into a seperate basic block.
+ // Splits the inner loops phi nodes out into a separate basic block.
splitInnerLoopHeader();
DEBUG(dbgs() << "splitInnerLoopHeader Done\n");
}
@@ -1113,8 +1120,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
auto &ToList = InsertBefore->getParent()->getInstList();
auto &FromList = FromBB->getInstList();
- ToList.splice(InsertBefore, FromList, FromList.begin(),
- FromBB->getTerminator());
+ ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
+ FromBB->getTerminator()->getIterator());
}
void LoopInterchangeTransform::adjustOuterLoopPreheader() {
@@ -1181,8 +1188,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
return false;
- BasicBlock *InnerLoopHeaderSucessor = InnerLoopHeader->getUniqueSuccessor();
- if (!InnerLoopHeaderSucessor)
+ BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
+ if (!InnerLoopHeaderSuccessor)
return false;
// Adjust Loop Preheader and headers
@@ -1198,11 +1205,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
OuterLoopHeaderBI->setSuccessor(i, LoopExit);
else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
- OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSucessor);
+ OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
}
// Adjust reduction PHI's now that the incoming block has changed.
- updateIncomingBlock(InnerLoopHeaderSucessor, InnerLoopHeader,
+ updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
OuterLoopHeader);
BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
@@ -1286,10 +1293,10 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
char LoopInterchange::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
new file mode 100644
index 0000000..1064d08
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -0,0 +1,566 @@
+//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a loop-aware load elimination pass.
+//
+// It uses LoopAccessAnalysis to identify loop-carried dependences with a
+// distance of one between stores and loads. These form the candidates for the
+// transformation. The source value of each store is then propagated to the
+// user of the corresponding load. This makes the load dead.
+//
+// The pass can also version the loop and add memchecks in order to prove that
+// may-aliasing stores can't change the value in memory before it's read by the
+// load.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include <forward_list>
+
+#define LLE_OPTION "loop-load-elim"
+#define DEBUG_TYPE LLE_OPTION
+
+using namespace llvm;
+
+static cl::opt<unsigned> CheckPerElim(
+ "runtime-check-per-loop-load-elim", cl::Hidden,
+ cl::desc("Max number of memchecks allowed per eliminated load on average"),
+ cl::init(1));
+
+static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
+ "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
+ cl::desc("The maximum number of SCEV checks allowed for Loop "
+ "Load Elimination"));
+
+
+STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
+
+namespace {
+
+/// \brief Represent a store-to-forwarding candidate.
+struct StoreToLoadForwardingCandidate {
+ LoadInst *Load;
+ StoreInst *Store;
+
+ StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
+ : Load(Load), Store(Store) {}
+
+ /// \brief Return true if the dependence from the store to the load has a
+ /// distance of one. E.g. A[i+1] = A[i]
+ bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const {
+ Value *LoadPtr = Load->getPointerOperand();
+ Value *StorePtr = Store->getPointerOperand();
+ Type *LoadPtrType = LoadPtr->getType();
+ Type *LoadType = LoadPtrType->getPointerElementType();
+
+ assert(LoadPtrType->getPointerAddressSpace() ==
+ StorePtr->getType()->getPointerAddressSpace() &&
+ LoadType == StorePtr->getType()->getPointerElementType() &&
+ "Should be a known dependence");
+
+ auto &DL = Load->getParent()->getModule()->getDataLayout();
+ unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
+
+ auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
+ auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
+
+ // We don't need to check non-wrapping here because forward/backward
+ // dependence wouldn't be valid if these weren't monotonic accesses.
+ auto *Dist = cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
+ const APInt &Val = Dist->getAPInt();
+ return Val.abs() == TypeByteSize;
+ }
+
+ Value *getLoadPtr() const { return Load->getPointerOperand(); }
+
+#ifndef NDEBUG
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const StoreToLoadForwardingCandidate &Cand) {
+ OS << *Cand.Store << " -->\n";
+ OS.indent(2) << *Cand.Load << "\n";
+ return OS;
+ }
+#endif
+};
+
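The "distance of one" relation that isDependenceDistanceOfOne checks means the store in iteration i writes exactly the location the load reads in iteration i + 1, so the loaded value can be carried across the backedge instead of re-read. A hedged C++ before/after illustration (hypothetical names; it assumes no other store to the same locations intervenes, which is what the pass's runtime checks establish):

    // Before: each iteration re-loads the value the previous iteration stored.
    void before(int *A, const int *B, unsigned N) {
      for (unsigned i = 0; i != N; ++i)
        A[i + 1] = A[i] + B[i];
    }

    // After forwarding: the value is carried in a scalar (the "store_forwarded"
    // PHI in IR terms) and the in-loop load of A[i] goes dead.
    void after(int *A, const int *B, unsigned N) {
      if (N == 0)
        return;
      int Carried = A[0]; // initial load, hoisted to the preheader
      for (unsigned i = 0; i != N; ++i) {
        int V = Carried + B[i];
        A[i + 1] = V;
        Carried = V;
      }
    }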
+/// \brief Check if the store dominates all latches, so as long as there is no
+/// intervening store this value will be loaded in the next iteration.
+bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
+ DominatorTree *DT) {
+ SmallVector<BasicBlock *, 8> Latches;
+ L->getLoopLatches(Latches);
+ return std::all_of(Latches.begin(), Latches.end(),
+ [&](const BasicBlock *Latch) {
+ return DT->dominates(StoreBlock, Latch);
+ });
+}
+
+/// \brief The per-loop class that does most of the work.
+class LoadEliminationForLoop {
+public:
+ LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
+ DominatorTree *DT)
+ : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {}
+
+ /// \brief Look through the loop-carried and loop-independent dependences in
+ /// this loop and find store->load dependences.
+ ///
+ /// Note that no candidate is returned if LAA has failed to analyze the loop
+ /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
+ std::forward_list<StoreToLoadForwardingCandidate>
+ findStoreToLoadDependences(const LoopAccessInfo &LAI) {
+ std::forward_list<StoreToLoadForwardingCandidate> Candidates;
+
+ const auto *Deps = LAI.getDepChecker().getDependences();
+ if (!Deps)
+ return Candidates;
+
+ // Find store->load dependences (consequently true dep). Both lexically
+ // forward and backward dependences qualify. Disqualify loads that have
+ // other unknown dependences.
+
+ SmallSet<Instruction *, 4> LoadsWithUnknownDepedence;
+
+ for (const auto &Dep : *Deps) {
+ Instruction *Source = Dep.getSource(LAI);
+ Instruction *Destination = Dep.getDestination(LAI);
+
+ if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
+ if (isa<LoadInst>(Source))
+ LoadsWithUnknownDepedence.insert(Source);
+ if (isa<LoadInst>(Destination))
+ LoadsWithUnknownDepedence.insert(Destination);
+ continue;
+ }
+
+ if (Dep.isBackward())
+ // Note that the designations source and destination follow the program
+ // order, i.e. source is always first. (The direction is given by the
+ // DepType.)
+ std::swap(Source, Destination);
+ else
+ assert(Dep.isForward() && "Needs to be a forward dependence");
+
+ auto *Store = dyn_cast<StoreInst>(Source);
+ if (!Store)
+ continue;
+ auto *Load = dyn_cast<LoadInst>(Destination);
+ if (!Load)
+ continue;
+ Candidates.emplace_front(Load, Store);
+ }
+
+ if (!LoadsWithUnknownDepedence.empty())
+ Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
+ return LoadsWithUnknownDepedence.count(C.Load);
+ });
+
+ return Candidates;
+ }
+
+ /// \brief Return the index of the instruction according to program order.
+ unsigned getInstrIndex(Instruction *Inst) {
+ auto I = InstOrder.find(Inst);
+ assert(I != InstOrder.end() && "No index for instruction");
+ return I->second;
+ }
+
+ /// \brief If a load has multiple candidates associated (i.e. different
+ /// stores), it means that it could be forwarding from multiple stores
+ /// depending on control flow. Remove these candidates.
+ ///
+ /// Here, we rely on LAA to include the relevant loop-independent dependences.
+ /// LAA is known to omit these in the very simple case when the read and the
+ /// write within an alias set always takes place using the *same* pointer.
+ ///
+ /// However, we know that this is not the case here, i.e. we can rely on LAA
+  /// to provide us with loop-independent dependences for the cases we're
+  /// interested in. Consider, for example, the case where a loop-independent
+  /// dependence S1->S2 invalidates the forwarding S3->S2.
+ ///
+ /// A[i] = ... (S1)
+ /// ... = A[i] (S2)
+ /// A[i+1] = ... (S3)
+ ///
+ /// LAA will perform dependence analysis here because there are two
+ /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
+ void removeDependencesFromMultipleStores(
+ std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
+ // If Store is nullptr it means that we have multiple stores forwarding to
+    // this load.
+ typedef DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>
+ LoadToSingleCandT;
+ LoadToSingleCandT LoadToSingleCand;
+
+ for (const auto &Cand : Candidates) {
+ bool NewElt;
+ LoadToSingleCandT::iterator Iter;
+
+ std::tie(Iter, NewElt) =
+ LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
+ if (!NewElt) {
+ const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
+ // Already multiple stores forward to this load.
+ if (OtherCand == nullptr)
+ continue;
+
+        // Handle the very basic case where the two stores are in the same
+ // block so deciding which one forwards is easy. The later one forwards
+ // as long as they both have a dependence distance of one to the load.
+ if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
+ Cand.isDependenceDistanceOfOne(PSE) &&
+ OtherCand->isDependenceDistanceOfOne(PSE)) {
+ // They are in the same block, the later one will forward to the load.
+ if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
+ OtherCand = &Cand;
+ } else
+ OtherCand = nullptr;
+ }
+ }
+
+ Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
+ if (LoadToSingleCand[Cand.Load] != &Cand) {
+ DEBUG(dbgs() << "Removing from candidates: \n" << Cand
+ << " The load may have multiple stores forwarding to "
+ << "it\n");
+ return true;
+ }
+ return false;
+ });
+ }
+
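One way a load ends up with more than one forwarding candidate is a store that sits under control flow, as the comment above describes. An illustrative C++ shape that removeDependencesFromMultipleStores would reject (made-up names; the pass itself reasons about LAA dependences, not source code):

    // Both conditional stores are at distance one from the load of A[i] in the
    // next iteration, so which one forwards depends on the branch taken there;
    // the load is therefore dropped from the candidate list.
    void ambiguous(int *A, const int *B, unsigned N) {
      for (unsigned i = 0; i != N; ++i) {
        if (B[i] > 0)
          A[i + 1] = B[i];
        else
          A[i + 1] = -B[i];
        A[i] += 1; // could forward from either store above
      }
    }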
+  /// \brief Given two pointer operations, identified by their
+  /// RuntimePointerChecking indices, return true if they require an alias
+  /// check.
+ ///
+ /// We need a check if one is a pointer for a candidate load and the other is
+ /// a pointer for a possibly intervening store.
+ bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
+ const SmallSet<Value *, 4> &PtrsWrittenOnFwdingPath,
+ const std::set<Value *> &CandLoadPtrs) {
+ Value *Ptr1 =
+ LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue;
+ Value *Ptr2 =
+ LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue;
+ return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
+ (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
+ }
+
+ /// \brief Return pointers that are possibly written to on the path from a
+ /// forwarding store to a load.
+ ///
+ /// These pointers need to be alias-checked against the forwarding candidates.
+ SmallSet<Value *, 4> findPointersWrittenOnForwardingPath(
+ const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+ // From FirstStore to LastLoad neither of the elimination candidate loads
+ // should overlap with any of the stores.
+ //
+ // E.g.:
+ //
+ // st1 C[i]
+ // ld1 B[i] <-------,
+ // ld0 A[i] <----, | * LastLoad
+ // ... | |
+ // st2 E[i] | |
+ // st3 B[i+1] -- | -' * FirstStore
+ // st0 A[i+1] ---'
+ // st4 D[i]
+ //
+ // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
+ // ld0.
+
+ LoadInst *LastLoad =
+ std::max_element(Candidates.begin(), Candidates.end(),
+ [&](const StoreToLoadForwardingCandidate &A,
+ const StoreToLoadForwardingCandidate &B) {
+ return getInstrIndex(A.Load) < getInstrIndex(B.Load);
+ })
+ ->Load;
+ StoreInst *FirstStore =
+ std::min_element(Candidates.begin(), Candidates.end(),
+ [&](const StoreToLoadForwardingCandidate &A,
+ const StoreToLoadForwardingCandidate &B) {
+ return getInstrIndex(A.Store) <
+ getInstrIndex(B.Store);
+ })
+ ->Store;
+
+ // We're looking for stores after the first forwarding store until the end
+ // of the loop, then from the beginning of the loop until the last
+    // forwarded-to load. Collect the pointers of those stores.
+ SmallSet<Value *, 4> PtrsWrittenOnFwdingPath;
+
+ auto InsertStorePtr = [&](Instruction *I) {
+ if (auto *S = dyn_cast<StoreInst>(I))
+ PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
+ };
+ const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
+ std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
+ MemInstrs.end(), InsertStorePtr);
+ std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
+ InsertStorePtr);
+
+ return PtrsWrittenOnFwdingPath;
+ }
+
+ /// \brief Determine the pointer alias checks to prove that there are no
+ /// intervening stores.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> collectMemchecks(
+ const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+
+ SmallSet<Value *, 4> PtrsWrittenOnFwdingPath =
+ findPointersWrittenOnForwardingPath(Candidates);
+
+ // Collect the pointers of the candidate loads.
+ // FIXME: SmallSet does not work with std::inserter.
+ std::set<Value *> CandLoadPtrs;
+ std::transform(Candidates.begin(), Candidates.end(),
+ std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
+ std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
+
+ const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+ std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (auto PtrIdx1 : Check.first->Members)
+ for (auto PtrIdx2 : Check.second->Members)
+ if (needsChecking(PtrIdx1, PtrIdx2,
+ PtrsWrittenOnFwdingPath, CandLoadPtrs))
+ return true;
+ return false;
+ });
+
+ DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
+ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+
+ return Checks;
+ }
+
+ /// \brief Perform the transformation for a candidate.
+ void
+ propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
+ SCEVExpander &SEE) {
+ //
+ // loop:
+ // %x = load %gep_i
+ // = ... %x
+ // store %y, %gep_i_plus_1
+ //
+ // =>
+ //
+ // ph:
+ // %x.initial = load %gep_0
+ // loop:
+ // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+ // %x = load %gep_i <---- now dead
+ // = ... %x.storeforward
+ // store %y, %gep_i_plus_1
+
+ Value *Ptr = Cand.Load->getPointerOperand();
+ auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
+ auto *PH = L->getLoopPreheader();
+ Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
+ PH->getTerminator());
+ Value *Initial =
+ new LoadInst(InitialPtr, "load_initial", PH->getTerminator());
+ PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
+ &L->getHeader()->front());
+ PHI->addIncoming(Initial, PH);
+ PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
+
+ Cand.Load->replaceAllUsesWith(PHI);
+ }
+
+ /// \brief Top-level driver for each loop: find store->load forwarding
+  /// candidates, add run-time checks, and perform the transformation.
+ bool processLoop() {
+ DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
+ << "\" checking " << *L << "\n");
+ // Look for store-to-load forwarding cases across the
+ // backedge. E.g.:
+ //
+ // loop:
+ // %x = load %gep_i
+ // = ... %x
+ // store %y, %gep_i_plus_1
+ //
+ // =>
+ //
+ // ph:
+ // %x.initial = load %gep_0
+ // loop:
+ // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+ // %x = load %gep_i <---- now dead
+ // = ... %x.storeforward
+ // store %y, %gep_i_plus_1
+
+ // First start with store->load dependences.
+ auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
+ if (StoreToLoadDependences.empty())
+ return false;
+
+ // Generate an index for each load and store according to the original
+ // program order. This will be used later.
+ InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
+
+ // To keep things simple for now, remove those where the load is potentially
+ // fed by multiple stores.
+ removeDependencesFromMultipleStores(StoreToLoadDependences);
+ if (StoreToLoadDependences.empty())
+ return false;
+
+ // Filter the candidates further.
+ SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
+ unsigned NumForwarding = 0;
+ for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
+ DEBUG(dbgs() << "Candidate " << Cand);
+      // Make sure that the stored value is available everywhere in the loop in
+ // the next iteration.
+ if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
+ continue;
+
+ // Check whether the SCEV difference is the same as the induction step,
+      // i.e. the load reads the value stored by the previous iteration.
+ if (!Cand.isDependenceDistanceOfOne(PSE))
+ continue;
+
+ ++NumForwarding;
+ DEBUG(dbgs()
+ << NumForwarding
+ << ". Valid store-to-load forwarding across the loop backedge\n");
+ Candidates.push_back(Cand);
+ }
+ if (Candidates.empty())
+ return false;
+
+ // Check intervening may-alias stores. These need runtime checks for alias
+ // disambiguation.
+ SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks =
+ collectMemchecks(Candidates);
+
+ // Too many checks are likely to outweigh the benefits of forwarding.
+ if (Checks.size() > Candidates.size() * CheckPerElim) {
+ DEBUG(dbgs() << "Too many run-time checks needed.\n");
+ return false;
+ }
+
+ if (LAI.PSE.getUnionPredicate().getComplexity() >
+ LoadElimSCEVCheckThreshold) {
+ DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
+ return false;
+ }
+
+ // Point of no-return, start the transformation. First, version the loop if
+ // necessary.
+ if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
+ LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
+ LV.setAliasChecks(std::move(Checks));
+ LV.setSCEVChecks(LAI.PSE.getUnionPredicate());
+ LV.versionLoop();
+ }
+
+ // Next, propagate the value stored by the store to the users of the load.
+ // Also for the first iteration, generate the initial value of the load.
+ SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
+ "storeforward");
+ for (const auto &Cand : Candidates)
+ propagateStoredValueToLoadUsers(Cand, SEE);
+ NumLoopLoadEliminted += NumForwarding;
+
+ return true;
+ }
+
+private:
+ Loop *L;
+
+ /// \brief Maps the load/store instructions to their index according to
+ /// program order.
+ DenseMap<Instruction *, unsigned> InstOrder;
+
+ // Analyses used.
+ LoopInfo *LI;
+ const LoopAccessInfo &LAI;
+ DominatorTree *DT;
+ PredicatedScalarEvolution PSE;
+};
+
+/// \brief The pass. Most of the work is delegated to the per-loop
+/// LoadEliminationForLoop class.
+class LoopLoadElimination : public FunctionPass {
+public:
+ LoopLoadElimination() : FunctionPass(ID) {
+ initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto *LAA = &getAnalysis<LoopAccessAnalysis>();
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+    // Build up a worklist of inner-loops to transform. Collecting them up front
+    // is necessary because versioning a loop creates new loops and can
+    // invalidate iterators across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+ // The actual work is performed by LoadEliminationForLoop.
+ LoadEliminationForLoop LEL(L, LI, LAI, DT);
+ Changed |= LEL.processLoop();
+ }
+
+    // Report whether any of the loops were transformed.
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ static char ID;
+};
+}
+
+char LoopLoadElimination::ID;
+static const char LLE_name[] = "Loop Load Elimination";
+
+INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopLoadEliminationPass() {
+ return new LoopLoadElimination();
+}
+}
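
A minimal sketch, not part of the patch, of the transformation the new LoopLoadElimination pass performs: the value stored in iteration i to the location loaded in iteration i+1 is carried across the backedge in a scalar (a PHI in IR), so the in-loop load disappears and only an initial load in the preheader remains. The run-time alias checks the pass emits via loop versioning are omitted here, and the function and variable names are illustrative.

  #include <cstddef>

  // Before: every iteration reloads A[i], which the previous iteration stored.
  void before(int *A, const int *B, size_t N) {
    for (size_t i = 0; i + 1 < N; ++i) {
      int X = A[i];         // load fed by the store below (dependence distance of one)
      A[i + 1] = X + B[i];  // store to the location loaded in the next iteration
    }
  }

  // After: the stored value plays the role of the "store_forwarded" PHI and is
  // carried across the backedge; only the initial load (in the preheader) remains.
  void after(int *A, const int *B, size_t N) {
    if (N < 2)
      return;
    int Forwarded = A[0];            // %x.initial, loaded in the preheader
    for (size_t i = 0; i + 1 < N; ++i) {
      int Y = Forwarded + B[i];      // uses of the old load now use the forwarded value
      A[i + 1] = Y;                  // the original store stays
      Forwarded = Y;                 // value carried to the next iteration
    }
  }
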
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index ed103e6..27c2d88 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -147,12 +147,12 @@ namespace {
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -162,11 +162,15 @@ namespace {
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
DominatorTree *DT;
+ bool PreserveLCSSA;
typedef SmallVector<Instruction *, 16> SmallInstructionVector;
typedef SmallSet<Instruction *, 16> SmallInstructionSet;
- // A chain of isomorphic instructions, indentified by a single-use PHI,
+  // Maps an induction variable to its increment.
+ DenseMap<Instruction *, int64_t> IVToIncMap;
+
+ // A chain of isomorphic instructions, identified by a single-use PHI
// representing a reduction. Only the last value may be used outside the
// loop.
struct SimpleLoopReduction {
@@ -300,22 +304,6 @@ namespace {
// The functions below can be called after we've finished processing all
// instructions in the loop, and we know which reductions were selected.
- // Is the provided instruction the PHI of a reduction selected for
- // rerolling?
- bool isSelectedPHI(Instruction *J) {
- if (!isa<PHINode>(J))
- return false;
-
- for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
- RI != RIE; ++RI) {
- int i = *RI;
- if (cast<Instruction>(J) == PossibleReds[i].getPHI())
- return true;
- }
-
- return false;
- }
-
bool validateSelected();
void replaceSelected();
@@ -335,7 +323,7 @@ namespace {
// x[i*3+1] = y2
// x[i*3+2] = y3
//
- // Base instruction -> i*3
+ // Base instruction -> i*3
// +---+----+
// / | \
// ST[y1] +1 +2 <-- Roots
@@ -366,8 +354,11 @@ namespace {
struct DAGRootTracker {
DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
ScalarEvolution *SE, AliasAnalysis *AA,
- TargetLibraryInfo *TLI)
- : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {}
+ TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA,
+ DenseMap<Instruction *, int64_t> &IncrMap)
+ : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
+ PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
/// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots();
@@ -413,11 +404,14 @@ namespace {
ScalarEvolution *SE;
AliasAnalysis *AA;
TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ bool PreserveLCSSA;
// The loop induction variable.
Instruction *IV;
// Loop step amount.
- uint64_t Inc;
+ int64_t Inc;
// Loop reroll count; if Inc == 1, this records the scaling applied
// to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
// If Inc is not 1, Scale = Inc.
@@ -430,6 +424,8 @@ namespace {
// they are used in (or specially, IL_All for instructions
// used in the loop increment mechanism).
UsesTy Uses;
+      // Maps an induction variable to its increment.
+ DenseMap<Instruction *, int64_t> &IVToIncMap;
};
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
@@ -442,10 +438,10 @@ namespace {
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
@@ -477,21 +473,20 @@ void LoopReroll::collectPossibleIVs(Loop *L,
continue;
if (const SCEVAddRecExpr *PHISCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&*I))) {
if (PHISCEV->getLoop() != L)
continue;
if (!PHISCEV->isAffine())
continue;
if (const SCEVConstant *IncSCEV =
dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
- if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
+ const APInt &AInt = IncSCEV->getAPInt().abs();
+ if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc))
continue;
- if (IncSCEV->getValue()->uge(MaxInc))
- continue;
-
- DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
- *PHISCEV << "\n");
- PossibleIVs.push_back(I);
+ IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
+ DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
+ << "\n");
+ PossibleIVs.push_back(&*I);
}
}
}
@@ -552,7 +547,7 @@ void LoopReroll::collectPossibleReductions(Loop *L,
if (!I->getType()->isSingleValueType())
continue;
- SimpleLoopReduction SLR(I, L);
+ SimpleLoopReduction SLR(&*I, L);
if (!SLR.valid())
continue;
@@ -699,17 +694,11 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
}
}
- int64_t V = CI->getValue().getSExtValue();
+ int64_t V = std::abs(CI->getValue().getSExtValue());
if (Roots.find(V) != Roots.end())
// No duplicates, please.
return false;
- // FIXME: Add support for negative values.
- if (V < 0) {
- DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
- return false;
- }
-
Roots[V] = cast<Instruction>(I);
}
@@ -731,7 +720,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
unsigned NumBaseUses = BaseUsers.size();
if (NumBaseUses == 0)
NumBaseUses = Roots.begin()->second->getNumUses();
-
+
// Check that every node has the same number of users.
for (auto &KV : Roots) {
if (KV.first == 0)
@@ -744,7 +733,7 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
}
}
- return true;
+ return true;
}
bool LoopReroll::DAGRootTracker::
@@ -787,7 +776,7 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
if (!collectPossibleRoots(IVU, V))
return false;
- // If we didn't get a root for index zero, then IVU must be
+ // If we didn't get a root for index zero, then IVU must be
// subsumed.
if (V.find(0) == V.end())
SubsumedInsts.insert(IVU);
@@ -818,13 +807,10 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
}
bool LoopReroll::DAGRootTracker::findRoots() {
-
- const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
- Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
- getValue()->getZExtValue();
+ Inc = IVToIncMap[IV];
assert(RootSets.empty() && "Unclean state!");
- if (Inc == 1) {
+ if (std::abs(Inc) == 1) {
for (auto *IVU : IV->users()) {
if (isLoopIncrement(IVU, IV))
LoopIncs.push_back(cast<Instruction>(IVU));
@@ -996,6 +982,25 @@ bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
return false;
}
+static bool isIgnorableInst(const Instruction *I) {
+ if (isa<DbgInfoIntrinsic>(I))
+ return true;
+ const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case llvm::Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // TODO: the following intrinsics may also be whitelisted:
+ // lifetime_start, lifetime_end, invariant_start, invariant_end
+ return true;
+ }
+ return false;
+}
+
bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// We now need to check for equivalence of the use graph of each root with
// that of the primary induction variable (excluding the roots). Our goal
@@ -1029,7 +1034,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// Make sure all instructions in the loop are in one and only one
// set.
for (auto &KV : Uses) {
- if (KV.second.count() != 1) {
+ if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
<< *KV.first << " (#uses=" << KV.second.count() << ")\n");
return false;
@@ -1103,15 +1108,15 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
" vs. " << *RootInst << "\n");
return false;
}
-
+
RootIt = TryIt;
RootInst = TryIt->first;
}
// All instructions between the last root and this root
- // may belong to some other iteration. If they belong to a
+ // may belong to some other iteration. If they belong to a
// future iteration, then they're dangerous to alias with.
- //
+ //
// Note that because we allow a limited amount of flexibility in the order
// that we visit nodes, LastRootIt might be *before* RootIt, in which
// case we've already checked this set of instructions so we shouldn't
@@ -1267,6 +1272,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
++J;
}
+ bool Negative = IVToIncMap[IV] < 0;
const DataLayout &DL = Header->getModule()->getDataLayout();
// We need to create a new induction variable for each different BaseInst.
@@ -1275,13 +1281,12 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
const SCEVAddRecExpr *RealIVSCEV =
cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
const SCEV *Start = RealIVSCEV->getStart();
- const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>
- (SE->getAddRecExpr(Start,
- SE->getConstant(RealIVSCEV->getType(), 1),
- L, SCEV::FlagAnyWrap));
+ const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>(SE->getAddRecExpr(
+ Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L,
+ SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander.
SCEVExpander Expander(*SE, DL, "reroll");
- Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), &Header->front());
for (auto &KV : Uses) {
if (KV.second.find_first() == 0)
@@ -1294,8 +1299,8 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
// Iteration count SCEV minus 1
- const SCEV *ICMinus1SCEV =
- SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+ const SCEV *ICMinus1SCEV = SE->getMinusSCEV(
+ ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1));
Value *ICMinus1; // Iteration count minus 1
if (isa<SCEVConstant>(ICMinus1SCEV)) {
@@ -1303,7 +1308,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
} else {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, Parent);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
Preheader->getTerminator());
@@ -1444,13 +1449,14 @@ void LoopReroll::ReductionTracker::replaceSelected() {
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
- DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
+ DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
+ IVToIncMap);
if (!DAGRoots.findRoots())
return false;
DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
*IV << "\n");
-
+
if (!DAGRoots.validate(Reductions))
return false;
if (!Reductions.validateSelected())
@@ -1469,11 +1475,12 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
@@ -1490,13 +1497,13 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
- const SCEV *IterCount =
- SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
+ const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
// First, we need to find the induction variable with respect to which we can
// reroll (there may be several possible options).
SmallInstructionVector PossibleIVs;
+ IVToIncMap.clear();
collectPossibleIVs(L, PossibleIVs);
if (PossibleIVs.empty()) {
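
For context on the LoopReroll changes above (IVToIncMap, the abs() handling in collectPossibleIVs and collectPossibleRoots, and the Negative flag in replace()), a minimal C++ sketch of what rerolling recovers, including the down-counting loops this patch newly supports. This is illustrative code, not code from the patch, and it assumes the trip count is a multiple of the unroll factor, as the pass requires.

  // A loop hand-unrolled by 3; -loop-reroll recognizes the base access (i)
  // and the roots (i + 1, i + 2) and rewrites it as the single-step loop below.
  void unrolled(int *x, const int *a, int n) {  // n assumed to be a multiple of 3
    for (int i = 0; i < n; i += 3) {
      x[i]     = a[i]     * 2;
      x[i + 1] = a[i + 1] * 2;
      x[i + 2] = a[i + 2] * 2;
    }
  }

  // Conceptually rerolled form: one copy of the body, induction step of 1.
  void rerolled(int *x, const int *a, int n) {
    for (int i = 0; i < n; ++i)
      x[i] = a[i] * 2;
  }

  // The same pattern with a down-counting induction variable (step -3, roots
  // at i - 1 and i - 2) is now handled as well, rerolling to a step of -1.
  void unrolled_down(int *x, const int *a, int n) {  // n a multiple of 3
    for (int i = n - 1; i >= 0; i -= 3) {
      x[i]     = a[i]     * 2;
      x[i - 1] = a[i - 1] * 2;
      x[i - 2] = a[i - 2] * 2;
    }
  }
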
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index a675e12..5e6c2da 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -13,11 +13,15 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -41,95 +45,6 @@ DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
cl::desc("The default maximum header size for automatic loop rotation"));
STATISTIC(NumRotated, "Number of loops rotated");
-namespace {
-
- class LoopRotate : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
- initializeLoopRotatePass(*PassRegistry::getPassRegistry());
- if (SpecifiedMaxHeaderSize == -1)
- MaxHeaderSize = DefaultRotationThreshold;
- else
- MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
- }
-
- // LCSSA form makes instruction renaming easier.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L, bool SimplifiedLatch);
-
- private:
- unsigned MaxHeaderSize;
- LoopInfo *LI;
- const TargetTransformInfo *TTI;
- AssumptionCache *AC;
- DominatorTree *DT;
- };
-}
-
-char LoopRotate::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
- return new LoopRotate(MaxHeaderSize);
-}
-
-/// Rotate Loop L as many times as possible. Return true if
-/// the loop is rotated at least once.
-bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipOptnoneFunction(L))
- return false;
-
- // Save the loop metadata.
- MDNode *LoopMD = L->getLoopID();
-
- Function &F = *L->getHeader()->getParent();
-
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
- // Simplify the loop latch before attempting to rotate the header
- // upward. Rotation may not be needed if the loop tail can be folded into the
- // loop exit.
- bool SimplifiedLatch = simplifyLoopLatch(L);
-
- // One loop can be rotated multiple times.
- bool MadeChange = false;
- while (rotateLoop(L, SimplifiedLatch)) {
- MadeChange = true;
- SimplifiedLatch = false;
- }
-
- // Restore the loop metadata.
- // NB! We presume LoopRotation DOESN'T ADD its own metadata.
- if ((MadeChange || SimplifiedLatch) && LoopMD)
- L->setLoopID(LoopMD);
-
- return MadeChange;
-}
/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
/// old header into the preheader. If there were uses of the values produced by
@@ -147,7 +62,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// as necessary.
SSAUpdater SSA;
for (I = OrigHeader->begin(); I != E; ++I) {
- Value *OrigHeaderVal = I;
+ Value *OrigHeaderVal = &*I;
// If there are no uses of the value (e.g. because it returns void), there
// is nothing to rewrite.
@@ -196,127 +111,6 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
}
}
-/// Determine whether the instructions in this range may be safely and cheaply
-/// speculated. This is not an important enough situation to develop complex
-/// heuristics. We handle a single arithmetic instruction along with any type
-/// conversions.
-static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
- BasicBlock::iterator End, Loop *L) {
- bool seenIncrement = false;
- bool MultiExitLoop = false;
-
- if (!L->getExitingBlock())
- MultiExitLoop = true;
-
- for (BasicBlock::iterator I = Begin; I != End; ++I) {
-
- if (!isSafeToSpeculativelyExecute(I))
- return false;
-
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- switch (I->getOpcode()) {
- default:
- return false;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GEPOperator>(I)->hasAllConstantIndices())
- return false;
- // fall-thru to increment case
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr: {
- Value *IVOpnd = !isa<Constant>(I->getOperand(0))
- ? I->getOperand(0)
- : !isa<Constant>(I->getOperand(1))
- ? I->getOperand(1)
- : nullptr;
- if (!IVOpnd)
- return false;
-
- // If increment operand is used outside of the loop, this speculation
- // could cause extra live range interference.
- if (MultiExitLoop) {
- for (User *UseI : IVOpnd->users()) {
- auto *UserInst = cast<Instruction>(UseI);
- if (!L->contains(UserInst))
- return false;
- }
- }
-
- if (seenIncrement)
- return false;
- seenIncrement = true;
- break;
- }
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- // ignore type conversions
- break;
- }
- }
- return true;
-}
-
-/// Fold the loop tail into the loop exit by speculating the loop tail
-/// instructions. Typically, this is a single post-increment. In the case of a
-/// simple 2-block loop, hoisting the increment can be much better than
-/// duplicating the entire loop header. In the case of loops with early exits,
-/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
-/// canonical form so downstream passes can handle it.
-///
-/// I don't believe this invalidates SCEV.
-bool LoopRotate::simplifyLoopLatch(Loop *L) {
- BasicBlock *Latch = L->getLoopLatch();
- if (!Latch || Latch->hasAddressTaken())
- return false;
-
- BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
- if (!Jmp || !Jmp->isUnconditional())
- return false;
-
- BasicBlock *LastExit = Latch->getSinglePredecessor();
- if (!LastExit || !L->isLoopExiting(LastExit))
- return false;
-
- BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
- if (!BI)
- return false;
-
- if (!shouldSpeculateInstrs(Latch->begin(), Jmp, L))
- return false;
-
- DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
- << LastExit->getName() << "\n");
-
- // Hoist the instructions from Latch into LastExit.
- LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
-
- unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
- BasicBlock *Header = Jmp->getSuccessor(0);
- assert(Header == L->getHeader() && "expected a backward branch");
-
- // Remove Latch from the CFG so that LastExit becomes the new Latch.
- BI->setSuccessor(FallThruPath, Header);
- Latch->replaceSuccessorsPhiUsesWith(LastExit);
- Jmp->eraseFromParent();
-
- // Nuke the Latch block.
- assert(Latch->empty() && "unable to evacuate Latch");
- LI->removeBlock(Latch);
- if (DT)
- DT->eraseNode(Latch);
- Latch->eraseFromParent();
- return true;
-}
-
/// Rotate loop LP. Return true if the loop is rotated.
///
/// \param SimplifiedLatch is true if the latch was just folded into the final
@@ -327,7 +121,10 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
/// rotation. LoopRotate should be repeatable and converge to a canonical
/// form. This property is satisfied because simplifying the loop latch can only
/// happen once across multiple invocations of the LoopRotate pass.
-bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
+static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE,
+ bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -382,7 +179,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
- if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ if (SE)
SE->forgetLoop(L);
DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
@@ -420,7 +217,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
while (I != E) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// If the instruction's operands are invariant and it doesn't read or write
// memory, then it is safe to hoist. Doing this doesn't change the order of
@@ -465,8 +262,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
TerminatorInst *TI = OrigHeader->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
+ for (BasicBlock *SuccBB : TI->successors())
+ for (BasicBlock::iterator BI = SuccBB->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
@@ -607,3 +404,221 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
++NumRotated;
return true;
}
+
+/// Determine whether the instructions in this range may be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End, Loop *L) {
+ bool seenIncrement = false;
+ bool MultiExitLoop = false;
+
+ if (!L->getExitingBlock())
+ MultiExitLoop = true;
+
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(&*I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr: {
+ Value *IVOpnd = !isa<Constant>(I->getOperand(0))
+ ? I->getOperand(0)
+ : !isa<Constant>(I->getOperand(1))
+ ? I->getOperand(1)
+ : nullptr;
+ if (!IVOpnd)
+ return false;
+
+ // If increment operand is used outside of the loop, this speculation
+ // could cause extra live range interference.
+ if (MultiExitLoop) {
+ for (User *UseI : IVOpnd->users()) {
+ auto *UserInst = cast<Instruction>(UseI);
+ if (!L->contains(UserInst))
+ return false;
+ }
+ }
+
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the case of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
+static bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return false;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return false;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return false;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
+ return false;
+
+ DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ // Hoist the instructions from Latch into LastExit.
+ LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
+ Latch->begin(), Jmp->getIterator());
+
+ unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
+ BasicBlock *Header = Jmp->getSuccessor(0);
+ assert(Header == L->getHeader() && "expected a backward branch");
+
+ // Remove Latch from the CFG so that LastExit becomes the new Latch.
+ BI->setSuccessor(FallThruPath, Header);
+ Latch->replaceSuccessorsPhiUsesWith(LastExit);
+ Jmp->eraseFromParent();
+
+ // Nuke the Latch block.
+ assert(Latch->empty() && "unable to evacuate Latch");
+ LI->removeBlock(Latch);
+ if (DT)
+ DT->eraseNode(Latch);
+ Latch->eraseFromParent();
+ return true;
+}
+
+/// Rotate \c L as many times as possible. Return true if the loop is rotated
+/// at least once.
+static bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
+ const TargetTransformInfo *TTI,
+ AssumptionCache *AC, DominatorTree *DT,
+ ScalarEvolution *SE) {
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ bool SimplifiedLatch = simplifyLoopLatch(L, LI, DT);
+
+ // One loop can be rotated multiple times.
+ bool MadeChange = false;
+ while (rotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE, SimplifiedLatch)) {
+ MadeChange = true;
+ SimplifiedLatch = false;
+ }
+
+ // Restore the loop metadata.
+ // NB! We presume LoopRotation DOESN'T ADD its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
+ return MadeChange;
+}
+
+namespace {
+
+class LoopRotate : public LoopPass {
+ unsigned MaxHeaderSize;
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ if (SpecifiedMaxHeaderSize == -1)
+ MaxHeaderSize = DefaultRotationThreshold;
+ else
+ MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
+ }
+
+ // LCSSA form makes instruction renaming easier.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipOptnoneFunction(L))
+ return false;
+ Function &F = *L->getHeader()->getParent();
+
+ auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ auto *SE = SEWP ? &SEWP->getSE() : nullptr;
+
+ return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE);
+ }
+};
+}
+
+char LoopRotate::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+ return new LoopRotate(MaxHeaderSize);
+}
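
The LoopRotation rewrite above is mostly mechanical: rotateLoop and simplifyLoopLatch become free static functions that take the analyses as parameters, the pass class shrinks to a thin driver, and the analysis-usage list gains the new AA wrapper passes as preserved. As a reminder of what rotation itself does, here is a minimal C++ sketch of the source-level effect; it is illustrative, not code from the patch.

  // Before rotation: a "while"-shaped loop. The header both tests the exit
  // condition and begins the body, so it is the block that gets duplicated.
  int sum_while(const int *a, int n) {
    int s = 0, i = 0;
    while (i < n) {   // header: exit test + entry into the body
      s += a[i];
      ++i;            // latch branches back to the header
    }
    return s;
  }

  // After rotation: a guarded "do-while". The header's test is cloned into
  // the preheader as a guard, and the exit test now sits at the bottom of the
  // loop, the canonical shape later passes (e.g. LICM, vectorization) expect.
  int sum_rotated(const int *a, int n) {
    int s = 0, i = 0;
    if (i < n) {      // cloned header test (the guard)
      do {
        s += a[i];
        ++i;
      } while (i < n);
    }
    return s;
  }
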
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4b59f3d..2101225 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -105,10 +105,33 @@ static bool StressIVChain = false;
namespace {
-/// RegSortData - This class holds data which is used to order reuse candidates.
+struct MemAccessTy {
+ /// Used in situations where the accessed memory type is unknown.
+ static const unsigned UnknownAddressSpace = ~0u;
+
+ Type *MemTy;
+ unsigned AddrSpace;
+
+ MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {}
+
+ MemAccessTy(Type *Ty, unsigned AS) :
+ MemTy(Ty), AddrSpace(AS) {}
+
+ bool operator==(MemAccessTy Other) const {
+ return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
+ }
+
+ bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
+
+ static MemAccessTy getUnknown(LLVMContext &Ctx) {
+ return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
+ }
+};
+
+/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
- /// UsedByIndices - This represents the set of LSRUse indices which reference
+ /// This represents the set of LSRUse indices which reference
/// a particular register.
SmallBitVector UsedByIndices;
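
The main functional addition in this LoopStrengthReduce diff is MemAccessTy: the key used to query the target about addressing-mode legality now carries an address space alongside the accessed IR type (see getAccessType later in this diff). A small stand-alone sketch of why that matters, using a hypothetical target whose offset rules differ per address space; this is illustrative code, not part of the patch.

  #include <cstdint>
  #include <cstdio>

  // Toy analogue of MemAccessTy: legality queries must be keyed on the
  // address space as well as the type, because targets can allow different
  // addressing modes (e.g. immediate offsets) in different address spaces.
  struct ToyAccessTy {
    int TypeBits;        // stands in for the accessed IR type
    unsigned AddrSpace;  // the newly tracked address space
  };

  // Hypothetical target: the generic space allows a 12-bit signed offset,
  // while address space 3 allows no immediate offset at all.
  static bool isLegalRegImm(ToyAccessTy AccessTy, int64_t Offset) {
    if (AccessTy.AddrSpace == 3)
      return Offset == 0;
    return Offset >= -2048 && Offset < 2048;
  }

  int main() {
    ToyAccessTy Generic{32, 0}, Local{32, 3};
    // Same type and offset, different answers -- exactly the distinction
    // that a type-only access key could not express.
    std::printf("generic reg+16 legal: %d\n", isLegalRegImm(Generic, 16)); // 1
    std::printf("local   reg+16 legal: %d\n", isLegalRegImm(Local, 16));   // 0
    return 0;
  }
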
@@ -122,16 +145,14 @@ void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
-/// RegUseTracker - Map register candidates to information about how they are
-/// used.
+/// Map register candidates to information about how they are used.
class RegUseTracker {
typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
@@ -139,9 +160,9 @@ class RegUseTracker {
SmallVector<const SCEV *, 16> RegSequence;
public:
- void CountRegister(const SCEV *Reg, size_t LUIdx);
- void DropRegister(const SCEV *Reg, size_t LUIdx);
- void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
+ void countRegister(const SCEV *Reg, size_t LUIdx);
+ void dropRegister(const SCEV *Reg, size_t LUIdx);
+ void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
@@ -160,7 +181,7 @@ public:
}
void
-RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
+RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
std::pair<RegUsesTy::iterator, bool> Pair =
RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
RegSortData &RSD = Pair.first->second;
@@ -171,7 +192,7 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
}
void
-RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
RegUsesTy::iterator It = RegUsesMap.find(Reg);
assert(It != RegUsesMap.end());
RegSortData &RSD = It->second;
@@ -180,7 +201,7 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
}
void
-RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
assert(LUIdx <= LastLUIdx);
// Update RegUses. The data structure is not optimized for this purpose;
@@ -219,9 +240,8 @@ void RegUseTracker::clear() {
namespace {
-/// Formula - This class holds information that describes a formula for
-/// computing satisfying a use. It may include broken-out immediates and scaled
-/// registers.
+/// This class holds information that describes a formula for computing a value
+/// that satisfies a use. It may include broken-out immediates and scaled
+/// registers.
struct Formula {
/// Global base address used for complex addressing.
GlobalValue *BaseGV;
@@ -235,8 +255,8 @@ struct Formula {
/// The scale of any complex addressing.
int64_t Scale;
- /// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty. The canonical representation of a formula is
+  /// The list of "base" registers for this use. When this is non-empty, the
+  /// canonical representation of a formula is
/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
/// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
/// #1 enforces that the scaled register is always used when at least two
@@ -247,31 +267,31 @@ struct Formula {
/// form.
SmallVector<const SCEV *, 4> BaseRegs;
- /// ScaledReg - The 'scaled' register for this use. This should be non-null
- /// when Scale is not zero.
+ /// The 'scaled' register for this use. This should be non-null when Scale is
+ /// not zero.
const SCEV *ScaledReg;
- /// UnfoldedOffset - An additional constant offset which added near the
- /// use. This requires a temporary register, but the offset itself can
- /// live in an add immediate field rather than a register.
+  /// An additional constant offset which is added near the use. This requires a
+ /// temporary register, but the offset itself can live in an add immediate
+ /// field rather than a register.
int64_t UnfoldedOffset;
Formula()
: BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
ScaledReg(nullptr), UnfoldedOffset(0) {}
- void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
+ void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
bool isCanonical() const;
- void Canonicalize();
+ void canonicalize();
- bool Unscale();
+ bool unscale();
size_t getNumRegs() const;
Type *getType() const;
- void DeleteBaseReg(const SCEV *&S);
+ void deleteBaseReg(const SCEV *&S);
bool referencesReg(const SCEV *S) const;
bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
@@ -283,7 +303,7 @@ struct Formula {
}
-/// DoInitialMatch - Recursion helper for InitialMatch.
+/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
@@ -336,10 +356,9 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
Bad.push_back(S);
}
-/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
-/// attempting to keep all loop-invariant and loop-computable values in a
-/// single base register.
-void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
+/// Incorporate loop-variant parts of S into this Formula, attempting to keep
+/// all loop-invariant and loop-computable values in a single base register.
+void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
DoInitialMatch(S, L, Good, Bad, SE);
@@ -355,7 +374,7 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
- Canonicalize();
+ canonicalize();
}
/// \brief Check whether or not this formula statisfies the canonical
@@ -373,7 +392,7 @@ bool Formula::isCanonical() const {
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
-void Formula::Canonicalize() {
+void Formula::canonicalize() {
if (isCanonical())
return;
// So far we did not need this case. This is easy to implement but it is
@@ -394,7 +413,7 @@ void Formula::Canonicalize() {
/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
-bool Formula::Unscale() {
+bool Formula::unscale() {
if (Scale != 1)
return false;
Scale = 0;
@@ -403,15 +422,14 @@ bool Formula::Unscale() {
return true;
}
-/// getNumRegs - Return the total number of register operands used by this
-/// formula. This does not include register uses implied by non-constant
-/// addrec strides.
+/// Return the total number of register operands used by this formula. This does
+/// not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
-/// getType - Return the type of this formula, if it has one, or null
-/// otherwise. This type is meaningless except for the bit size.
+/// Return the type of this formula, if it has one, or null otherwise. This type
+/// is meaningless except for the bit size.
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
@@ -419,21 +437,21 @@ Type *Formula::getType() const {
nullptr;
}
-/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
-void Formula::DeleteBaseReg(const SCEV *&S) {
+/// Delete the given base reg from the BaseRegs list.
+void Formula::deleteBaseReg(const SCEV *&S) {
if (&S != &BaseRegs.back())
std::swap(S, BaseRegs.back());
BaseRegs.pop_back();
}
-/// referencesReg - Test if this formula references the given register.
+/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
return S == ScaledReg ||
std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
}
-/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
-/// which are used by uses other than the use with the given index.
+/// Test whether this formula uses registers which are used by uses other than
+/// the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const {
if (ScaledReg)
@@ -481,30 +499,29 @@ void Formula::print(raw_ostream &OS) const {
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void Formula::dump() const {
print(errs()); errs() << '\n';
}
-#endif
-/// isAddRecSExtable - Return true if the given addrec can be sign-extended
-/// without changing its value.
+/// Return true if the given addrec can be sign-extended without changing its
+/// value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
-/// isAddSExtable - Return true if the given add can be sign-extended
-/// without changing its value.
+/// Return true if the given add can be sign-extended without changing its
+/// value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
-/// isMulSExtable - Return true if the given mul can be sign-extended
-/// without changing its value.
+/// Return true if the given mul can be sign-extended without changing its
+/// value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(),
@@ -512,12 +529,11 @@ static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
-/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
-/// and if the remainder is known to be zero, or null otherwise. If
-/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
-/// to Y, ignoring that the multiplication may overflow, which is useful when
-/// the result will be used in a context where the most significant bits are
-/// ignored.
+/// Return an expression for LHS /s RHS, if it can be determined and if the
+/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
+/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that
+/// the multiplication may overflow, which is useful when the result will be
+/// used in a context where the most significant bits are ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
ScalarEvolution &SE,
bool IgnoreSignificantBits = false) {
@@ -528,7 +544,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Handle a few RHS special cases.
const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (RC) {
- const APInt &RA = RC->getValue()->getValue();
+ const APInt &RA = RC->getAPInt();
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
// some folding.
if (RA.isAllOnesValue())
@@ -542,8 +558,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
return nullptr;
- const APInt &LA = C->getValue()->getValue();
- const APInt &RA = RC->getValue()->getValue();
+ const APInt &LA = C->getAPInt();
+ const APInt &RA = RC->getAPInt();
if (LA.srem(RA) != 0)
return nullptr;
return SE.getConstant(LA.sdiv(RA));
@@ -603,12 +619,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
return nullptr;
}
-/// ExtractImmediate - If S involves the addition of a constant integer value,
-/// return that integer value, and mutate S to point to a new SCEV with that
-/// value excluded.
+/// If S involves the addition of a constant integer value, return that integer
+/// value, and mutate S to point to a new SCEV with that value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- if (C->getValue()->getValue().getMinSignedBits() <= 64) {
+ if (C->getAPInt().getMinSignedBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
@@ -630,9 +645,8 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
return 0;
}
-/// ExtractSymbol - If S involves the addition of a GlobalValue address,
-/// return that symbol, and mutate S to point to a new SCEV with that
-/// value excluded.
+/// If S involves the addition of a GlobalValue address, return that symbol, and
+/// mutate S to point to a new SCEV with that value excluded.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
@@ -657,8 +671,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
return nullptr;
}
-/// isAddressUse - Returns true if the specified instruction is using the
-/// specified value as an address.
+/// Returns true if the specified instruction is using the specified value as an
+/// address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
@@ -682,12 +696,15 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
return isAddress;
}
-/// getAccessType - Return the type of the memory being accessed.
-static Type *getAccessType(const Instruction *Inst) {
- Type *AccessTy = Inst->getType();
- if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
- AccessTy = SI->getOperand(0)->getType();
- else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+/// Return the type of the memory being accessed.
+static MemAccessTy getAccessType(const Instruction *Inst) {
+ MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ AccessTy.MemTy = SI->getOperand(0)->getType();
+ AccessTy.AddrSpace = SI->getPointerAddressSpace();
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ AccessTy.AddrSpace = LI->getPointerAddressSpace();
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
@@ -696,21 +713,21 @@ static Type *getAccessType(const Instruction *Inst) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- AccessTy = II->getArgOperand(0)->getType();
+ AccessTy.MemTy = II->getArgOperand(0)->getType();
break;
}
}
// All pointers have the same requirements, so canonicalize them to an
// arbitrary pointer type to minimize variation.
- if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
- AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
- PTy->getAddressSpace());
+ if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
+ AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace());
return AccessTy;
}
-/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+/// Return true if this AddRec is already a phi in its loop.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
@@ -793,9 +810,8 @@ static bool isHighCostExpansion(const SCEV *S,
return true;
}
-/// DeleteTriviallyDeadInstructions - If any of the instructions is the
-/// specified set are trivially dead, delete them and see if this makes any of
-/// their operands subsequently dead.
+/// If any of the instructions in the specified set are trivially dead, delete
+/// them and see if this makes any of their operands subsequently dead.
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
@@ -842,7 +858,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
namespace {
-/// Cost - This class is used to measure and compare candidate formulae.
+/// This class is used to measure and compare candidate formulae.
class Cost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
@@ -905,7 +921,7 @@ private:
}
-/// RateRegister - Tally up interesting quantities from the given register.
+/// Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
@@ -951,9 +967,9 @@ void Cost::RateRegister(const SCEV *Reg,
SE.hasComputableLoopEvolution(Reg, L);
}
-/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it. Optional LoserRegs provides a way to declare any formula
-/// that refers to one of those regs an instant loser.
+/// Record this register in the set. If we haven't seen it before, rate
+/// it. Optional LoserRegs provides a way to declare any formula that refers to
+/// one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
@@ -1024,7 +1040,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
assert(isValid() && "invalid cost");
}
-/// Lose - Set this cost to a losing value.
+/// Set this cost to a losing value.
void Cost::Lose() {
NumRegs = ~0u;
AddRecCost = ~0u;
@@ -1035,7 +1051,7 @@ void Cost::Lose() {
ScaleCost = ~0u;
}
-/// operator< - Choose the lower cost.
+/// Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
ImmCost, SetupCost) <
@@ -1061,37 +1077,35 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << SetupCost << " setup cost";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void Cost::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
-/// LSRFixup - An operand value in an instruction which is to be replaced
-/// with some equivalent, possibly strength-reduced, replacement.
+/// An operand value in an instruction which is to be replaced with some
+/// equivalent, possibly strength-reduced, replacement.
struct LSRFixup {
- /// UserInst - The instruction which will be updated.
+ /// The instruction which will be updated.
Instruction *UserInst;
- /// OperandValToReplace - The operand of the instruction which will
- /// be replaced. The operand may be used more than once; every instance
- /// will be replaced.
+ /// The operand of the instruction which will be replaced. The operand may be
+ /// used more than once; every instance will be replaced.
Value *OperandValToReplace;
- /// PostIncLoops - If this user is to use the post-incremented value of an
- /// induction variable, this variable is non-null and holds the loop
- /// associated with the induction variable.
+ /// If this user is to use the post-incremented value of an induction
+ /// variable, this variable is non-null and holds the loop associated with the
+ /// induction variable.
PostIncLoopSet PostIncLoops;
- /// LUIdx - The index of the LSRUse describing the expression which
- /// this fixup needs, minus an offset (below).
+ /// The index of the LSRUse describing the expression which this fixup needs,
+ /// minus an offset (below).
size_t LUIdx;
- /// Offset - A constant offset to be added to the LSRUse expression.
- /// This allows multiple fixups to share the same LSRUse with different
- /// offsets, for example in an unrolled loop.
+ /// A constant offset to be added to the LSRUse expression. This allows
+ /// multiple fixups to share the same LSRUse with different offsets, for
+ /// example in an unrolled loop.
int64_t Offset;
bool isUseFullyOutsideLoop(const Loop *L) const;
@@ -1108,8 +1122,7 @@ LSRFixup::LSRFixup()
: UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
Offset(0) {}
-/// isUseFullyOutsideLoop - Test whether this fixup always uses its
-/// value outside of the given loop.
+/// Test whether this fixup always uses its value outside of the given loop.
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
// PHI nodes use their value in their incoming blocks.
if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
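The context line above notes that PHI nodes use their value in their incoming blocks. A rough sketch of that check against the public PHINode/Loop APIs (the helper name and exact loop structure are illustrative, not the pass's own code):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// A PHI's use of V logically occurs in the predecessor block feeding it, so
// that block, not the PHI's parent, is what gets tested against the loop.
static bool phiUsesValueOnlyOutsideLoop(const PHINode *PN, const Value *V,
                                        const Loop *L) {
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == V && L->contains(PN->getIncomingBlock(i)))
      return false;
  return true;
}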
@@ -1149,16 +1162,15 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
-/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
-/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
+/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
+/// SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
static SmallVector<const SCEV *, 4> getEmptyKey() {
SmallVector<const SCEV *, 4> V;
@@ -1182,17 +1194,17 @@ struct UniquifierDenseMapInfo {
}
};
-/// LSRUse - This class holds the state that LSR keeps for each use in
-/// IVUsers, as well as uses invented by LSR itself. It includes information
-/// about what kinds of things can be folded into the user, information about
-/// the user itself, and information about how the use may be satisfied.
-/// TODO: Represent multiple users of the same expression in common?
+/// This class holds the state that LSR keeps for each use in IVUsers, as well
+/// as uses invented by LSR itself. It includes information about what kinds of
+/// things can be folded into the user, information about the user itself, and
+/// information about how the use may be satisfied. TODO: Represent multiple
+/// users of the same expression in common?
class LSRUse {
DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
- /// KindType - An enum for a kind of use, indicating what types of
- /// scaled and immediate operands it might support.
+ /// An enum for a kind of use, indicating what types of scaled and immediate
+ /// operands it might support.
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
@@ -1204,15 +1216,14 @@ public:
typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
KindType Kind;
- Type *AccessTy;
+ MemAccessTy AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
int64_t MaxOffset;
- /// AllFixupsOutsideLoop - This records whether all of the fixups using this
- /// LSRUse are outside of the loop, in which case some special-case heuristics
- /// may be used.
+ /// This records whether all of the fixups using this LSRUse are outside of
+ /// the loop, in which case some special-case heuristics may be used.
bool AllFixupsOutsideLoop;
/// RigidFormula is set to true to guarantee that this use will be associated
@@ -1222,26 +1233,24 @@ public:
/// changing the formula.
bool RigidFormula;
- /// WidestFixupType - This records the widest use type for any fixup using
- /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
- /// max fixup widths to be equivalent, because the narrower one may be relying
- /// on the implicit truncation to truncate away bogus bits.
+ /// This records the widest use type for any fixup using this
+ /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
+ /// fixup widths to be equivalent, because the narrower one may be relying on
+ /// the implicit truncation to truncate away bogus bits.
Type *WidestFixupType;
- /// Formulae - A list of ways to build a value that can satisfy this user.
- /// After the list is populated, one of these is selected heuristically and
- /// used to formulate a replacement for OperandValToReplace in UserInst.
+ /// A list of ways to build a value that can satisfy this user. After the
+ /// list is populated, one of these is selected heuristically and used to
+ /// formulate a replacement for OperandValToReplace in UserInst.
SmallVector<Formula, 12> Formulae;
- /// Regs - The set of register candidates used by all formulae in this LSRUse.
+ /// The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
- LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
- MinOffset(INT64_MAX),
- MaxOffset(INT64_MIN),
- AllFixupsOutsideLoop(true),
- RigidFormula(false),
- WidestFixupType(nullptr) {}
+ LSRUse(KindType K, MemAccessTy AT)
+ : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true), RigidFormula(false),
+ WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
@@ -1254,8 +1263,8 @@ public:
}
-/// HasFormula - Test whether this use as a formula which has the same
-/// registers as the given formula.
+/// Test whether this use has a formula with the same registers as the given
+/// formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
@@ -1264,9 +1273,8 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
return Uniquifier.count(Key);
}
-/// InsertFormula - If the given formula has not yet been inserted, add it to
-/// the list, and return true. Return false otherwise.
-/// The formula must be in canonical form.
+/// If the given formula has not yet been inserted, add it to the list, and
+/// return true. Return false otherwise. The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
assert(F.isCanonical() && "Invalid canonical representation");
@@ -1300,14 +1308,14 @@ bool LSRUse::InsertFormula(const Formula &F) {
return true;
}
-/// DeleteFormula - Remove the given formula from this use's list.
+/// Remove the given formula from this use's list.
void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
}
-/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+/// Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Now that we've filtered out some formulae, recompute the Regs set.
SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
@@ -1320,7 +1328,7 @@ void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Update the RegTracker.
for (const SCEV *S : OldRegs)
if (!Regs.count(S))
- RegUses.DropRegister(S, LUIdx);
+ RegUses.dropRegister(S, LUIdx);
}
void LSRUse::print(raw_ostream &OS) const {
@@ -1331,10 +1339,13 @@ void LSRUse::print(raw_ostream &OS) const {
case ICmpZero: OS << "ICmpZero"; break;
case Address:
OS << "Address of ";
- if (AccessTy->isPointerTy())
+ if (AccessTy.MemTy->isPointerTy())
OS << "pointer"; // the full pointer type could be really verbose
- else
- OS << *AccessTy;
+ else {
+ OS << *AccessTy.MemTy;
+ }
+
+ OS << " in addrspace(" << AccessTy.AddrSpace << ')';
}
OS << ", Offsets={";
@@ -1353,19 +1364,19 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
-#endif
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
- return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+ return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
+ HasBaseReg, Scale, AccessTy.AddrSpace);
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
@@ -1412,7 +1423,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
// Check for overflow.
@@ -1433,7 +1444,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
const Formula &F) {
// For the purpose of isAMCompletelyFolded either having a canonical formula
// or a scale not equal to zero is correct.
@@ -1447,11 +1458,11 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}
-/// isLegalUse - Test whether we know how to expand the current formula.
+/// Test whether we know how to expand the current formula.
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) {
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ MemAccessTy AccessTy, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
// We know how to expand completely foldable formulae.
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale) ||
@@ -1463,8 +1474,8 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- const Formula &F) {
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ MemAccessTy AccessTy, const Formula &F) {
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
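The legality queries in this section now route both halves of a MemAccessTy (the accessed type and its address space) into TTI. A hedged sketch of such a query, using a stand-in struct since MemAccessTy's real definition lies outside this excerpt and the helper name is made up:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Type.h"

namespace {
// Stand-in with the two members the hunks above rely on; not the real class.
struct MemAccessTySketch {
  llvm::Type *MemTy;
  unsigned AddrSpace;
};
} // namespace

// Ask the target whether base + Offset is a legal addressing mode for this
// access, forwarding the address space as the '+' lines above now do.
static bool isSimpleLegalAddress(const llvm::TargetTransformInfo &TTI,
                                 MemAccessTySketch AccessTy, int64_t Offset) {
  return TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                   /*BaseOffset=*/Offset, /*HasBaseReg=*/true,
                                   /*Scale=*/0, AccessTy.AddrSpace);
}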
@@ -1490,14 +1501,12 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
switch (LU.Kind) {
case LSRUse::Address: {
// Check the scaling factor cost with both the min and max offsets.
- int ScaleCostMinOffset =
- TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
- F.BaseOffset + LU.MinOffset,
- F.HasBaseReg, F.Scale);
- int ScaleCostMaxOffset =
- TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
- F.BaseOffset + LU.MaxOffset,
- F.HasBaseReg, F.Scale);
+ int ScaleCostMinOffset = TTI.getScalingFactorCost(
+ LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
+ F.Scale, LU.AccessTy.AddrSpace);
+ int ScaleCostMaxOffset = TTI.getScalingFactorCost(
+ LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
+ F.Scale, LU.AccessTy.AddrSpace);
assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
"Legal addressing mode has an illegal cost!");
@@ -1515,7 +1524,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
- LSRUse::KindType Kind, Type *AccessTy,
+ LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg) {
// Fast-path: zero is always foldable.
@@ -1539,7 +1548,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
ScalarEvolution &SE, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind,
- Type *AccessTy, const SCEV *S, bool HasBaseReg) {
+ MemAccessTy AccessTy, const SCEV *S,
+ bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
@@ -1564,9 +1574,9 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
namespace {
-/// IVInc - An individual increment in a Chain of IV increments.
-/// Relate an IV user to an expression that computes the IV it uses from the IV
-/// used by the previous link in the Chain.
+/// An individual increment in a Chain of IV increments. Relate an IV user to
+/// an expression that computes the IV it uses from the IV used by the previous
+/// link in the Chain.
///
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
/// original IVOperand. The head of the chain's IVOperand is only valid during
@@ -1582,8 +1592,8 @@ struct IVInc {
UserInst(U), IVOperand(O), IncExpr(E) {}
};
-// IVChain - The list of IV increments in program order.
-// We typically add the head of a chain without finding subsequent links.
+// The list of IV increments in program order. We typically add the head of a
+// chain without finding subsequent links.
struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
@@ -1595,7 +1605,7 @@ struct IVChain {
typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
- // begin - return the first increment in the chain.
+ // Return the first increment in the chain.
const_iterator begin() const {
assert(!Incs.empty());
return std::next(Incs.begin());
@@ -1604,32 +1614,30 @@ struct IVChain {
return Incs.end();
}
- // hasIncs - Returns true if this chain contains any increments.
+ // Returns true if this chain contains any increments.
bool hasIncs() const { return Incs.size() >= 2; }
- // add - Add an IVInc to the end of this chain.
+ // Add an IVInc to the end of this chain.
void add(const IVInc &X) { Incs.push_back(X); }
- // tailUserInst - Returns the last UserInst in the chain.
+ // Returns the last UserInst in the chain.
Instruction *tailUserInst() const { return Incs.back().UserInst; }
- // isProfitableIncrement - Returns true if IncExpr can be profitably added to
- // this chain.
+ // Returns true if IncExpr can be profitably added to this chain.
bool isProfitableIncrement(const SCEV *OperExpr,
const SCEV *IncExpr,
ScalarEvolution&);
};
-/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
-/// Distinguish between FarUsers that definitely cross IV increments and
-/// NearUsers that may be used between IV increments.
+/// Helper for CollectChains to track multiple IV increment uses. Distinguish
+/// between FarUsers that definitely cross IV increments and NearUsers that may
+/// be used between IV increments.
struct ChainUsers {
SmallPtrSet<Instruction*, 4> FarUsers;
SmallPtrSet<Instruction*, 4> NearUsers;
};
-/// LSRInstance - This class holds state for the main loop strength reduction
-/// logic.
+/// This class holds state for the main loop strength reduction logic.
class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
@@ -1639,25 +1647,25 @@ class LSRInstance {
Loop *const L;
bool Changed;
- /// IVIncInsertPos - This is the insert position that the current loop's
- /// induction variable increment should be placed. In simple loops, this is
- /// the latch block's terminator. But in more complicated cases, this is a
- /// position which will dominate all the in-loop post-increment users.
+  /// This is the insert position at which the current loop's induction variable
+ /// increment should be placed. In simple loops, this is the latch block's
+ /// terminator. But in more complicated cases, this is a position which will
+ /// dominate all the in-loop post-increment users.
Instruction *IVIncInsertPos;
- /// Factors - Interesting factors between use strides.
+ /// Interesting factors between use strides.
SmallSetVector<int64_t, 8> Factors;
- /// Types - Interesting use types, to facilitate truncation reuse.
+ /// Interesting use types, to facilitate truncation reuse.
SmallSetVector<Type *, 4> Types;
- /// Fixups - The list of operands which are to be replaced.
+ /// The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
- /// Uses - The list of interesting uses.
+ /// The list of interesting uses.
SmallVector<LSRUse, 16> Uses;
- /// RegUses - Track which uses use which register candidates.
+ /// Track which uses use which register candidates.
RegUseTracker RegUses;
// Limit the number of chains to avoid quadratic behavior. We don't expect to
@@ -1665,10 +1673,10 @@ class LSRInstance {
// back to normal LSR behavior for those uses.
static const unsigned MaxChains = 8;
- /// IVChainVec - IV users can form a chain of IV increments.
+ /// IV users can form a chain of IV increments.
SmallVector<IVChain, MaxChains> IVChainVec;
- /// IVIncSet - IV users that belong to profitable IVChains.
+ /// IV users that belong to profitable IVChains.
SmallPtrSet<Use*, MaxChains> IVIncSet;
void OptimizeShadowIV();
@@ -1696,11 +1704,10 @@ class LSRInstance {
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy);
+ LSRUse::KindType Kind, MemAccessTy AccessTy);
- std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
- LSRUse::KindType Kind,
- Type *AccessTy);
+ std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
+ MemAccessTy AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
@@ -1769,18 +1776,16 @@ class LSRInstance {
void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const;
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const;
- void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
- Pass *P);
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
public:
- LSRInstance(Loop *L, Pass *P);
+ LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
+ LoopInfo &LI, const TargetTransformInfo &TTI);
bool getChanged() const { return Changed; }
@@ -1793,8 +1798,8 @@ public:
}
-/// OptimizeShadowIV - If IV is used in a int-to-float cast
-/// inside the loop then try to eliminate the cast operation.
+/// If IV is used in an int-to-float cast inside the loop, then try to eliminate
+/// the cast operation.
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
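For readers unfamiliar with the transformation the comment describes, here is a purely illustrative, source-level picture of the effect OptimizeShadowIV is after; this is not the pass's own code and not IR:

// Illustration only: eliminate the per-iteration int-to-fp cast by keeping a
// floating-point "shadow" of the integer induction variable.
double dotWithIndex(const double *w, int n) {
  double sum = 0.0;
  // Before: sum += (double)i * w[i];  -- casts i on every iteration.
  // After (conceptually): advance a double in lockstep with i instead.
  double fi = 0.0;
  for (int i = 0; i < n; ++i, fi += 1.0)
    sum += fi * w[i];
  return sum;
}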
@@ -1902,9 +1907,8 @@ void LSRInstance::OptimizeShadowIV() {
}
}
-/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
-/// set the IV user and stride information and return true, otherwise return
-/// false.
+/// If Cond has an operand that is an expression of an IV, set the IV user and
+/// stride information and return true, otherwise return false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
for (IVStrideUse &U : IU)
if (U.getUser() == Cond) {
@@ -1917,8 +1921,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
return false;
}
-/// OptimizeMax - Rewrite the loop's terminating condition if it uses
-/// a max computation.
+/// Rewrite the loop's terminating condition if it uses a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
@@ -2076,8 +2079,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
return NewCond;
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
-/// postinc iv when possible.
+/// Change loop terminating condition to use the postinc iv when possible.
void
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
@@ -2152,16 +2154,18 @@ LSRInstance::OptimizeLoopTermCond() {
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Check for possible scaled-address reuse.
- Type *AccessTy = getAccessType(UI->getUser());
+ MemAccessTy AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
- /*BaseOffset=*/ 0,
- /*HasBaseReg=*/ false, Scale))
+ if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/0,
+ /*HasBaseReg=*/false, Scale,
+ AccessTy.AddrSpace))
goto decline_post_inc;
Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
- /*BaseOffset=*/ 0,
- /*HasBaseReg=*/ false, Scale))
+ if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/0,
+ /*HasBaseReg=*/false, Scale,
+ AccessTy.AddrSpace))
goto decline_post_inc;
}
}
@@ -2180,7 +2184,7 @@ LSRInstance::OptimizeLoopTermCond() {
ICmpInst *OldCond = Cond;
Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
- ExitingBlock->getInstList().insert(TermBr, Cond);
+ ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);
// Clone the IVUse, as the old use still exists!
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
@@ -2213,15 +2217,14 @@ LSRInstance::OptimizeLoopTermCond() {
}
}
-/// reconcileNewOffset - Determine if the given use can accommodate a fixup
-/// at the given offset and other details. If so, update the use and
-/// return true.
-bool
-LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy) {
+/// Determine if the given use can accommodate a fixup at the given offset and
+/// other details. If so, update the use and return true.
+bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+ bool HasBaseReg, LSRUse::KindType Kind,
+ MemAccessTy AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
- Type *NewAccessTy = AccessTy;
+ MemAccessTy NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
@@ -2232,8 +2235,10 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// Check for a mismatched access type, and fall back conservatively as needed.
// TODO: Be less conservative when the type is similar and can use the same
// addressing modes.
- if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
- NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+ if (Kind == LSRUse::Address) {
+ if (AccessTy != LU.AccessTy)
+ NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
+ }
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
@@ -2257,12 +2262,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
return true;
}
-/// getUse - Return an LSRUse index and an offset value for a fixup which
-/// needs the given expression, with the given kind and optional access type.
-/// Either reuse an existing use or create a new one, as needed.
-std::pair<size_t, int64_t>
-LSRInstance::getUse(const SCEV *&Expr,
- LSRUse::KindType Kind, Type *AccessTy) {
+/// Return an LSRUse index and an offset value for a fixup which needs the given
+/// expression, with the given kind and optional access type. Either reuse an
+/// existing use or create a new one, as needed.
+std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ MemAccessTy AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
@@ -2300,18 +2305,18 @@ LSRInstance::getUse(const SCEV *&Expr,
return std::make_pair(LUIdx, Offset);
}
-/// DeleteUse - Delete the given use from the Uses list.
+/// Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
// Update RegUses.
- RegUses.SwapAndDropUse(LUIdx, Uses.size());
+ RegUses.swapAndDropUse(LUIdx, Uses.size());
}
-/// FindUseWithFormula - Look for a use distinct from OrigLU which is has
-/// a formula that has the same registers as the given formula.
+/// Look for a use distinct from OrigLU which has a formula with the same
+/// registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
@@ -2396,14 +2401,14 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
SE, true))) {
- if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
- Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ if (Factor->getAPInt().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getAPInt().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
NewStride,
SE, true))) {
- if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
- Factors.insert(Factor->getValue()->getValue().getSExtValue());
+ if (Factor->getAPInt().getMinSignedBits() <= 64)
+ Factors.insert(Factor->getAPInt().getSExtValue());
}
}
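The updated code above reads the stride factor through SCEVConstant::getAPInt() and only folds it when it fits in 64 signed bits. A small sketch of that guard in isolation; the helper name is made up:

#include "llvm/ADT/APInt.h"
#include <cstdint>

// Only convert the arbitrary-precision factor to int64_t when it is known to
// be representable; otherwise report failure instead of truncating.
static bool tryGetSignedFactor(const llvm::APInt &Factor, int64_t &Out) {
  if (Factor.getMinSignedBits() > 64)
    return false;
  Out = Factor.getSExtValue();
  return true;
}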
@@ -2415,9 +2420,9 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
DEBUG(print_factors_and_types(dbgs()));
}
-/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
-/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
-/// Instructions to IVStrideUses, we could partially skip this.
+/// Helper for CollectChains that finds an IV operand (computed by an AddRec in
+/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
+/// IVStrideUses, we could partially skip this.
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
Loop *L, ScalarEvolution &SE) {
@@ -2436,29 +2441,28 @@ findIVOperand(User::op_iterator OI, User::op_iterator OE,
return OI;
}
-/// getWideOperand - IVChain logic must consistenctly peek base TruncInst
-/// operands, so wrap it in a convenient helper.
+/// IVChain logic must consistently peek base TruncInst operands, so wrap it in
+/// a convenient helper.
static Value *getWideOperand(Value *Oper) {
if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
return Trunc->getOperand(0);
return Oper;
}
-/// isCompatibleIVType - Return true if we allow an IV chain to include both
-/// types.
+/// Return true if we allow an IV chain to include both types.
static bool isCompatibleIVType(Value *LVal, Value *RVal) {
Type *LType = LVal->getType();
Type *RType = RVal->getType();
return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
}
-/// getExprBase - Return an approximation of this SCEV expression's "base", or
-/// NULL for any constant. Returning the expression itself is
-/// conservative. Returning a deeper subexpression is more precise and valid as
-/// long as it isn't less complex than another subexpression. For expressions
-/// involving multiple unscaled values, we need to return the pointer-type
-/// SCEVUnknown. This avoids forming chains across objects, such as:
-/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
+/// Return an approximation of this SCEV expression's "base", or NULL for any
+/// constant. Returning the expression itself is conservative. Returning a
+/// deeper subexpression is more precise and valid as long as it isn't less
+/// complex than another subexpression. For expressions involving multiple
+/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
+/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
+/// IVInc==b-a.
///
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
/// SCEVUnknown, we simply return the rightmost SCEV operand.
@@ -2601,8 +2605,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
return cost < 0;
}
-/// ChainInstruction - Add this IV user to an existing chain or make it the head
-/// of a new chain.
+/// Add this IV user to an existing chain or make it the head of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec) {
// When IVs are used as types of varying widths, they are generally converted
@@ -2714,7 +2717,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
-/// CollectChains - Populate the vector of Chains.
+/// Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
@@ -2755,19 +2758,19 @@ void LSRInstance::CollectChains() {
for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
I != E; ++I) {
// Skip instructions that weren't seen by IVUsers analysis.
- if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&*I))
continue;
// Ignore users that are part of a SCEV expression. This way we only
// consider leaf IV Users. This effectively rediscovers a portion of
// IVUsers analysis but in program order this time.
- if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
+ if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(&*I)))
continue;
// Remove this instruction from any NearUsers set it may be in.
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
ChainIdx < NChains; ++ChainIdx) {
- ChainUsersVec[ChainIdx].NearUsers.erase(I);
+ ChainUsersVec[ChainIdx].NearUsers.erase(&*I);
}
// Search for operands that can be chained.
SmallPtrSet<Instruction*, 4> UniqueOperands;
@@ -2776,7 +2779,7 @@ void LSRInstance::CollectChains() {
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst).second)
- ChainInstruction(I, IVOpInst, ChainUsersVec);
+ ChainInstruction(&*I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} // Continue walking down the instructions.
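Several of the '+' lines in this hunk replace a bare iterator with &*I. That is the usual way to recover an Instruction* from a BasicBlock::iterator once the implicit conversion is unavailable; a minimal sketch of the pattern, with an illustrative callback:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Walk a block and hand each instruction to a callback that wants a raw
// pointer: dereference the iterator and take the element's address.
static void forEachInstruction(BasicBlock &BB, void (*Visit)(Instruction *)) {
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    Visit(&*I); // was simply Visit(I) when iterators converted implicitly
}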
@@ -2828,20 +2831,20 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
- if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
+ if (IncConst->getAPInt().getMinSignedBits() > 64)
return false;
+ MemAccessTy AccessTy = getAccessType(UserInst);
int64_t IncOffset = IncConst->getValue()->getSExtValue();
- if (!isAlwaysFoldable(TTI, LSRUse::Address,
- getAccessType(UserInst), /*BaseGV=*/ nullptr,
- IncOffset, /*HaseBaseReg=*/ false))
+ if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
+                        IncOffset, /*HasBaseReg=*/false))
return false;
return true;
}
-/// GenerateIVChains - Generate an add or subtract for each IVInc in a chain to
-/// materialize the IV user's operand from the previous IV user's operand.
+/// Generate an add or subtract for each IVInc in a chain to materialize the IV
+/// user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
@@ -2961,7 +2964,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = U.getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- Type *AccessTy = nullptr;
+ MemAccessTy AccessTy;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -3027,9 +3030,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
DEBUG(print_fixups(dbgs()));
}
-/// InsertInitialFormula - Insert a formula for the given expression into
-/// the given use, separating out loop-variant portions from loop-invariant
-/// and loop-computable portions.
+/// Insert a formula for the given expression into the given use, separating out
+/// loop-variant portions from loop-invariant and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
// Mark uses whose expressions cannot be expanded.
@@ -3037,13 +3039,13 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
LU.RigidFormula = true;
Formula F;
- F.InitialMatch(S, L, SE);
+ F.initialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
-/// InsertSupplementalFormula - Insert a simple single-register formula for
-/// the given expression into the given use.
+/// Insert a simple single-register formula for the given expression into the
+/// given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
@@ -3054,17 +3056,16 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
-/// CountRegisters - Note which registers are used by the given formula,
-/// updating RegUses.
+/// Note which registers are used by the given formula, updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
- RegUses.CountRegister(F.ScaledReg, LUIdx);
+ RegUses.countRegister(F.ScaledReg, LUIdx);
for (const SCEV *BaseReg : F.BaseRegs)
- RegUses.CountRegister(BaseReg, LUIdx);
+ RegUses.countRegister(BaseReg, LUIdx);
}
-/// InsertFormula - If the given formula has not yet been inserted, add it to
-/// the list, and return true. Return false otherwise.
+/// If the given formula has not yet been inserted, add it to the list, and
+/// return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
// Do not insert formula that we will not be able to expand.
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
@@ -3076,9 +3077,9 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
return true;
}
-/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
-/// loop-invariant values which we're tracking. These other uses will pin these
-/// values in registers, making them less profitable for elimination.
+/// Check for other uses of loop-invariant values which we're tracking. These
+/// other uses will pin these values in registers, making them less profitable
+/// for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
@@ -3124,6 +3125,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
+ // Don't bother if the instruction is in a BB which ends in an EHPad.
+ if (UseBB->getTerminator()->isEHPad())
+ continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType())) {
@@ -3148,7 +3152,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
- std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
+ std::pair<size_t, int64_t> P = getUse(
+ S, LSRUse::Basic, MemAccessTy());
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
@@ -3165,8 +3170,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
}
}
-/// CollectSubexprs - Split S into subexpressions which can be pulled out into
-/// separate registers. If C is non-null, multiply each subexpression by C.
+/// Split S into subexpressions which can be pulled out into separate
+/// registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
@@ -3300,7 +3305,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
F.BaseRegs.push_back(*J);
// We may have changed the number of register in base regs, adjust the
// formula accordingly.
- F.Canonicalize();
+ F.canonicalize();
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
@@ -3309,8 +3314,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
}
}
-/// GenerateReassociations - Split out subexpressions from adds and the bases of
-/// addrecs.
+/// Split out subexpressions from adds and the bases of addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base, unsigned Depth) {
assert(Base.isCanonical() && "Input must be in the canonical form");
@@ -3326,8 +3330,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
/* Idx */ -1, /* IsScaledReg */ true);
}
-/// GenerateCombinations - Generate a formula consisting of all of the
-/// loop-dominating registers added into a single register.
+/// Generate a formula consisting of all of the loop-dominating registers added
+/// into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
@@ -3336,7 +3340,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
// processing the formula.
- Base.Unscale();
+ Base.unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
@@ -3354,7 +3358,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
- F.Canonicalize();
+ F.canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3379,7 +3383,7 @@ void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
(void)InsertFormula(LU, LUIdx, F);
}
-/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+/// Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
@@ -3410,8 +3414,8 @@ void LSRInstance::GenerateConstantOffsetsImpl(
F.Scale = 0;
F.ScaledReg = nullptr;
} else
- F.DeleteBaseReg(F.BaseRegs[Idx]);
- F.Canonicalize();
+ F.deleteBaseReg(F.BaseRegs[Idx]);
+ F.canonicalize();
} else if (IsScaledReg)
F.ScaledReg = NewG;
else
@@ -3452,8 +3456,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
/* IsScaledReg */ true);
}
-/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
-/// the comparison. For example, x == y -> x*c == y*c.
+/// For ICmpZero, check to see if we can scale up the comparison. For example,
+/// x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (LU.Kind != LSRUse::ICmpZero) return;
@@ -3538,8 +3542,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
}
}
-/// GenerateScales - Generate stride factor reuse formulae by making use of
-/// scaled-offset address modes, for example.
+/// Generate stride factor reuse formulae by making use of scaled-offset address
+/// modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Determine the integer type for the base formula.
Type *IntTy = Base.getType();
@@ -3547,10 +3551,10 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// If this Formula already has a scaled register, we can't add another one.
// Try to unscale the formula to generate a better scale.
- if (Base.Scale != 0 && !Base.Unscale())
+ if (Base.Scale != 0 && !Base.unscale())
return;
- assert(Base.Scale == 0 && "Unscale did not did its job!");
+  assert(Base.Scale == 0 && "unscale did not do its job!");
// Check each interesting stride.
for (int64_t Factor : Factors) {
@@ -3587,7 +3591,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
- F.DeleteBaseReg(F.BaseRegs[i]);
+ F.deleteBaseReg(F.BaseRegs[i]);
// The canonical representation of 1*reg is reg, which is already in
// Base. In that case, do not try to insert the formula, it will be
// rejected anyway.
@@ -3599,7 +3603,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
}
}
-/// GenerateTruncates - Generate reuse formulae from different IV types.
+/// Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Don't bother truncating symbolic values.
if (Base.BaseGV) return;
@@ -3629,9 +3633,9 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
namespace {
-/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
-/// defer modifications so that the search phase doesn't have to worry about
-/// the data structures moving underneath it.
+/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
+/// modifications so that the search phase doesn't have to worry about the data
+/// structures moving underneath it.
struct WorkItem {
size_t LUIdx;
int64_t Imm;
@@ -3651,14 +3655,13 @@ void WorkItem::print(raw_ostream &OS) const {
<< " , add offset " << Imm;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
-#endif
-/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
-/// distance apart and try to form reuse opportunities between them.
+/// Look for registers which are a constant distance apart and try to form reuse
+/// opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
@@ -3751,7 +3754,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// very similar but slightly different. Investigate if they
// could be merged. That way, we would not have to unscale the
// Formula.
- F.Unscale();
+ F.unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3770,14 +3773,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
- if (C->getValue()->isNegative() !=
- (NewF.BaseOffset < 0) &&
- (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
- .ule(std::abs(NewF.BaseOffset)))
+ if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
+ (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
+ .ule(std::abs(NewF.BaseOffset)))
continue;
// OK, looks good.
- NewF.Canonicalize();
+ NewF.canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3801,15 +3803,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// zero than the immediate itself, then the formula isn't worthwhile.
for (const SCEV *NewReg : NewF.BaseRegs)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
- if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
- std::abs(NewF.BaseOffset)) &&
- (C->getValue()->getValue() +
- NewF.BaseOffset).countTrailingZeros() >=
- countTrailingZeros<uint64_t>(NewF.BaseOffset))
+ if ((C->getAPInt() + NewF.BaseOffset)
+ .abs()
+ .slt(std::abs(NewF.BaseOffset)) &&
+ (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
+ countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
- NewF.Canonicalize();
+ NewF.canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -3819,7 +3821,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
}
}
-/// GenerateAllReuseFormulae - Generate formulae for each use.
+/// Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
@@ -3959,10 +3961,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// This is a rough guess that seems to work fairly well.
static const size_t ComplexityLimit = UINT16_MAX;
-/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
-/// solutions the solver might have to consider. It almost never considers
-/// this many solutions because it prune the search space, but the pruning
-/// isn't always sufficient.
+/// Estimate the worst-case number of solutions the solver might have to
+/// consider. It almost never considers this many solutions because it prunes the
+/// search space, but the pruning isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
size_t Power = 1;
for (const LSRUse &LU : Uses) {
@@ -3978,10 +3979,9 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
return Power;
}
-/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
-/// of the registers of another formula, it won't help reduce register
-/// pressure (though it may not necessarily hurt register pressure); remove
-/// it to simplify the system.
+/// When one formula uses a superset of the registers of another formula, it
+/// won't help reduce register pressure (though it may not necessarily hurt
+/// register pressure); remove it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -4042,9 +4042,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
}
}
-/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
-/// for expressions like A, A+1, A+2, etc., allocate a single register for
-/// them.
+/// When there are many registers for expressions like A, A+1, A+2, etc.,
+/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
@@ -4121,8 +4120,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
-/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
-/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
@@ -4139,9 +4137,9 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
}
}
-/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
-/// to be profitable, and then in any use which has any reference to that
-/// register, delete all formulae which do not reference that register.
+/// Pick a register which seems likely to be profitable, and then in any use
+/// which has any reference to that register, delete all formulae which do not
+/// reference that register.
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
@@ -4202,10 +4200,10 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
}
}
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
+/// If there are an extraordinary number of formulae to choose from, use some
+/// rough heuristics to prune down the number of formulae. This keeps the main
+/// solver from taking an extraordinary amount of time in some worst-case
+/// scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
@@ -4213,7 +4211,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByPickingWinnerRegs();
}
-/// SolveRecurse - This is the recursive solver.
+/// This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
@@ -4291,8 +4289,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
}
}
-/// Solve - Choose one formula from each use. Return the results in the given
-/// Solution vector.
+/// Choose one formula from each use. Return the results in the given Solution
+/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
@@ -4326,10 +4324,9 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
assert(Solution.size() == Uses.size() && "Malformed solution!");
}
-/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
-/// the dominator tree far as we can go while still being dominated by the
-/// input positions. This helps canonicalize the insert position, which
-/// encourages sharing.
+/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far as
+/// we can go while still being dominated by the input positions. This helps
+/// canonicalize the insert position, which encourages sharing.
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
@@ -4365,21 +4362,21 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
- BetterPos = std::next(BasicBlock::iterator(Inst));
+ BetterPos = &*std::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
if (BetterPos)
- IP = BetterPos;
+ IP = BetterPos->getIterator();
else
- IP = Tentative;
+ IP = Tentative->getIterator();
}
return IP;
}
-/// AdjustInsertPositionForExpand - Determine an input position which will be
-/// dominated by the operands and which will dominate the result.
+/// Determine an input position which will be dominated by the operands and
+/// which will dominate the result.
BasicBlock::iterator
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
@@ -4417,7 +4414,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
}
}
- assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+ assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
&& !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction");
@@ -4429,7 +4426,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
while (isa<PHINode>(IP)) ++IP;
// Ignore landingpad instructions.
- while (isa<LandingPadInst>(IP)) ++IP;
+ while (!isa<TerminatorInst>(IP) && IP->isEHPad()) ++IP;
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
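Taken together, the three loops above advance the insertion point past PHI nodes, non-terminator EH pads, and debug intrinsics. The same logic pulled out into a free helper, purely for illustration, dereferencing the iterator explicitly for clarity:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Advance IP past anything we must not insert in front of: PHI nodes,
// non-terminator EH pads, and debug intrinsics.
static BasicBlock::iterator skipToSafeInsertPt(BasicBlock::iterator IP) {
  while (isa<PHINode>(&*IP))
    ++IP;
  while (!isa<TerminatorInst>(&*IP) && IP->isEHPad())
    ++IP;
  while (isa<DbgInfoIntrinsic>(&*IP))
    ++IP;
  return IP;
}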
@@ -4437,13 +4434,14 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
// Set IP below instructions recently inserted by SCEVExpander. This keeps the
// IP consistent across expansions and allows the previously inserted
// instructions to be reused by subsequent expansion.
- while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+ while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
+ ++IP;
return IP;
}
-/// Expand - Emit instructions for the leading candidate expression for this
-/// LSRUse (this is called "expanding").
+/// Emit instructions for the leading candidate expression for this LSRUse (this
+/// is called "expanding").
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
@@ -4487,7 +4485,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
}
// Expand the ScaledReg portion.
@@ -4505,14 +4503,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand ScaleReg as if it was part of the base regs.
if (F.Scale == 1)
Ops.push_back(
- SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
else {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
}
} else {
// Otherwise just expand the scaled register and an explicit scale,
@@ -4522,11 +4520,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
if (F.Scale != 1)
ScaledS =
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
@@ -4538,7 +4536,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4548,7 +4546,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
// unfolded offsets. LSR assumes they both live next to their uses.
if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@@ -4584,7 +4582,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
- Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+ Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();
@@ -4626,15 +4624,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
return FullV;
}
-/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
-/// of their operands effectively happens in their predecessor blocks, so the
-/// expression may need to be expanded in multiple places.
+/// Helper for Rewrite. PHI nodes are special because the use of their operands
+/// effectively happens in their predecessor blocks, so the expression may need
+/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const {
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
@@ -4658,8 +4655,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
.setDontDeleteUselessPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs,
- /*AliasAnalysis*/ nullptr, &DT, &LI);
+ SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
NewBB = NewBBs[0];
}
// If NewBB==NULL, then SplitCriticalEdge refused to split because all
@@ -4685,7 +4681,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
- Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+ Value *FullV = Expand(LF, F, BB->getTerminator()->getIterator(),
+ Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -4702,20 +4699,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
}
}
-/// Rewrite - Emit instructions for the leading candidate expression for this
-/// LSRUse (this is called "expanding"), and update the UserInst to reference
-/// the newly expanded value.
+/// Emit instructions for the leading candidate expression for this LSRUse (this
+/// is called "expanding"), and update the UserInst to reference the newly
+/// expanded value.
void LSRInstance::Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- Pass *P) const {
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
- RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts);
} else {
- Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
+ Value *FullV =
+ Expand(LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -4740,11 +4737,10 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
DeadInsts.emplace_back(LF.OperandValToReplace);
}
-/// ImplementSolution - Rewrite all the fixup locations with new values,
-/// following the chosen solution.
-void
-LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
- Pass *P) {
+/// Rewrite all the fixup locations with new values, following the chosen
+/// solution.
+void LSRInstance::ImplementSolution(
+ const SmallVectorImpl<const Formula *> &Solution) {
// Keep track of instructions we may have made dead, so that
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
@@ -4766,7 +4762,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// Expand the new value definitions and update the users.
for (const LSRFixup &Fixup : Fixups) {
- Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
+ Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts);
Changed = true;
}
@@ -4782,13 +4778,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(Loop *L, Pass *P)
- : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
- LI(P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo()),
- TTI(P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent())),
- L(L), Changed(false), IVIncInsertPos(nullptr) {
+LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
+ DominatorTree &DT, LoopInfo &LI,
+ const TargetTransformInfo &TTI)
+ : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false),
+ IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -4879,7 +4873,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
#endif
// Now that we've decided what we want, make it so.
- ImplementSolution(Solution, P);
+ ImplementSolution(Solution);
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
@@ -4931,11 +4925,10 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
-#endif
namespace {
@@ -4956,7 +4949,7 @@ INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
@@ -4982,8 +4975,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
@@ -4996,17 +4989,24 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
if (skipOptnoneFunction(L))
return false;
+ auto &IU = getAnalysis<IVUsers>();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent());
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(L, this).getChanged();
+ Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolutionWrapperPass>().getSE(), DL,
+ "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
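
Note on the hunks above: the old LSRInstance(Loop *, Pass *) constructor, which fetched each analysis through getAnalysis<>, is replaced by one that receives IVUsers, ScalarEvolution, DominatorTree, LoopInfo and TargetTransformInfo by reference, and runOnLoop now gathers those analyses itself. A minimal standalone sketch of that dependency-threading pattern, using made-up analysis types rather than LLVM's:

#include <iostream>

// Hypothetical stand-ins for the analysis results a transform needs.
struct ScalarEvolutionish { int id = 0; };
struct DomTreeish        { int id = 1; };

// Instead of a constructor such as Foo(Loop *L, Pass *P) that calls
// P->getAnalysis<...>() internally, the analyses are threaded in explicitly,
// so the transform no longer depends on the legacy pass plumbing.
class TransformInstance {
  ScalarEvolutionish &SE;
  DomTreeish &DT;
public:
  TransformInstance(ScalarEvolutionish &SE, DomTreeish &DT) : SE(SE), DT(DT) {}
  void run() { std::cout << "using SE " << SE.id << " and DT " << DT.id << "\n"; }
};

int main() {
  ScalarEvolutionish SE;
  DomTreeish DT;
  TransformInstance(SE, DT).run();  // the caller owns and supplies the analyses
}

In the real pass the caller is runOnLoop, which is exactly what the hunk near the end of the LoopStrengthReduce.cpp diff now does.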
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d78db6c..56ae5c0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -130,27 +131,29 @@ namespace {
bool UserAllowPartial;
bool UserRuntime;
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ bool runOnLoop(Loop *L, LPPassManager &) override;
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
- AU.addRequired<ScalarEvolution>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
// Fill in the UnrollingPreferences parameter with values from the
@@ -186,7 +189,7 @@ namespace {
// total unrolled size. Parameters Threshold and PartialThreshold
// are set to the maximum unrolled size for fully and partially
// unrolled loops respectively.
- void selectThresholds(const Loop *L, bool HasPragma,
+ void selectThresholds(const Loop *L, bool UsePragmaThreshold,
const TargetTransformInfo::UnrollingPreferences &UP,
unsigned &Threshold, unsigned &PartialThreshold,
unsigned &PercentDynamicCostSavedThreshold,
@@ -207,12 +210,13 @@ namespace {
: UP.DynamicCostSavingsDiscount;
if (!UserThreshold &&
+ // FIXME: Use Function::optForSize().
L->getHeader()->getParent()->hasFnAttribute(
Attribute::OptimizeForSize)) {
Threshold = UP.OptSizeThreshold;
PartialThreshold = UP.PartialOptSizeThreshold;
}
- if (HasPragma) {
+ if (UsePragmaThreshold) {
// If the loop has an unrolling pragma, we want to be more
// aggressive with unrolling limits. Set thresholds to at
// least the PragmaTheshold value which is larger than the
@@ -235,10 +239,11 @@ char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
@@ -278,8 +283,8 @@ class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
public:
UnrolledInstAnalyzer(unsigned Iteration,
DenseMap<Value *, Constant *> &SimplifiedValues,
- const Loop *L, ScalarEvolution &SE)
- : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) {
+ ScalarEvolution &SE)
+ : SimplifiedValues(SimplifiedValues), SE(SE) {
IterationNumber = SE.getConstant(APInt(64, Iteration));
}
@@ -295,13 +300,6 @@ private:
/// results saved.
DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses;
- /// \brief Number of currently simulated iteration.
- ///
- /// If an expression is ConstAddress+Constant, then the Constant is
- /// Start + Iteration*Step, where Start and Step could be obtained from
- /// SCEVGEPCache.
- unsigned Iteration;
-
/// \brief SCEV expression corresponding to number of currently simulated
/// iteration.
const SCEV *IterationNumber;
@@ -316,7 +314,6 @@ private:
/// post-unrolling.
DenseMap<Value *, Constant *> &SimplifiedValues;
- const Loop *L;
ScalarEvolution &SE;
/// \brief Try to simplify instruction \param I using its SCEV expression.
@@ -368,11 +365,9 @@ private:
return simplifyInstWithSCEV(&I);
}
- /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt.
-
/// Try to simplify binary operator I.
///
- /// TODO: Probaly it's worth to hoist the code for estimating the
+ /// TODO: Probably it's worth to hoist the code for estimating the
/// simplifications effects to a separate class, since we have a very similar
/// code in InlineCost already.
bool visitBinaryOperator(BinaryOperator &I) {
@@ -412,7 +407,7 @@ private:
auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
// We're only interested in loads that can be completely folded to a
// constant.
- if (!GV || !GV->hasInitializer())
+ if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant())
return false;
ConstantDataSequential *CDS =
@@ -420,6 +415,12 @@ private:
if (!CDS)
return false;
+ // We might have a vector load from an array. FIXME: for now we just bail
+ // out in this case, but we should be able to resolve and simplify such
+ // loads.
+ if(!CDS->isElementTypeCompatible(I.getType()))
+ return false;
+
int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 &&
"Unexpectedly large index value.");
@@ -436,6 +437,59 @@ private:
return true;
}
+
+ bool visitCastInst(CastInst &I) {
+ // Propagate constants through casts.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C =
+ ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ return Base::visitCastInst(I);
+ }
+
+ bool visitCmpInst(CmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+
+ if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) {
+ auto SimplifiedLHS = SimplifiedAddresses.find(LHS);
+ if (SimplifiedLHS != SimplifiedAddresses.end()) {
+ auto SimplifiedRHS = SimplifiedAddresses.find(RHS);
+ if (SimplifiedRHS != SimplifiedAddresses.end()) {
+ SimplifiedAddress &LHSAddr = SimplifiedLHS->second;
+ SimplifiedAddress &RHSAddr = SimplifiedRHS->second;
+ if (LHSAddr.Base == RHSAddr.Base) {
+ LHS = LHSAddr.Offset;
+ RHS = RHSAddr.Offset;
+ }
+ }
+ }
+ }
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+ }
+ }
+
+ return Base::visitCmpInst(I);
+ }
};
} // namespace
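
The added visitCastInst and visitCmpInst handlers let the analyzer propagate constants through casts and fold a comparison of two pointers whose simplified addresses share the same base by comparing only their constant offsets. A toy version of the offset-comparison idea, with invented types rather than the analyzer's SimplifiedAddress map:

#include <cstdio>
#include <string>

// If both sides of a pointer comparison were simplified to (Base, Offset)
// pairs with the same base, the compare reduces to a compare of the offsets.
struct SimplifiedAddr { std::string Base; long Offset; };

static bool foldPtrLess(const SimplifiedAddr &L, const SimplifiedAddr &R,
                        bool &Result) {
  if (L.Base != R.Base)
    return false;              // different objects: cannot fold
  Result = L.Offset < R.Offset;
  return true;
}

int main() {
  SimplifiedAddr A{"glob", 8}, B{"glob", 24};
  bool R;
  if (foldPtrLess(A, B, R))
    std::printf("folded to %d\n", R);   // folded to 1
}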
@@ -443,11 +497,11 @@ private:
namespace {
struct EstimatedUnrollCost {
/// \brief The estimated cost after unrolling.
- unsigned UnrolledCost;
+ int UnrolledCost;
/// \brief The estimated dynamic cost of executing the instructions in the
/// rolled form.
- unsigned RolledDynamicCost;
+ int RolledDynamicCost;
};
}
@@ -464,10 +518,10 @@ struct EstimatedUnrollCost {
/// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
/// the analysis failed (no benefits expected from the unrolling, or the loop is
/// too big to analyze), the returned value is None.
-Optional<EstimatedUnrollCost>
-analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
- const TargetTransformInfo &TTI,
- unsigned MaxUnrolledLoopSize) {
+static Optional<EstimatedUnrollCost>
+analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ int MaxUnrolledLoopSize) {
// We want to be able to scale offsets by the trip count and add more offsets
// to them without checking for overflows, and we already don't want to
// analyze *massive* trip counts, so we force the max to be reasonably small.
@@ -481,24 +535,61 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
SmallSetVector<BasicBlock *, 16> BBWorklist;
DenseMap<Value *, Constant *> SimplifiedValues;
+ SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
// The estimated cost of the unrolled form of the loop. We try to estimate
// this by simplifying as much as we can while computing the estimate.
- unsigned UnrolledCost = 0;
+ int UnrolledCost = 0;
// We also track the estimated dynamic (that is, actually executed) cost in
// the rolled form. This helps identify cases when the savings from unrolling
// aren't just exposing dead control flows, but actual reduced dynamic
// instructions due to the simplifications which we expect to occur after
// unrolling.
- unsigned RolledDynamicCost = 0;
+ int RolledDynamicCost = 0;
+
+ // Ensure that we don't violate the loop structure invariants relied on by
+ // this analysis.
+ assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
+ assert(L->isLCSSAForm(DT) &&
+ "Must have loops in LCSSA form to track live-out values.");
+
+ DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
// Simulate execution of each iteration of the loop counting instructions,
// which would be simplified.
// Since the same load will take different values on different iterations,
// we literally have to go through all loop's iterations.
for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
+ DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
+
+ // Prepare for the iteration by collecting any simplified entry or backedge
+ // inputs.
+ for (Instruction &I : *L->getHeader()) {
+ auto *PHI = dyn_cast<PHINode>(&I);
+ if (!PHI)
+ break;
+
+ // The loop header PHI nodes must have exactly two input: one from the
+ // loop preheader and one from the loop latch.
+ assert(
+ PHI->getNumIncomingValues() == 2 &&
+ "Must have an incoming value only for the preheader and the latch.");
+
+ Value *V = PHI->getIncomingValueForBlock(
+ Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
+ Constant *C = dyn_cast<Constant>(V);
+ if (Iteration != 0 && !C)
+ C = SimplifiedValues.lookup(V);
+ if (C)
+ SimplifiedInputValues.push_back({PHI, C});
+ }
+
+ // Now clear and re-populate the map for the next iteration.
SimplifiedValues.clear();
- UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE);
+ while (!SimplifiedInputValues.empty())
+ SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
+
+ UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE);
BBWorklist.clear();
BBWorklist.insert(L->getHeader());
@@ -510,21 +601,67 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// it. We don't change the actual IR, just count optimization
// opportunities.
for (Instruction &I : *BB) {
- unsigned InstCost = TTI.getUserCost(&I);
+ int InstCost = TTI.getUserCost(&I);
// Visit the instruction to analyze its loop cost after unrolling,
// and if the visitor returns false, include this instruction in the
// unrolled cost.
if (!Analyzer.visit(I))
UnrolledCost += InstCost;
+ else {
+ DEBUG(dbgs() << " " << I
+ << " would be simplified if loop is unrolled.\n");
+ (void)0;
+ }
// Also track this instructions expected cost when executing the rolled
// loop form.
RolledDynamicCost += InstCost;
// If unrolled body turns out to be too big, bail out.
- if (UnrolledCost > MaxUnrolledLoopSize)
+ if (UnrolledCost > MaxUnrolledLoopSize) {
+ DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
+ << " UnrolledCost: " << UnrolledCost
+ << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
+ << "\n");
return None;
+ }
+ }
+
+ TerminatorInst *TI = BB->getTerminator();
+
+ // Add in the live successors by first checking whether we have terminator
+ // that may be simplified based on the values simplified by this call.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional()) {
+ if (Constant *SimpleCond =
+ SimplifiedValues.lookup(BI->getCondition())) {
+ BasicBlock *Succ = nullptr;
+ // Just take the first successor if condition is undef
+ if (isa<UndefValue>(SimpleCond))
+ Succ = BI->getSuccessor(0);
+ else
+ Succ = BI->getSuccessor(
+ cast<ConstantInt>(SimpleCond)->isZero() ? 1 : 0);
+ if (L->contains(Succ))
+ BBWorklist.insert(Succ);
+ continue;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (Constant *SimpleCond =
+ SimplifiedValues.lookup(SI->getCondition())) {
+ BasicBlock *Succ = nullptr;
+ // Just take the first successor if condition is undef
+ if (isa<UndefValue>(SimpleCond))
+ Succ = SI->getSuccessor(0);
+ else
+ Succ = SI->findCaseValue(cast<ConstantInt>(SimpleCond))
+ .getCaseSuccessor();
+ if (L->contains(Succ))
+ BBWorklist.insert(Succ);
+ continue;
+ }
}
// Add BB's successors to the worklist.
@@ -535,9 +672,15 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
// If we found no optimization opportunities on the first iteration, we
// won't find them on later ones too.
- if (UnrolledCost == RolledDynamicCost)
+ if (UnrolledCost == RolledDynamicCost) {
+ DEBUG(dbgs() << " No opportunities found.. exiting.\n"
+ << " UnrolledCost: " << UnrolledCost << "\n");
return None;
+ }
}
+ DEBUG(dbgs() << "Analysis finished:\n"
+ << "UnrolledCost: " << UnrolledCost << ", "
+ << "RolledDynamicCost: " << RolledDynamicCost << "\n");
return {{UnrolledCost, RolledDynamicCost}};
}
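
The analysis loop above simulates every iteration of the candidate loop: header PHIs are seeded from the preheader on iteration 0 and from the latch afterwards, each instruction's cost is added to RolledDynamicCost unconditionally but to UnrolledCost only when the analyzer could not fold it, live successors of foldable branches and switches are followed, and the walk gives up as soon as UnrolledCost exceeds the size budget or the first iteration shows no savings. A toy, self-contained restatement of that bookkeeping (the cost model and the fold predicate are stand-ins, not the pass's):

#include <cstdio>
#include <optional>
#include <vector>

struct ToyCost { int Unrolled; int RolledDynamic; };

static std::optional<ToyCost>
simulateUnroll(const std::vector<int> &InstCosts, unsigned TripCount,
               int MaxUnrolledSize,
               bool (*foldsAway)(unsigned Iter, unsigned InstIdx)) {
  int Unrolled = 0, Rolled = 0;
  for (unsigned Iter = 0; Iter != TripCount; ++Iter) {
    for (unsigned i = 0; i != InstCosts.size(); ++i) {
      if (!foldsAway(Iter, i))
        Unrolled += InstCosts[i];          // instruction survives unrolling
      Rolled += InstCosts[i];              // always executed in rolled form
      if (Unrolled > MaxUnrolledSize)
        return std::nullopt;               // too big, give up early
    }
    if (Iter == 0 && Unrolled == Rolled)
      return std::nullopt;                 // no simplification found at all
  }
  return ToyCost{Unrolled, Rolled};
}

int main() {
  std::vector<int> Costs = {1, 1, 2};
  auto R = simulateUnroll(Costs, 4, 100,
                          [](unsigned, unsigned Idx) { return Idx == 2; });
  if (R)
    std::printf("unrolled=%d rolled=%d\n", R->Unrolled, R->RolledDynamic);
}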
@@ -583,6 +726,12 @@ static bool HasUnrollFullPragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
}
+// Returns true if the loop has an unroll(enable) pragma. This metadata is used
+// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
+static bool HasUnrollEnablePragma(const Loop *L) {
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
+}
+
// Returns true if the loop has an unroll(disable) pragma.
static bool HasUnrollDisablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
@@ -708,7 +857,7 @@ unsigned LoopUnroll::selectUnrollCount(
unsigned Count = UserCount ? CurrentCount : 0;
// If there is no user-specified count, unroll pragmas have the next
- // highest precendence.
+ // highest precedence.
if (Count == 0) {
if (PragmaCount) {
Count = PragmaCount;
@@ -737,17 +886,19 @@ unsigned LoopUnroll::selectUnrollCount(
return Count;
}
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
if (skipOptnoneFunction(L))
return false;
Function &F = *L->getHeader()->getParent();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -757,8 +908,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
bool PragmaFullUnroll = HasUnrollFullPragma(L);
+ bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
unsigned PragmaCount = UnrollCountPragmaValue(L);
- bool HasPragma = PragmaFullUnroll || PragmaCount > 0;
+ bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
TargetTransformInfo::UnrollingPreferences UP;
getUnrollingPreferences(L, TTI, UP);
@@ -806,7 +958,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
unsigned Threshold, PartialThreshold;
unsigned PercentDynamicCostSavedThreshold;
unsigned DynamicCostSavingsDiscount;
- selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
+ // Only use the high pragma threshold when we have a target unroll factor such
+ // as with "#pragma unroll N" or a pragma indicating full unrolling and the
+ // trip count is known. Otherwise we rely on the standard threshold to
+ // heuristically select a reasonable unroll count.
+ bool UsePragmaThreshold =
+ PragmaCount > 0 ||
+ ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
+
+ selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold,
PercentDynamicCostSavedThreshold,
DynamicCostSavingsDiscount);
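
The new UsePragmaThreshold flag narrows when the larger pragma threshold is used: only when the pragma pins an exact unroll count, or when full/enabled unrolling is requested on a loop whose trip count is known at compile time. Restated as a tiny standalone function (the wrapper is illustrative; the variable names are the ones in the hunk):

#include <cstdio>

static bool usePragmaThreshold(unsigned PragmaCount, bool PragmaFullUnroll,
                               bool PragmaEnableUnroll, unsigned TripCount) {
  return PragmaCount > 0 ||
         ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
}

int main() {
  std::printf("%d\n", usePragmaThreshold(0, true, false, 16));  // 1: known trip count
  std::printf("%d\n", usePragmaThreshold(0, true, false, 0));   // 0: runtime trip count
}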
@@ -824,8 +984,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount))
+ if (Optional<EstimatedUnrollCost> Cost =
+ analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI,
+ Threshold + DynamicCostSavingsDiscount))
if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
DynamicCostSavingsDiscount, Cost->UnrolledCost,
Cost->RolledDynamicCost)) {
@@ -840,14 +1001,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
- bool AllowRuntime =
- (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime);
+ bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) ||
+ (UserRuntime ? CurrentRuntime : UP.Runtime);
// Don't unroll a runtime trip count loop with unroll full pragma.
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
if (Unrolling == Partial) {
- bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
+ bool AllowPartial = PragmaEnableUnroll ||
+ (UserAllowPartial ? CurrentAllowPartial : UP.Partial);
if (!AllowPartial && !CountSetExplicitly) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
@@ -887,23 +1049,27 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
DebugLoc LoopLoc = L->getStartLoc();
Function *F = Header->getParent();
LLVMContext &Ctx = F->getContext();
- if (PragmaFullUnroll && PragmaCount == 0) {
- if (TripCount && Count != TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because unrolled size is too large.");
- } else if (!TripCount) {
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because loop has a runtime trip count.");
- }
- } else if (PragmaCount > 0 && Count != OriginalCount) {
+ if ((PragmaCount > 0) && Count != OriginalCount) {
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
"Unable to unroll loop the number of times directed by "
"unroll_count pragma because unrolled size is too large.");
+ } else if (PragmaFullUnroll && !TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(full) pragma "
+ "because loop has a runtime trip count.");
+ } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to unroll loop as directed by unroll(enable) pragma because "
+ "unrolled size is too large.");
+ } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ Count != TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll pragma because "
+ "unrolled size is too large.");
}
}
@@ -915,7 +1081,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Unroll the loop.
if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
- TripMultiple, LI, this, &LPM, &AC))
+ TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
return false;
return true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index cbc563b..95d7f8a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -37,6 +38,10 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -70,6 +75,19 @@ static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
+static cl::opt<bool>
+LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to access PGO "
+ "heuristics to minimize code growth in cold regions."));
+
+static cl::opt<unsigned>
+ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden,
+ cl::desc("Coldness threshold in percentage. The loop header frequency "
+ "(relative to the entry frequency) is compared with this "
+ "threshold to determine if non-trivial unswitching should be "
+ "enabled."));
+
namespace {
class LUAnalysisCache {
@@ -148,12 +166,19 @@ namespace {
LPPassManager *LPM;
AssumptionCache *AC;
- // LoopProcessWorklist - Used to check if second loop needs processing
- // after RewriteLoopBodyWithConditionConstant rewrites first loop.
+ // Used to check if second loop needs processing after
+ // RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
LUAnalysisCache BranchesInfo;
+ bool EnabledPGO;
+
+ // BFI and ColdEntryFreq are only used when PGO and
+ // LoopUnswitchWithBlockFrequency are enabled.
+ BlockFrequencyInfo BFI;
+ BlockFrequency ColdEntryFreq;
+
bool OptimizeForSize;
bool redoLoop;
@@ -192,9 +217,11 @@ namespace {
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
private:
@@ -210,7 +237,10 @@ namespace {
/// Split all of the edges from inside the loop to their exit blocks.
/// Update the appropriate Phi nodes as we do so.
- void SplitExitEdges(Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks);
+ void SplitExitEdges(Loop *L,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks);
+
+ bool TryTrivialLoopUnswitch(bool &Changed);
bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
TerminatorInst *TI = nullptr);
@@ -229,9 +259,6 @@ namespace {
TerminatorInst *TI);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
- BasicBlock **LoopExit = nullptr);
-
};
}
@@ -367,9 +394,8 @@ Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
}
-/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
-/// invariant in the loop, or has an invariant piece, return the invariant.
-/// Otherwise, return null.
+/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
+/// an invariant piece, return the invariant. Otherwise, return null.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We started analyze new instruction, increment scanned instructions counter.
@@ -411,11 +437,23 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
*L->getHeader()->getParent());
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPM_Ref;
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
+
+ EnabledPGO = F->getEntryCount().hasValue();
+
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ BranchProbabilityInfo BPI(*F, *LI);
+ BFI.calculate(*L->getHeader()->getParent(), BPI, *LI);
+
+ // Use BranchProbability to compute a minimum frequency based on
+ // function entry baseline frequency. Loops with headers below this
+ // frequency are considered as cold.
+ const BranchProbability ColdProb(ColdnessThreshold, 100);
+ ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
+ }
+
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
@@ -423,16 +461,13 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
Changed |= processCurrentLoop();
} while(redoLoop);
- if (Changed) {
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- if (DT)
- DT->recalculate(*F);
- }
+ // FIXME: Reconstruct dom info, because it is not preserved properly.
+ if (Changed)
+ DT->recalculate(*F);
return Changed;
}
-/// processCurrentLoop - Do actual work and unswitch loop if possible
-/// and profitable.
+/// Do actual work and unswitch loop if possible and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
@@ -452,14 +487,48 @@ bool LoopUnswitch::processCurrentLoop() {
LLVMContext &Context = loopHeader->getContext();
- // Probably we reach the quota of branches for this loop. If so
- // stop unswitching.
+ // Analyze loop cost, and stop unswitching if loop content can not be duplicated.
if (!BranchesInfo.countLoop(
currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*currentLoop->getHeader()->getParent()),
AC))
return false;
+ // Try trivial unswitch first before loop over other basic blocks in the loop.
+ if (TryTrivialLoopUnswitch(Changed)) {
+ return true;
+ }
+
+ // Do not unswitch loops containing convergent operations, as we might be
+ // making them control dependent on the unswitch value when they were not
+ // before.
+ // FIXME: This could be refined to only bail if the convergent operation is
+ // not already control-dependent on the unswitch value.
+ for (const auto BB : currentLoop->blocks()) {
+ for (auto &I : *BB) {
+ auto CS = CallSite(&I);
+ if (!CS) continue;
+ if (CS.hasFnAttr(Attribute::Convergent))
+ return false;
+ }
+ }
+
+ // Do not do non-trivial unswitch while optimizing for size.
+ // FIXME: Use Function::optForSize().
+ if (OptimizeForSize ||
+ loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ // Compute the weighted frequency of the hottest block in the
+ // loop (loopHeader in this case since inner loops should be
+ // processed before outer loop). If it is less than ColdFrequency,
+ // we should not unswitch.
+ BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
+ if (LoopEntryFreq < ColdEntryFreq)
+ return false;
+ }
+
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
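
When PGO data is present and -loop-unswitch-with-block-frequency is enabled, the code above skips non-trivial unswitching for loops whose header frequency is below ColdnessThreshold percent of the function entry frequency. The arithmetic, reduced to plain integers instead of BlockFrequency/BranchProbability (illustrative only):

#include <cstdint>
#include <cstdio>

static bool loopIsCold(uint64_t EntryFreq, uint64_t LoopHeaderFreq,
                       unsigned ColdnessThresholdPercent) {
  uint64_t ColdEntryFreq = EntryFreq * ColdnessThresholdPercent / 100;
  return LoopHeaderFreq < ColdEntryFreq;   // cold: skip non-trivial unswitch
}

int main() {
  // A header executed 50 times per 1000 function entries is not cold at the
  // default 1% threshold, but would be at 10%.
  std::printf("%d %d\n", loopIsCold(1000, 50, 1), loopIsCold(1000, 50, 10)); // 0 1
}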
@@ -528,8 +597,8 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
-/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
-/// loop with no side effects (including infinite loops).
+/// Check to see if all paths from BB exit the loop with no side effects
+/// (including infinite loops).
///
/// If true, we return true and set ExitBB to the block we
/// exit through.
@@ -566,9 +635,9 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
return true;
}
-/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
-/// leads to an exit from the specified loop, and has no side-effects in the
-/// process. If so, return the block that is exited to, otherwise return null.
+/// Return true if the specified block unconditionally leads to an exit from
+/// the specified loop, and has no side-effects in the process. If so, return
+/// the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
Visited.insert(L->getHeader()); // Branches to header make infinite loops.
@@ -578,105 +647,11 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
return nullptr;
}
-/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
-/// trivial: that is, that the condition controls whether or not the loop does
-/// anything at all. If this is a trivial condition, unswitching produces no
-/// code duplications (equivalently, it produces a simpler loop and a new empty
-/// loop, which gets deleted).
-///
-/// If this is a trivial condition, return true, otherwise return false. When
-/// returning true, this sets Cond and Val to the condition that controls the
-/// trivial condition: when Cond dynamically equals Val, the loop is known to
-/// exit. Finally, this sets LoopExit to the BB that the loop exits to when
-/// Cond == Val.
-///
-bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
- BasicBlock **LoopExit) {
- BasicBlock *Header = currentLoop->getHeader();
- TerminatorInst *HeaderTerm = Header->getTerminator();
- LLVMContext &Context = Header->getContext();
-
- BasicBlock *LoopExitBB = nullptr;
- if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
- // If the header block doesn't end with a conditional branch on Cond, we
- // can't handle it.
- if (!BI->isConditional() || BI->getCondition() != Cond)
- return false;
-
- // Check to see if a successor of the branch is guaranteed to
- // exit through a unique exit block without having any
- // side-effects. If so, determine the value of Cond that causes it to do
- // this.
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
- BI->getSuccessor(0)))) {
- if (Val) *Val = ConstantInt::getTrue(Context);
- } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
- BI->getSuccessor(1)))) {
- if (Val) *Val = ConstantInt::getFalse(Context);
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
- // If this isn't a switch on Cond, we can't handle it.
- if (SI->getCondition() != Cond) return false;
-
- // Check to see if a successor of the switch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
- // side-effects. If so, determine the value of Cond that causes it to do
- // this.
- // Note that we can't trivially unswitch on the default case or
- // on already unswitched cases.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- BasicBlock *LoopExitCandidate;
- if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
- i.getCaseSuccessor()))) {
- // Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt *CaseVal = i.getCaseValue();
-
- // Check that it was not unswitched before, since already unswitched
- // trivial vals are looks trivial too.
- if (BranchesInfo.isUnswitched(SI, CaseVal))
- continue;
- LoopExitBB = LoopExitCandidate;
- if (Val) *Val = CaseVal;
- break;
- }
- }
- }
-
- // If we didn't find a single unique LoopExit block, or if the loop exit block
- // contains phi nodes, this isn't trivial.
- if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
- return false; // Can't handle this.
-
- if (LoopExit) *LoopExit = LoopExitBB;
-
- // We already know that nothing uses any scalar values defined inside of this
- // loop. As such, we just have to check to see if this loop will execute any
- // side-effecting instructions (e.g. stores, calls, volatile loads) in the
- // part of the loop that the code *would* execute. We already checked the
- // tail, check the header now.
- for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
- if (I->mayHaveSideEffects())
- return false;
- return true;
-}
-
-/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
-/// LoopCond == Val to simplify the loop. If we decide that this is profitable,
+/// We have found that we can unswitch currentLoop when LoopCond == Val to
+/// simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
TerminatorInst *TI) {
- Function *F = loopHeader->getParent();
- Constant *CondVal = nullptr;
- BasicBlock *ExitBlock = nullptr;
-
- if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
- // If the condition is trivial, always unswitch. There is no code growth
- // for this case.
- UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI);
- return true;
- }
-
// Check to see if it would be profitable to unswitch current loop.
if (!BranchesInfo.CostAllowsUnswitching()) {
DEBUG(dbgs() << "NOT unswitching loop %"
@@ -687,32 +662,27 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
return false;
}
- // Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize))
- return false;
-
UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
return true;
}
-/// CloneLoop - Recursively clone the specified loop and all of its children,
+/// Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
- Loop *New = new Loop();
- LPM->insertLoop(New, PL);
+ Loop &New = LPM->addLoop(PL);
// Add all of the blocks in L to the new loop.
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
if (LI->getLoopFor(*I) == L)
- New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
// Add all of the subloops to the new loop.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- CloneLoop(*I, New, VM, LI, LPM);
+ CloneLoop(*I, &New, VM, LI, LPM);
- return New;
+ return &New;
}
static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
@@ -744,15 +714,15 @@ static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
}
}
// fallthrough.
+ case LLVMContext::MD_make_implicit:
case LLVMContext::MD_dbg:
DstInst->setMetadata(MD.first, MD.second);
}
}
}
-/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values
-/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the
-/// code immediately before InsertPt.
+/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
+/// otherwise branch to FalseDest. Insert the code immediately before InsertPt.
void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
BasicBlock *TrueDest,
BasicBlock *FalseDest,
@@ -782,11 +752,11 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
SplitCriticalEdge(BI, 1, Options);
}
-/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
-/// condition in it (a cond branch from its header block to its latch block,
-/// where the path through the loop that doesn't execute its body has no
-/// side-effects), unswitch it. This doesn't involve any code duplication, just
-/// moving the conditional branch outside of the loop and updating loop info.
+/// Given a loop that has a trivial unswitchable condition in it (a cond branch
+/// from its header block to its latch block, where the path through the loop
+/// that doesn't execute its body has no side-effects), unswitch it. This
+/// doesn't involve any code duplication, just moving the conditional branch
+/// outside of the loop and updating loop info.
void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock,
TerminatorInst *TI) {
@@ -810,7 +780,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), DT, LI);
+ BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI);
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
@@ -829,8 +799,155 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
++NumTrivial;
}
-/// SplitExitEdges - Split all of the edges from inside the loop to their exit
-/// blocks. Update the appropriate Phi nodes as we do so.
+/// Check if the first non-constant condition starting from the loop header is
+/// a trivial unswitch condition: that is, a condition controls whether or not
+/// the loop does anything at all. If it is a trivial condition, unswitching
+/// produces no code duplications (equivalently, it produces a simpler loop and
+/// a new empty loop, which gets deleted). Therefore always unswitch trivial
+/// condition.
+bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
+ BasicBlock *CurrentBB = currentLoop->getHeader();
+ TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+ LLVMContext &Context = CurrentBB->getContext();
+
+ // If loop header has only one reachable successor (currently via an
+ // unconditional branch or constant foldable conditional branch, but
+ // should also consider adding constant foldable switch instruction in
+ // future), we should keep looking for trivial condition candidates in
+ // the successor as well. An alternative is to constant fold conditions
+ // and merge successors into loop header (then we only need to check header's
+ // terminator). The reason for not doing this in LoopUnswitch pass is that
+ // it could potentially break LoopPassManager's invariants. Folding dead
+ // branches could either eliminate the current loop or make other loops
+ // unreachable. LCSSA form might also not be preserved after deleting
+ // branches. The following code keeps traversing loop header's successors
+ // until it finds the trivial condition candidate (condition that is not a
+ // constant). Since unswitching generates branches with constant conditions,
+ // this scenario could be very common in practice.
+ SmallSet<BasicBlock*, 8> Visited;
+
+ while (true) {
+ // If we exit loop or reach a previous visited block, then
+ // we can not reach any trivial condition candidates (unfoldable
+ // branch instructions or switch instructions) and no unswitch
+ // can happen. Exit and return false.
+ if (!currentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
+ return false;
+
+ // Check if this loop will execute any side-effecting instructions (e.g.
+ // stores, calls, volatile loads) in the part of the loop that the code
+ // *would* execute. Check the header first.
+ for (Instruction &I : *CurrentBB)
+ if (I.mayHaveSideEffects())
+ return false;
+
+ // FIXME: add check for constant foldable switch instructions.
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+ if (BI->isUnconditional()) {
+ CurrentBB = BI->getSuccessor(0);
+ } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
+ CurrentBB = BI->getSuccessor(0);
+ } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
+ CurrentBB = BI->getSuccessor(1);
+ } else {
+ // Found a trivial condition candidate: non-foldable conditional branch.
+ break;
+ }
+ } else {
+ break;
+ }
+
+ CurrentTerm = CurrentBB->getTerminator();
+ }
+
+ // CondVal is the condition that controls the trivial condition.
+ // LoopExitBB is the BasicBlock that loop exits when meets trivial condition.
+ Constant *CondVal = nullptr;
+ BasicBlock *LoopExitBB = nullptr;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+ // If this isn't branching on an invariant condition, we can't unswitch it.
+ if (!BI->isConditional())
+ return false;
+
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ currentLoop, Changed);
+
+ // Unswitch only if the trivial condition itself is an LIV (not
+ // partial LIV which could occur in and/or)
+ if (!LoopCond || LoopCond != BI->getCondition())
+ return false;
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
+ // side-effects. If so, determine the value of Cond that causes
+ // it to do this.
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(0)))) {
+ CondVal = ConstantInt::getTrue(Context);
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ BI->getSuccessor(1)))) {
+ CondVal = ConstantInt::getFalse(Context);
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit
+ // block contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
+ CurrentTerm);
+ ++NumBranches;
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+ // If this isn't switching on an invariant condition, we can't unswitch it.
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ currentLoop, Changed);
+
+ // Unswitch only if the trivial condition itself is an LIV (not
+ // partial LIV which could occur in and/or)
+ if (!LoopCond || LoopCond != SI->getCondition())
+ return false;
+
+ // Check to see if a successor of the switch is guaranteed to go to the
+ // latch block or exit through a one exit block without having any
+ // side-effects. If so, determine the value of Cond that causes it to do
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock *LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
+ i.getCaseSuccessor()))) {
+ // Okay, we found a trivial case, remember the value that is trivial.
+ ConstantInt *CaseVal = i.getCaseValue();
+
+ // Check that it was not unswitched before, since already unswitched
+ // trivial vals are looks trivial too.
+ if (BranchesInfo.isUnswitched(SI, CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ CondVal = CaseVal;
+ break;
+ }
+ }
+
+ // If we didn't find a single unique LoopExit block, or if the loop exit
+ // block contains phi nodes, this isn't trivial.
+ if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
+ return false; // Can't handle this.
+
+ UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
+ nullptr);
+ ++NumSwitches;
+ return true;
+ }
+ return false;
+}
+
+/// Split all of the edges from inside the loop to their exit blocks.
+/// Update the appropriate Phi nodes as we do so.
void LoopUnswitch::SplitExitEdges(Loop *L,
const SmallVectorImpl<BasicBlock *> &ExitBlocks){
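
TryTrivialLoopUnswitch above walks from the loop header through blocks that end in unconditional or constant-foldable branches until it reaches the first non-constant condition, and bails out if the walk leaves the loop or revisits a block. A toy CFG walk with the same shape (all types and fields are invented for the example):

#include <cstdio>
#include <set>
#include <vector>

struct ToyBlock {
  int CondConst;   // -1: non-constant condition, 0/1: constant-foldable branch
  int Succ[2];     // successor indices, -1 if the edge leaves the loop
};

static int findCandidate(const std::vector<ToyBlock> &Blocks, int Header) {
  std::set<int> Visited;
  int BB = Header;
  while (BB >= 0 && Visited.insert(BB).second) {
    const ToyBlock &B = Blocks[BB];
    if (B.CondConst < 0)
      return BB;                       // first non-constant condition found
    BB = B.Succ[B.CondConst ? 0 : 1];  // follow the only reachable successor
  }
  return -1;                           // exited the loop or hit a cycle
}

int main() {
  // Header (block 0) branches on a constant 'true' to block 1, which holds the
  // real loop-invariant condition.
  std::vector<ToyBlock> Blocks = {{1, {1, -1}}, {-1, {0, -1}}};
  std::printf("candidate block: %d\n", findCandidate(Blocks, 0));  // 1
}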
@@ -841,15 +958,14 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa",
- /*AliasAnalysis*/ nullptr, DT, LI,
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI,
/*PreserveLCSSA*/ true);
}
}
-/// UnswitchNontrivialCondition - We determined that the loop is profitable
-/// to unswitch when LIC equal Val. Split it into loop versions and test the
-/// condition outside of either loop. Return the loops created as Out1/Out2.
+/// We determined that the loop is profitable to unswitch when LIC equal Val.
+/// Split it into loop versions and test the condition outside of either loop.
+/// Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L, TerminatorInst *TI) {
Function *F = loopHeader->getParent();
@@ -858,8 +974,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
<< " blocks] in Function " << F->getName()
<< " when '" << *Val << "' == " << *LIC << "\n");
- if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
- SE->forgetLoop(L);
+ if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
+ SEWP->getSE().forgetLoop(L);
LoopBlocks.clear();
NewBlocks.clear();
@@ -901,8 +1017,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// Splice the newly inserted blocks into the function right before the
// original preheader.
- F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
- NewBlocks[0], F->end());
+ F->getBasicBlockList().splice(NewPreheader->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F->end());
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
@@ -944,7 +1061,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
- ExitSucc->getFirstInsertionPt());
+ &*ExitSucc->getFirstInsertionPt());
for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
I != E; ++I) {
@@ -960,7 +1077,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ RemapInstruction(&*I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
@@ -994,8 +1112,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
-/// RemoveFromWorklist - Remove all instances of I from the worklist vector
-/// specified.
+/// Remove all instances of I from the worklist vector specified.
static void RemoveFromWorklist(Instruction *I,
std::vector<Instruction*> &Worklist) {
@@ -1003,7 +1120,7 @@ static void RemoveFromWorklist(Instruction *I,
Worklist.end());
}
-/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
+/// When we find that I really equals V, remove I from the
/// program, replacing all uses with V and update the worklist.
static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
@@ -1025,9 +1142,9 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
-// the value specified by Val in the specified loop, or we know it does NOT have
-// that value. Rewrite any uses of LIC or of properties correlated to it.
+/// We know either that the value LIC has the value specified by Val in the
+/// specified loop, or we know it does NOT have that value.
+/// Rewrite any uses of LIC or of properties correlated to it.
void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val,
bool IsEqual) {
@@ -1138,18 +1255,16 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// domtree here -- instead we force it to do a full recomputation
// after the pass is complete -- but we do need to inform it of
// new blocks.
- if (DT)
- DT->addNewBlock(Abort, NewSISucc);
+ DT->addNewBlock(Abort, NewSISucc);
}
SimplifyCode(Worklist, L);
}
-/// SimplifyCode - Okay, now that we have simplified some instructions in the
-/// loop, walk over it and constant prop, dce, and fold control flow where
-/// possible. Note that this is effectively a very simple loop-structure-aware
-/// optimizer. During processing of this loop, L could very well be deleted, so
-/// it must not be used.
+/// Now that we have simplified some instructions in the loop, walk over it and
+/// constant prop, dce, and fold control flow where possible. Note that this is
+/// effectively a very simple loop-structure-aware optimizer. During processing
+/// of this loop, L could very well be deleted, so it must not be used.
///
/// FIXME: When the loop optimizer is more mature, separate this out to a new
/// pass.
@@ -1207,8 +1322,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Succ->replaceAllUsesWith(Pred);
// Move all of the successor contents from Succ to Pred.
- Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
- Succ->end());
+ Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
+ Succ->begin(), Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
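
Many mechanical changes in this commit, such as &*IP, X->getIterator(), and the splice() call above, come from insertion points being expressed as iterators rather than raw instruction pointers. The same distinction can be shown with std::list, which is only an analogy for the ilist-based API:

#include <iostream>
#include <iterator>
#include <list>

int main() {
  std::list<int> Pred = {1, 2, 3}, Succ = {4, 5};
  std::list<int>::iterator BI = std::next(Pred.begin(), 2); // plays the "terminator"
  int *Elem = &*BI;                                 // iterator -> element pointer
  Pred.splice(BI, Succ, Succ.begin(), Succ.end());  // splice needs the iterator
  std::cout << *Elem << " " << Pred.size() << "\n"; // 3 5
}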
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index 3314e1e..41511bc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
#define DEBUG_TYPE "loweratomic"
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
- IRBuilder<> Builder(CXI->getParent(), CXI);
+ IRBuilder<> Builder(CXI);
Value *Ptr = CXI->getPointerOperand();
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
@@ -41,7 +41,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
}
static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
- IRBuilder<> Builder(RMWI->getParent(), RMWI);
+ IRBuilder<> Builder(RMWI);
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
@@ -120,7 +120,7 @@ namespace {
return false;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
- Instruction *Inst = DI++;
+ Instruction *Inst = &*DI++;
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
Changed |= LowerFenceInst(FI);
else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
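
With the IRBuilder now positioned directly on the instruction, LowerAtomicCmpXchgInst still expands a cmpxchg into a plain load, a compare, a conditional store, and an {old value, success} result. A non-LLVM C++ rendition of that lowering, for illustration only:

#include <cstdio>
#include <utility>

// Semantics of the lowered cmpxchg once atomicity is no longer required:
// ordinary load, compare, conditional ordinary store, return old value plus
// whether the exchange happened.
template <typename T>
static std::pair<T, bool> loweredCmpXchg(T *Ptr, T Expected, T Desired) {
  T Old = *Ptr;                 // ordinary load
  bool Equal = (Old == Expected);
  if (Equal)
    *Ptr = Desired;             // ordinary store
  return {Old, Equal};
}

int main() {
  int X = 7;
  auto R = loweredCmpXchg(&X, 7, 42);
  std::printf("old=%d success=%d now=%d\n", R.first, R.second, X); // 7 1 42
}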
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 0c47cbd..2ace902 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -139,7 +139,7 @@ static bool lowerExpectIntrinsic(Function &F) {
ExpectIntrinsicsHandled++;
}
- // remove llvm.expect intrinsics.
+ // Remove llvm.expect intrinsics.
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
CallInst *CI = dyn_cast<CallInst>(BI++);
if (!CI)
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 85012af..0333bf2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -30,7 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <list>
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
@@ -71,9 +72,9 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
return Offset;
}
-/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
-/// constant offset, and return that constant offset. For example, Ptr1 might
-/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
+/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and
+/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2
+/// might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
const DataLayout &DL) {
Ptr1 = Ptr1->stripPointerCasts();
@@ -125,7 +126,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
}
-/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
+/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
/// store 0 -> P+1
/// store 0 -> P+0
@@ -164,8 +165,8 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If any of the stores are a memset, then it is always good to extend the
// memset.
- for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
- if (!isa<StoreInst>(TheStores[i]))
+ for (Instruction *SI : TheStores)
+ if (!isa<StoreInst>(SI))
return true;
// Assume that the code generator is capable of merging pairs of stores
@@ -189,7 +190,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
unsigned NumPointerStores = Bytes / MaxIntSize;
// Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
+ unsigned NumByteStores = Bytes % MaxIntSize;
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
@@ -200,15 +201,14 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
namespace {
class MemsetRanges {
- /// Ranges - A sorted list of the memset ranges. We use std::list here
- /// because each element is relatively large and expensive to copy.
- std::list<MemsetRange> Ranges;
- typedef std::list<MemsetRange>::iterator range_iterator;
+ /// A sorted list of the memset ranges.
+ SmallVector<MemsetRange, 8> Ranges;
+ typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
const DataLayout &DL;
public:
MemsetRanges(const DataLayout &DL) : DL(DL) {}
- typedef std::list<MemsetRange>::const_iterator const_iterator;
+ typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
@@ -240,26 +240,20 @@ public:
} // end anon namespace
-/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
-///
-/// Do a linear search of the ranges to see if this can be joined and/or to
-/// find the insertion point in the list. We keep the ranges sorted for
-/// simplicity here. This is a linear search of a linked list, which is ugly,
-/// however the number of ranges is limited, so this won't get crazy slow.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
- range_iterator I = Ranges.begin(), E = Ranges.end();
- while (I != E && Start > I->End)
- ++I;
+ range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
+ [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
// We now know that I == E, in which case we didn't find anything to merge
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
// to insert a new range. Handle this now.
- if (I == E || End < I->Start) {
+ if (I == Ranges.end() || End < I->Start) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
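
The addRange hunk above replaces a linear scan of a std::list with std::lower_bound over a SmallVector kept sorted by End. The heterogeneous comparator takes the element on the left and the search key on the right, so lower_bound stops at the first range whose End is not below Start, exactly where the old loop "while (I != E && Start > I->End) ++I;" stopped. A standalone sketch of the same lookup (Range and the vector type are illustrative stand-ins):

  #include <algorithm>
  #include <cstdint>
  #include <vector>

  struct Range { int64_t Start, End; };

  // First range whose End >= Start, i.e. the first candidate for merging
  // with a new range that begins at Start.
  static std::vector<Range>::iterator
  findInsertionPoint(std::vector<Range> &Ranges, int64_t Start) {
    return std::lower_bound(Ranges.begin(), Ranges.end(), Start,
                            [](const Range &LHS, int64_t RHS) {
                              return LHS.End < RHS;
                            });
  }
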
@@ -295,7 +289,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
if (End > I->End) {
I->End = End;
range_iterator NextI = I;
- while (++NextI != E && End >= NextI->Start) {
+ while (++NextI != Ranges.end() && End >= NextI->Start) {
// Merge the range in.
I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
if (NextI->End > I->End)
@@ -331,9 +325,9 @@ namespace {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -357,7 +351,7 @@ namespace {
char MemCpyOpt::ID = 0;
}
-// createMemCpyOptPass - The public interface to this file...
+/// The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
@@ -366,14 +360,15 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-/// tryMergingIntoMemset - When scanning forward over instructions, we look for
-/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consecutive ones, it
-/// attempts to merge them together into a memcpy/memset.
+/// When scanning forward over instructions, we look for some other patterns to
+/// fold away. In particular, this looks for stores to neighboring locations of
+/// memory. If it sees enough consecutive ones, it attempts to merge them
+/// together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
@@ -384,7 +379,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// are stored.
MemsetRanges Ranges(DL);
- BasicBlock::iterator BI = StartInst;
+ BasicBlock::iterator BI(StartInst);
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
@@ -439,14 +434,12 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// If we create any memsets, we put it right before the first instruction that
// isn't part of the memset block. This ensures that the memset is dominated
// by any addressing instruction needed by the start of the block.
- IRBuilder<> Builder(BI);
+ IRBuilder<> Builder(&*BI);
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
Instruction *AMemSet = nullptr;
- for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
- I != E; ++I) {
- const MemsetRange &Range = *I;
+ for (const MemsetRange &Range : Ranges) {
if (Range.TheStores.size() == 1) continue;
@@ -470,19 +463,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
DEBUG(dbgs() << "Replace stores:\n";
- for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- dbgs() << *Range.TheStores[i] << '\n';
+ for (Instruction *SI : Range.TheStores)
+ dbgs() << *SI << '\n';
dbgs() << "With: " << *AMemSet << '\n');
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
// Zap all the stores.
- for (SmallVectorImpl<Instruction *>::const_iterator
- SI = Range.TheStores.begin(),
- SE = Range.TheStores.end(); SI != SE; ++SI) {
- MD->removeInstruction(*SI);
- (*SI)->eraseFromParent();
+ for (Instruction *SI : Range.TheStores) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
}
++NumMemSetInfer;
}
@@ -493,6 +484,16 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
+
+ // Avoid merging nontemporal stores since the resulting
+ // memcpy/memset would not be able to preserve the nontemporal hint.
+ // In theory we could teach this pass how to propagate the !nontemporal
+ // metadata to memset calls. However, that change would force the backend to
+ // conservatively expand !nontemporal memset calls back to sequences of
+ // store instructions (effectively undoing the merging).
+ if (SI->getMetadata(LLVMContext::MD_nontemporal))
+ return false;
+
const DataLayout &DL = SI->getModule()->getDataLayout();
// Detect cases where we're performing call slot forwarding, but
@@ -509,11 +510,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
- E = C; I != E; --I) {
- if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+ for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
+ I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
C = nullptr;
break;
}
@@ -554,7 +555,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
- BBI = I; // Don't invalidate iterator.
+ BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
@@ -567,14 +568,14 @@ bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
MSI->getValue())) {
- BBI = I; // Don't invalidate iterator.
+ BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
return false;
}
-/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
@@ -710,12 +711,12 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// unexpected manner, for example via a global, which we deduce from
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
// If necessary, perform additional analysis.
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
- if (MR != AliasAnalysis::NoModRef)
+ if (MR != MRI_NoModRef)
return false;
// All the checks have passed, so do the transformation.
@@ -749,11 +750,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- };
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_invariant_group};
combineMetadata(C, cpy, KnownIDs);
// Remove the memcpy.
@@ -763,10 +762,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return true;
}
-/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
-/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
-/// copy from MDep's input if we can.
-///
+/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
+/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
@@ -788,7 +785,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
@@ -802,8 +799,9 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false, M, M->getParent());
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+ M->getIterator(), M->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -860,8 +858,9 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
return false;
// Check that there are no other dependencies on the memset destination.
- MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
- MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
+ MemDepResult DstDepInfo =
+ MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
+ MemCpy->getIterator(), MemCpy->getParent());
if (DstDepInfo.getInst() != MemSet)
return false;
@@ -936,7 +935,7 @@ bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
-/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
@@ -998,8 +997,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
}
MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
- M, M->getParent());
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+ SrcLoc, true, M->getIterator(), M->getParent());
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
@@ -1037,10 +1036,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return false;
}
-/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
-/// are guaranteed not to alias.
+/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
+/// not to alias.
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
if (!TLI->has(LibFunc::memmove))
return false;
@@ -1053,12 +1052,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
// If not, then we know we can transform this.
- Module *Mod = M->getParent()->getParent()->getParent();
Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
- ArgTys));
+ M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
+ Intrinsic::memcpy, ArgTys));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
@@ -1068,7 +1066,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
return true;
}
-/// processByValArgument - This is called on every byval argument in call sites.
+/// This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
// Find out what feeds this byval argument.
@@ -1076,8 +1074,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
- CS.getInstruction()->getParent());
+ MemoryLocation(ByValArg, ByValSize), true,
+ CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
if (!DepInfo.isClobber())
return false;
@@ -1119,9 +1117,9 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
//
// NOTE: This is conservative, it will stop on any read from the source loc,
// not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- CS.getInstruction(), MDep->getParent());
+ MemDepResult SourceDep = MD->getPointerDependencyFrom(
+ MemoryLocation::getForSource(MDep), false,
+ CS.getInstruction()->getIterator(), MDep->getParent());
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
@@ -1140,7 +1138,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
return true;
}
-/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+/// Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
@@ -1148,7 +1146,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
- Instruction *I = BI++;
+ Instruction *I = &*BI++;
bool RepeatInstruction = false;
@@ -1177,9 +1175,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
return MadeChange;
}
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
+/// This is the main transformation entry point for a function.
bool MemCpyOpt::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
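
A change that repeats across MemCpyOptimizer.cpp and the files below is the retirement of the old AliasAnalysis analysis group in favor of the AAResultsWrapperPass / GlobalsAAWrapperPass wrappers, with mod/ref queries now answered in terms of ModRefInfo values such as MRI_NoModRef. Under the legacy pass manager the resulting boilerplate has roughly this shape (ExamplePass is a hypothetical pass used only to show the pattern):

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/GlobalsModRef.h"
  #include "llvm/IR/Function.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  namespace {
  struct ExamplePass : public FunctionPass {
    static char ID;
    ExamplePass() : FunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<AAResultsWrapperPass>();   // was: addRequired<AliasAnalysis>()
      AU.addPreserved<GlobalsAAWrapperPass>();  // was: addPreserved<AliasAnalysis>()
    }

    bool runOnFunction(Function &F) override {
      AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
      (void)AA;  // queries such as getModRefInfo now return MRI_* enum values
      return false;
    }
  };
  char ExamplePass::ID = 0;
  }
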
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 643f374..c812d61 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -78,6 +78,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -91,6 +92,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mldst-motion"
@@ -106,7 +108,7 @@ class MergedLoadStoreMotion : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit MergedLoadStoreMotion(void)
+ MergedLoadStoreMotion()
: FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
}
@@ -116,10 +118,11 @@ public:
private:
// This transformation requires dominator postdominator info
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
- AU.addPreserved<AliasAnalysis>();
}
// Helper routines
@@ -156,7 +159,7 @@ private:
};
char MergedLoadStoreMotion::ID = 0;
-}
+} // anonymous namespace
///
/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
@@ -169,7 +172,8 @@ INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
@@ -236,12 +240,11 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
/// being loaded or protect against the load from happening
/// it is considered a hoist barrier.
///
-
bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
const Instruction& End,
LoadInst* LI) {
MemoryLocation Loc = MemoryLocation::get(LI);
- return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod);
+ return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
}
///
@@ -256,7 +259,7 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
++BBI) {
- Instruction *Inst = BBI;
+ Instruction *Inst = &*BBI;
// Only merge and hoist loads when their result is used only in BB
if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
@@ -293,7 +296,7 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
// Intersect optional metadata.
HoistCand->intersectOptionalDataWith(ElseInst);
- HoistCand->dropUnknownMetadata();
+ HoistCand->dropUnknownNonDebugMetadata();
// Prepend point for instruction insert
Instruction *HoistPt = BB->getTerminator();
@@ -363,8 +366,7 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
int NLoads = 0;
for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
BBI != BBE;) {
-
- Instruction *I = BBI;
+ Instruction *I = &*BBI;
++BBI;
// Only move non-simple (atomic, volatile) loads.
@@ -394,11 +396,10 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
/// value being stored or protect against the store from
/// happening it is considered a sink barrier.
///
-
bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
const Instruction &End,
MemoryLocation Loc) {
- return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
+ return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
}
///
@@ -438,23 +439,16 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
StoreInst *S1) {
// Create a phi if the values mismatch.
- PHINode *NewPN = 0;
+ PHINode *NewPN = nullptr;
Value *Opd1 = S0->getValueOperand();
Value *Opd2 = S1->getValueOperand();
if (Opd1 != Opd2) {
NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
- BB->begin());
+ &BB->front());
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
- if (NewPN->getType()->getScalarType()->isPointerTy()) {
- // AA needs to be informed when a PHI-use of the pointer value is added
- for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) {
- unsigned J = PHINode::getOperandNumForIncomingValue(I);
- AA->addEscapingUse(NewPN->getOperandUse(J));
- }
- if (MD)
- MD->invalidateCachedPointerInfo(NewPN);
- }
+ if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(NewPN);
}
return NewPN;
}
@@ -479,12 +473,12 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
// Intersect optional metadata.
S0->intersectOptionalDataWith(S1);
- S0->dropUnknownMetadata();
+ S0->dropUnknownNonDebugMetadata();
// Create the new store to be inserted at the join point.
StoreInst *SNew = (StoreInst *)(S0->clone());
Instruction *ANew = A0->clone();
- SNew->insertBefore(InsertPt);
+ SNew->insertBefore(&*InsertPt);
ANew->insertBefore(SNew);
assert(S0->getParent() == A0->getParent());
@@ -566,12 +560,13 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
}
return MergedStores;
}
+
///
/// \brief Run the transformation for each function
///
bool MergedLoadStoreMotion::runOnFunction(Function &F) {
MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool Changed = false;
DEBUG(dbgs() << "Instruction Merger\n");
@@ -579,7 +574,7 @@ bool MergedLoadStoreMotion::runOnFunction(Function &F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
- BasicBlock *BB = FI++;
+ BasicBlock *BB = &*FI++;
// Hoist equivalent loads and sink stores
// outside diamonds when possible
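
The getPHIOperand hunk drops the manual AliasAnalysis::addEscapingUse bookkeeping; with the new AA infrastructure it is enough to build the phi and, if the value is a pointer, invalidate MemoryDependenceAnalysis' cached pointer info. A condensed sketch of that shape, assuming S0 and S1 are the two stores being sunk into the join block and MD may be null:

  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Sketch: merge the two stored values with a phi at the join block.
  static PHINode *mergeStoredValues(BasicBlock *Join, StoreInst *S0,
                                    StoreInst *S1,
                                    MemoryDependenceAnalysis *MD) {
    Value *V0 = S0->getValueOperand(), *V1 = S1->getValueOperand();
    if (V0 == V1)
      return nullptr;                              // identical values, no phi
    PHINode *PN = PHINode::Create(V0->getType(), 2, V1->getName() + ".sink",
                                  &Join->front()); // insert before first instr
    PN->addIncoming(V0, S0->getParent());
    PN->addIncoming(V1, S1->getParent());
    if (MD && PN->getType()->getScalarType()->isPointerTy())
      MD->invalidateCachedPointerInfo(PN);         // keep MemDep consistent
    return PN;
  }
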
diff --git a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index f42f830..c8f885e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -71,8 +71,8 @@
//
// Limitations and TODO items:
//
-// 1) We only considers n-ary adds for now. This should be extended and
-// generalized.
+// 1) We only consider n-ary adds and muls for now. This should be extended
+// and generalized.
//
//===----------------------------------------------------------------------===//
@@ -110,11 +110,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
@@ -145,12 +145,23 @@ private:
unsigned I, Value *LHS,
Value *RHS, Type *IndexedType);
- // Reassociate Add for better CSE.
- Instruction *tryReassociateAdd(BinaryOperator *I);
- // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
- Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
- // Rewrites I to LHS + RHS if LHS is computed already.
- Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+ // Reassociate binary operators for better CSE.
+ Instruction *tryReassociateBinaryOp(BinaryOperator *I);
+
+ // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
+ // passed.
+ Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
+ BinaryOperator *I);
+ // Rewrites I to (LHS op RHS) if LHS is computed already.
+ Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
+ BinaryOperator *I);
+
+ // Tries to match Op1 and Op2 by using V.
+ bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
+
+ // Gets SCEV for (LHS op RHS).
+ const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+ const SCEV *RHS);
// Returns the closest dominator of \c Dominatee that computes
// \c CandidateExpr. Returns null if not found.
@@ -161,11 +172,6 @@ private:
// GEP's pointer size, i.e., whether Index needs to be sign-extended in order
// to be an index of GEP.
bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
- // Returns whether V is known to be non-negative at context \c Ctxt.
- bool isKnownNonNegative(Value *V, Instruction *Ctxt);
- // Returns whether AO may sign overflow at context \c Ctxt. It computes a
- // conservative result -- it answers true when not sure.
- bool maySignOverflow(AddOperator *AO, Instruction *Ctxt);
AssumptionCache *AC;
const DataLayout *DL;
@@ -182,7 +188,7 @@ private:
// foo(a + b);
// if (p2)
// bar(a + b);
- DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+ DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
};
} // anonymous namespace
@@ -191,7 +197,7 @@ INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
@@ -207,7 +213,7 @@ bool NaryReassociate::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -224,6 +230,7 @@ static bool isPotentiallyNaryReassociable(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::GetElementPtr:
+ case Instruction::Mul:
return true;
default:
return false;
@@ -239,19 +246,21 @@ bool NaryReassociate::doOneIteration(Function &F) {
Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
BasicBlock *BB = Node->getBlock();
for (auto I = BB->begin(); I != BB->end(); ++I) {
- if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) {
- const SCEV *OldSCEV = SE->getSCEV(I);
- if (Instruction *NewI = tryReassociate(I)) {
+ if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
+ const SCEV *OldSCEV = SE->getSCEV(&*I);
+ if (Instruction *NewI = tryReassociate(&*I)) {
Changed = true;
- SE->forgetValue(I);
+ SE->forgetValue(&*I);
I->replaceAllUsesWith(NewI);
- RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
- I = NewI;
+ // If SeenExprs contains I's WeakVH, that entry will be replaced with
+ // nullptr.
+ RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
+ I = NewI->getIterator();
}
// Add the rewritten instruction to SeenExprs; the original instruction
// is deleted.
- const SCEV *NewSCEV = SE->getSCEV(I);
- SeenExprs[NewSCEV].push_back(I);
+ const SCEV *NewSCEV = SE->getSCEV(&*I);
+ SeenExprs[NewSCEV].push_back(WeakVH(&*I));
// Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
// is equivalent to I. However, ScalarEvolution::getSCEV may
// weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
@@ -271,7 +280,7 @@ bool NaryReassociate::doOneIteration(Function &F) {
//
// This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
if (NewSCEV != OldSCEV)
- SeenExprs[OldSCEV].push_back(I);
+ SeenExprs[OldSCEV].push_back(WeakVH(&*I));
}
}
}
@@ -281,7 +290,8 @@ bool NaryReassociate::doOneIteration(Function &F) {
Instruction *NaryReassociate::tryReassociate(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Add:
- return tryReassociateAdd(cast<BinaryOperator>(I));
+ case Instruction::Mul:
+ return tryReassociateBinaryOp(cast<BinaryOperator>(I));
case Instruction::GetElementPtr:
return tryReassociateGEP(cast<GetElementPtrInst>(I));
default:
@@ -352,27 +362,6 @@ bool NaryReassociate::requiresSignExtension(Value *Index,
return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
}
-bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) {
- bool NonNegative, Negative;
- // TODO: ComputeSignBits is expensive. Consider caching the results.
- ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT);
- return NonNegative;
-}
-
-bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) {
- if (AO->hasNoSignedWrap())
- return false;
-
- Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
- // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow.
- // TODO: handle the negative case as well.
- if (isKnownNonNegative(AO, Ctxt) &&
- (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt)))
- return false;
-
- return true;
-}
-
GetElementPtrInst *
NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
Type *IndexedType) {
@@ -381,7 +370,7 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
IndexToSplit = SExt->getOperand(0);
} else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
// zext can be treated as sext if the source is non-negative.
- if (isKnownNonNegative(ZExt->getOperand(0), GEP))
+ if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
IndexToSplit = ZExt->getOperand(0);
}
@@ -389,8 +378,11 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
// If the I-th index needs sext and the underlying add is not equipped with
// nsw, we cannot split the add because
// sext(LHS + RHS) != sext(LHS) + sext(RHS).
- if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP))
+ if (requiresSignExtension(IndexToSplit, GEP) &&
+ computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
+ OverflowResult::NeverOverflows)
return nullptr;
+
Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
// IndexToSplit = LHS + RHS.
if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
@@ -415,7 +407,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
IndexExprs.push_back(SE->getSCEV(*Index));
// Replace the I-th index with LHS.
IndexExprs[I] = SE->getSCEV(LHS);
- if (isKnownNonNegative(LHS, GEP) &&
+ if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
DL->getTypeSizeInBits(LHS->getType()) <
DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
// Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
@@ -429,19 +421,20 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
IndexExprs, GEP->isInBounds());
- auto *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
+ Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
if (Candidate == nullptr)
return nullptr;
- PointerType *TypeOfCandidate = dyn_cast<PointerType>(Candidate->getType());
- // Pretty rare but theoretically possible when a numeric value happens to
- // share CandidateExpr.
- if (TypeOfCandidate == nullptr)
- return nullptr;
+ IRBuilder<> Builder(GEP);
+ // Candidate does not necessarily have the same pointer type as GEP. Use
+ // bitcast or pointer cast to make sure they have the same type, so that the
+ // later RAUW doesn't complain.
+ Candidate = Builder.CreateBitOrPointerCast(Candidate, GEP->getType());
+ assert(Candidate->getType() == GEP->getType());
// NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
- Type *ElementType = TypeOfCandidate->getElementType();
+ Type *ElementType = GEP->getType()->getElementType();
uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
// Another less rare case: because I is not necessarily the last index of the
// GEP, the size of the type at the I-th index (IndexedSize) is not
@@ -461,8 +454,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
return nullptr;
// NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
- IRBuilder<> Builder(GEP);
- Type *IntPtrTy = DL->getIntPtrType(TypeOfCandidate);
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
if (RHS->getType() != IntPtrTy)
RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
if (IndexedSize != ElementSize) {
@@ -476,54 +468,89 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
return NewGEP;
}
-Instruction *NaryReassociate::tryReassociateAdd(BinaryOperator *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
- if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+ if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
return NewI;
- if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+ if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
return NewI;
return nullptr;
}
-Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
- Instruction *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
+ BinaryOperator *I) {
Value *A = nullptr, *B = nullptr;
- // To be conservative, we reassociate I only when it is the only user of A+B.
- if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
- // I = (A + B) + RHS
- // = (A + RHS) + B or (B + RHS) + A
+ // To be conservative, we reassociate I only when it is the only user of (A op
+ // B).
+ if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
+ // I = (A op B) op RHS
+ // = (A op RHS) op B or (B op RHS) op A
const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
const SCEV *RHSExpr = SE->getSCEV(RHS);
if (BExpr != RHSExpr) {
- if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+ if (auto *NewI =
+ tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
return NewI;
}
if (AExpr != RHSExpr) {
- if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+ if (auto *NewI =
+ tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
return NewI;
}
}
return nullptr;
}
-Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
- Value *RHS, Instruction *I) {
- auto Pos = SeenExprs.find(LHSExpr);
- // Bail out if LHSExpr is not previously seen.
- if (Pos == SeenExprs.end())
- return nullptr;
-
+Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
+ Value *RHS,
+ BinaryOperator *I) {
// Look for the closest dominator LHS of I that computes LHSExpr, and replace
- // I with LHS + RHS.
+ // I with LHS op RHS.
auto *LHS = findClosestMatchingDominator(LHSExpr, I);
if (LHS == nullptr)
return nullptr;
- Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ Instruction *NewI = nullptr;
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ break;
+ case Instruction::Mul:
+ NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
+ break;
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
NewI->takeName(I);
return NewI;
}
+bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
+ Value *&Op2) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
+ case Instruction::Mul:
+ return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
+ return false;
+}
+
+const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+ const SCEV *RHS) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ return SE->getAddExpr(LHS, RHS);
+ case Instruction::Mul:
+ return SE->getMulExpr(LHS, RHS);
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ }
+ return nullptr;
+}
+
Instruction *
NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
Instruction *Dominatee) {
@@ -537,9 +564,13 @@ NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
// future instruction either. Therefore, we pop it out of the stack. This
// optimization makes the algorithm O(n).
while (!Candidates.empty()) {
- Instruction *Candidate = Candidates.back();
- if (DT->dominates(Candidate, Dominatee))
- return Candidate;
+ // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
+ // during rewriting.
+ if (Value *Candidate = Candidates.back()) {
+ Instruction *CandidateInstruction = cast<Instruction>(Candidate);
+ if (DT->dominates(CandidateInstruction, Dominatee))
+ return CandidateInstruction;
+ }
Candidates.pop_back();
}
return nullptr;
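
The NaryReassociate generalization from adds to adds and muls funnels through matchTernaryOp and getBinarySCEV, both shown above. The PatternMatch idiom they rely on is compact enough to restate standalone (the helper name is illustrative, and the default case returns false here rather than asserting as the patch does):

  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/PatternMatch.h"
  using namespace llvm;
  using namespace llvm::PatternMatch;

  // Sketch: does V compute (Op1 op Op2) with the same opcode as I?
  static bool matchSameOpcode(BinaryOperator *I, Value *V,
                              Value *&Op1, Value *&Op2) {
    switch (I->getOpcode()) {
    case Instruction::Add:
      return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
    case Instruction::Mul:
      return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
    default:
      return false;  // other opcodes are not reassociated here
    }
  }

The switch of SeenExprs to SmallVector<WeakVH, 2> serves a related robustness goal: candidates deleted by RecursivelyDeleteTriviallyDeadInstructions become null handles, and findClosestMatchingDominator now simply pops them.
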
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 31d7df3..9f26f78 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -154,7 +154,7 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
Phi->addIncoming(Call, &CurrBB);
Phi->addIncoming(LibCall, LibCallBB);
- BB = JoinBB;
+ BB = JoinBB->getIterator();
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 366301a..28c610c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -27,7 +27,7 @@
// well defined state for inspection by the collector. In the current
// implementation, this is done via the insertion of poll sites at method entry
// and the backedge of most loops. We try to avoid inserting more polls than
-// are neccessary to ensure a finite period between poll sites. This is not
+// are necessary to ensure a finite period between poll sites. This is not
// because the poll itself is expensive in the generated code; it's not. Polls
// do tend to impact the optimizer itself in negative ways; we'd like to avoid
// perturbing the optimization of the method as much as we can.
@@ -91,13 +91,15 @@ STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");
using namespace llvm;
-// Ignore oppurtunities to avoid placing safepoints on backedges, useful for
+// Ignore opportunities to avoid placing safepoints on backedges, useful for
// validation
static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
cl::init(false));
-/// If true, do not place backedge safepoints in counted loops.
-static cl::opt<bool> SkipCounted("spp-counted", cl::Hidden, cl::init(true));
+/// How narrow does the trip count of a loop have to be for the loop to be
+/// considered "counted"? Counted loops do not get safepoints at backedges.
+static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width",
+ cl::Hidden, cl::init(32));
// If true, split the backedge of a loop when placing the safepoint, otherwise
// split the latch block itself. Both are useful to support for
@@ -121,7 +123,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
std::vector<TerminatorInst *> PollLocations;
/// True unless we're running spp-no-calls in which case we need to disable
- /// the call dependend placement opts.
+ /// the call-dependent placement opts.
bool CallSafepointsEnabled;
ScalarEvolution *SE = nullptr;
@@ -142,7 +144,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
}
bool runOnFunction(Function &F) override {
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
@@ -153,7 +155,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
// We no longer modify the IR at all in this pass. Thus all
// analysis are preserved.
@@ -190,10 +192,8 @@ static void
InsertSafepointPoll(Instruction *InsertBefore,
std::vector<CallSite> &ParsePointsNeeded /*rval*/);
-static bool isGCLeafFunction(const CallSite &CS);
-
static bool needsStatepoint(const CallSite &CS) {
- if (isGCLeafFunction(CS))
+ if (callsGCLeafFunction(CS))
return false;
if (CS.isCall()) {
CallInst *call = cast<CallInst>(CS.getInstruction());
@@ -206,7 +206,7 @@ static bool needsStatepoint(const CallSite &CS) {
return true;
}
-static Value *ReplaceWithStatepoint(const CallSite &CS, Pass *P);
+static Value *ReplaceWithStatepoint(const CallSite &CS);
/// Returns true if this loop is known to contain a call safepoint which
/// must unconditionally execute on any iteration of the loop which returns
@@ -220,7 +220,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
// For the moment, we look only for the 'cuts' that consist of a single call
// instruction in a block which is dominated by the Header and dominates the
// loop latch (Pred) block. Somewhat surprisingly, walking the entire chain
- // of such dominating blocks gets substaintially more occurences than just
+ // of such dominating blocks gets substantially more occurrences than just
// checking the Pred and Header blocks themselves. This may be due to the
// density of loop exit conditions caused by range and null checks.
// TODO: structure this as an analysis pass, cache the result for subloops,
@@ -255,18 +255,12 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
/// conservatism in the analysis.
static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
BasicBlock *Pred) {
- // Only used when SkipCounted is off
- const unsigned upperTripBound = 8192;
-
// A conservative bound on the loop as a whole.
const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
- if (MaxTrips != SE->getCouldNotCompute()) {
- if (SE->getUnsignedRange(MaxTrips).getUnsignedMax().ult(upperTripBound))
- return true;
- if (SkipCounted &&
- SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(32))
- return true;
- }
+ if (MaxTrips != SE->getCouldNotCompute() &&
+ SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
+ CountedLoopTripWidth))
+ return true;
// If this is a conditional branch to the header with the alternate path
// being outside the loop, we can ask questions about the execution frequency
@@ -275,13 +269,10 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
// This returns an exact expression only. TODO: We really only need an
// upper bound here, but SE doesn't expose that.
const SCEV *MaxExec = SE->getExitCount(L, Pred);
- if (MaxExec != SE->getCouldNotCompute()) {
- if (SE->getUnsignedRange(MaxExec).getUnsignedMax().ult(upperTripBound))
- return true;
- if (SkipCounted &&
- SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(32))
+ if (MaxExec != SE->getCouldNotCompute() &&
+ SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
+ CountedLoopTripWidth))
return true;
- }
}
return /* not finite */ false;
@@ -432,14 +423,14 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
assert(hasNextInstruction(I) &&
"first check if there is a next instruction!");
if (I->isTerminator()) {
- return I->getParent()->getUniqueSuccessor()->begin();
+ return &I->getParent()->getUniqueSuccessor()->front();
} else {
- return std::next(BasicBlock::iterator(I));
+ return &*++I->getIterator();
}
};
Instruction *cursor = nullptr;
- for (cursor = F.getEntryBlock().begin(); hasNextInstruction(cursor);
+ for (cursor = &F.getEntryBlock().front(); hasNextInstruction(cursor);
cursor = nextInstruction(cursor)) {
// We need to ensure a safepoint poll occurs before any 'real' call. The
@@ -466,7 +457,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
static void findCallSafepoints(Function &F,
std::vector<CallSite> &Found /*rval*/) {
assert(Found.empty() && "must be empty!");
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
Instruction *inst = &I;
if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
CallSite CS(inst);
@@ -713,7 +704,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
Invoke->getParent());
}
- Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
+ Value *GCResult = ReplaceWithStatepoint(CS);
Results.push_back(GCResult);
}
assert(Results.size() == ParsePointNeeded.size());
@@ -747,7 +738,7 @@ FunctionPass *llvm::createPlaceSafepointsPass() {
INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
"place-backedge-safepoints-impl",
"Place Backedge Safepoints", false, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
@@ -759,31 +750,6 @@ INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
false, false)
-static bool isGCLeafFunction(const CallSite &CS) {
- Instruction *inst = CS.getInstruction();
- if (isa<IntrinsicInst>(inst)) {
- // Most LLVM intrinsics are things which can never take a safepoint.
- // As a result, we don't need to have the stack parsable at the
- // callsite. This is a highly useful optimization since intrinsic
- // calls are fairly prevelent, particularly in debug builds.
- return true;
- }
-
- // If this function is marked explicitly as a leaf call, we don't need to
- // place a safepoint of it. In fact, for correctness we *can't* in many
- // cases. Note: Indirect calls return Null for the called function,
- // these obviously aren't runtime functions with attributes
- // TODO: Support attributes on the call site as well.
- const Function *F = CS.getCalledFunction();
- bool isLeaf =
- F &&
- F->getFnAttribute("gc-leaf-function").getValueAsString().equals("true");
- if (isLeaf) {
- return true;
- }
- return false;
-}
-
static void
InsertSafepointPoll(Instruction *InsertBefore,
std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
@@ -796,6 +762,7 @@ InsertSafepointPoll(Instruction *InsertBefore,
// path call - where we need to insert a safepoint (parsepoint).
auto *F = M->getFunction(GCSafepointPollName);
+ assert(F && "gc.safepoint_poll function is missing");
assert(F->getType()->getElementType() ==
FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
"gc.safepoint_poll declared with wrong type");
@@ -864,10 +831,8 @@ InsertSafepointPoll(Instruction *InsertBefore,
/// Replaces the given call site (Call or Invoke) with a gc.statepoint
/// intrinsic with an empty deoptimization arguments list. This does
/// NOT do explicit relocation for GC support.
-static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
- Pass *P) {
- assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
- "must be set");
+static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) {
+ assert(CS.getInstruction()->getModule() && "must be set");
// TODO: technically, a pass is not allowed to get functions from within a
// function pass since it might trigger a new function addition. Refactor
@@ -917,15 +882,10 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
AttrsToRemove);
- Value *StatepointTarget = NumPatchBytes == 0
- ? CS.getCalledValue()
- : ConstantPointerNull::get(cast<PointerType>(
- CS.getCalledValue()->getType()));
-
if (CS.isCall()) {
CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
CallInst *Call = Builder.CreateGCStatepointCall(
- ID, NumPatchBytes, StatepointTarget,
+ ID, NumPatchBytes, CS.getCalledValue(),
makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
"safepoint_token");
Call->setTailCall(ToReplace->isTailCall());
@@ -938,7 +898,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
Token = Call;
- // Put the following gc_result and gc_relocate calls immediately after the
+ // Put the following gc_result and gc_relocate calls immediately after
// the old call (which we're about to delete).
assert(ToReplace->getNextNode() && "not a terminator, must have next");
Builder.SetInsertPoint(ToReplace->getNextNode());
@@ -951,7 +911,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
// original block.
Builder.SetInsertPoint(ToReplace->getParent());
InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
- ID, NumPatchBytes, StatepointTarget, ToReplace->getNormalDest(),
+ ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(),
ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
None, None, "safepoint_token");
@@ -967,7 +927,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
// We'll insert the gc.result into the normal block
BasicBlock *NormalDest = ToReplace->getNormalDest();
// Cannot insert gc.result in case of phi nodes present.
- // Should have removed this cases prior to runnning this function
+ // Should have removed these cases prior to running this function
assert(!isa<PHINode>(NormalDest->begin()));
Instruction *IP = &*(NormalDest->getFirstInsertionPt());
Builder.SetInsertPoint(IP);
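
In PlaceSafepoints the binary SkipCounted switch becomes a configurable trip-count width, and both the whole-loop and per-exit checks reduce to the same SCEV query. Pulled out on its own, that test looks roughly like this (a sketch; the loop, ScalarEvolution and the width are assumed to come from the pass):

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;

  // Sketch: is the loop's worst-case trip count known to fit in Width bits?
  static bool tripCountFitsIn(Loop *L, ScalarEvolution *SE, unsigned Width) {
    const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
    if (MaxTrips == SE->getCouldNotCompute())
      return false;                               // trip count is unknown
    return SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(Width);
  }

With the default spp-counted-loop-trip-width of 32 this keeps the old "fits in 32 bits" behaviour while letting the threshold be tuned from the command line.
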
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index d1acf78..fb970c7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -26,6 +26,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -62,7 +64,7 @@ namespace {
/// Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
- Module *M = I->getParent()->getParent()->getParent();
+ Module *M = I->getModule();
dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
<< *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -82,20 +84,6 @@ namespace {
Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
- /// \brief Sort factors by their Base.
- struct BaseSorter {
- bool operator()(const Factor &LHS, const Factor &RHS) {
- return LHS.Base < RHS.Base;
- }
- };
-
- /// \brief Compare factors for equal bases.
- struct BaseEqual {
- bool operator()(const Factor &LHS, const Factor &RHS) {
- return LHS.Base == RHS.Base;
- }
- };
-
/// \brief Sort factors in descending order by their power.
struct PowerDescendingSorter {
bool operator()(const Factor &LHS, const Factor &RHS) {
@@ -172,6 +160,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
private:
void BuildRankMap(Function &F);
@@ -255,27 +244,6 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
return nullptr;
}
-static bool isUnmovableInstruction(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::PHI:
- case Instruction::LandingPad:
- case Instruction::Alloca:
- case Instruction::Load:
- case Instruction::Invoke:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- return true;
- case Instruction::Call:
- return !isa<DbgInfoIntrinsic>(I);
- default:
- return false;
- }
-}
-
void Reassociate::BuildRankMap(Function &F) {
unsigned i = 2;
@@ -295,7 +263,7 @@ void Reassociate::BuildRankMap(Function &F) {
// we cannot move. This ensures that the ranks for these instructions are
// all different in the block.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (isUnmovableInstruction(I))
+ if (mayBeMemoryDependent(*I))
ValueRankMap[&*I] = ++BBRank;
}
}
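Editor's note: the hunk above replaces Reassociate's hand-maintained opcode whitelist with ValueTracking's mayBeMemoryDependent(). A minimal sketch of the ranking idea, assuming LLVM 3.8-era headers; the helper name rankUnmovable is hypothetical and only illustrates the loop shown in the hunk.

#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// Give every instruction we must not reorder across (may read/write memory,
// may trap, etc.) its own strictly increasing rank within the block.
static void rankUnmovable(llvm::BasicBlock &BB, unsigned BBRank,
                          llvm::DenseMap<llvm::Value *, unsigned> &ValueRankMap) {
  for (llvm::Instruction &I : BB)
    if (llvm::mayBeMemoryDependent(I)) // replaces the old opcode switch
      ValueRankMap[&I] = ++BBRank;
}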
@@ -913,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
/// that computes the negative version of the value specified. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add intermediate instructions to the redo list that are modified while
+/// pushing the negates through adds. These will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+ SetVector<AssertingVH<Instruction>> &ToRedo) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->getType()->isFPOrFPVectorTy()) {
return ConstantExpr::getFNeg(C);
@@ -934,8 +906,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
if (BinaryOperator *I =
isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(I->getOperand(0), BI));
- I->setOperand(1, NegateValue(I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
if (I->getOpcode() == Instruction::Add) {
I->setHasNoUnsignedWrap(false);
I->setHasNoSignedWrap(false);
@@ -948,6 +920,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
//
I->moveBefore(BI);
I->setName(I->getName()+".neg");
+
+ // Add the intermediate negates to the redo list as processing them later
+ // could expose more reassociating opportunities.
+ ToRedo.insert(I);
return I;
}
@@ -972,26 +948,28 @@ static Value *NegateValue(Value *V, Instruction *BI) {
if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
InsertPt = II->getNormalDest()->begin();
} else {
- InsertPt = InstInput;
- ++InsertPt;
+ InsertPt = ++InstInput->getIterator();
}
while (isa<PHINode>(InsertPt)) ++InsertPt;
} else {
InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
}
- TheNeg->moveBefore(InsertPt);
+ TheNeg->moveBefore(&*InsertPt);
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
TheNeg->setHasNoSignedWrap(false);
} else {
TheNeg->andIRFlags(BI);
}
+ ToRedo.insert(TheNeg);
return TheNeg;
}
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
- return CreateNeg(V, V->getName() + ".neg", BI, BI);
+ BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+ ToRedo.insert(NewNeg);
+ return NewNeg;
}
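Editor's note: NegateValue now threads a SetVector<AssertingVH<Instruction>> through the recursion so every negate it creates or moves gets revisited. A hedged sketch of how such a redo list is typically drained, assuming LLVM 3.8-era headers; drainRedoList is a hypothetical stand-in for the pass's own loop.

#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/Local.h"

// Pop instructions in LIFO order; AssertingVH asserts if anything still on
// the list is deleted behind our back.
static void drainRedoList(
    llvm::SetVector<llvm::AssertingVH<llvm::Instruction>> &ToRedo,
    void (*Revisit)(llvm::Instruction *)) {
  while (!ToRedo.empty()) {
    llvm::Instruction *I = ToRedo.pop_back_val();
    if (llvm::isInstructionTriviallyDead(I))
      I->eraseFromParent();   // nothing left to reassociate
    else
      Revisit(I);             // e.g. the pass's OptimizeInst
  }
}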
/// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -1025,14 +1003,15 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// If we have (X-Y), and if either X is an add, or if this is only used by an
/// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
// Calculate the negative value of Operand 1 of the sub instruction,
// and set it as the RHS of the add instruction we just made.
//
- Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -1166,7 +1145,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
return nullptr;
}
- BasicBlock::iterator InsertPt = BO; ++InsertPt;
+ BasicBlock::iterator InsertPt = ++BO->getIterator();
// If this was just a single multiply, remove the multiply and return the only
// remaining operand.
@@ -1179,7 +1158,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
}
if (NeedsNegate)
- V = CreateNeg(V, "neg", InsertPt, BO);
+ V = CreateNeg(V, "neg", &*InsertPt, BO);
return V;
}
@@ -1250,7 +1229,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
return nullptr;
}
-/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and
+/// Helper function of CombineXorOpnd(). It creates a bitwise-and
/// instruction with the given two operands, and return the resulting
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
@@ -2083,7 +2062,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
return;
// Don't optimize floating point instructions that don't have unsafe algebra.
- if (I->getType()->isFloatingPointTy() && !I->hasUnsafeAlgebra())
+ if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
return;
// Do not reassociate boolean (i1) expressions. We want to preserve the
@@ -2099,7 +2078,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
// see if we can convert it to X+-Y.
if (I->getOpcode() == Instruction::Sub) {
if (ShouldBreakUpSubtract(I)) {
- Instruction *NI = BreakUpSubtract(I);
+ Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2110,6 +2089,12 @@ void Reassociate::OptimizeInst(Instruction *I) {
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::Mul))) {
Instruction *NI = LowerNegateToMultiply(I);
+ // If the negate was simplified, revisit the users to see if we can
+ // reassociate further.
+ for (User *U : NI->users()) {
+ if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+ RedoInsts.insert(Tmp);
+ }
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2117,7 +2102,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
}
} else if (I->getOpcode() == Instruction::FSub) {
if (ShouldBreakUpSubtract(I)) {
- Instruction *NI = BreakUpSubtract(I);
+ Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2127,7 +2112,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::FMul))) {
+ // If the negate was simplified, revisit the users to see if we can
+ // reassociate further.
Instruction *NI = LowerNegateToMultiply(I);
+ for (User *U : NI->users()) {
+ if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+ RedoInsts.insert(Tmp);
+ }
RedoInsts.insert(I);
MadeChange = true;
I = NI;
@@ -2142,8 +2133,14 @@ void Reassociate::OptimizeInst(Instruction *I) {
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
unsigned Opcode = BO->getOpcode();
- if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+ if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+ // During the initial run we will get to the root of the tree.
+ // But if we get here while we are redoing instructions, there is no
+ // guarantee that the root will be visited. So redo it later.
+ if (BO->user_back() != BO)
+ RedoInsts.insert(BO->user_back());
return;
+ }
// If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
@@ -2250,10 +2247,10 @@ bool Reassociate::runOnFunction(Function &F) {
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Optimize every instruction in the basic block.
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
- if (isInstructionTriviallyDead(II)) {
- EraseInst(II++);
+ if (isInstructionTriviallyDead(&*II)) {
+ EraseInst(&*II++);
} else {
- OptimizeInst(II);
+ OptimizeInst(&*II);
assert(II->getParent() == BI && "Moved to a different block!");
++II;
}
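Editor's note: most of the mechanical churn in this hunk comes from the 3.8-era ilist change: BasicBlock::iterator no longer converts implicitly to Instruction*. A minimal sketch of the two idioms used throughout this import, assuming LLVM 3.8-era headers.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

static void walkBlock(llvm::BasicBlock &BB) {
  for (llvm::BasicBlock::iterator It = BB.begin(), E = BB.end(); It != E;) {
    llvm::Instruction *I = &*It++;   // materialize the pointer, then advance
    if (llvm::isInstructionTriviallyDead(I))
      I->eraseFromParent();          // safe: the iterator already moved on
  }
}
// Going the other way, I->getIterator() recovers the position, e.g.
//   llvm::BasicBlock::iterator IP = ++I->getIterator();   // one past I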
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 1b46727..915f897 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -82,10 +82,9 @@ bool RegToMem::runOnFunction(Function &F) {
BasicBlock::iterator I = BBEntry->begin();
while (isa<AllocaInst>(I)) ++I;
- CastInst *AllocaInsertionPoint =
- new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
- Type::getInt32Ty(F.getContext()),
- "reg2mem alloca point", I);
+ CastInst *AllocaInsertionPoint = new BitCastInst(
+ Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ Type::getInt32Ty(F.getContext()), "reg2mem alloca point", &*I);
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
@@ -95,7 +94,7 @@ bool RegToMem::runOnFunction(Function &F) {
for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
iib != iie; ++iib) {
if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
- valueEscapes(iib)) {
+ valueEscapes(&*iib)) {
WorkList.push_front(&*iib);
}
}
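Editor's note: for context on why Reg2Mem manufactures a throwaway bitcast, the pass needs a stable non-alloca instruction in the entry block to hand to DemoteRegToStack as the insertion point for the new allocas. A hedged sketch, assuming LLVM 3.8-era headers; demoteOne and the "demo alloca point" name are hypothetical.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Demote one escaping SSA value to a stack slot, placing the alloca after
// the existing entry-block allocas via the dummy "alloca point".
static void demoteOne(Function &F, Instruction &Escaping) {
  BasicBlock &Entry = F.getEntryBlock();
  BasicBlock::iterator I = Entry.begin();
  while (isa<AllocaInst>(I)) ++I;                    // skip existing allocas
  CastInst *AllocaPoint = new BitCastInst(
      Constant::getNullValue(Type::getInt32Ty(F.getContext())),
      Type::getInt32Ty(F.getContext()), "demo alloca point", &*I);
  DemoteRegToStack(Escaping, /*VolatileLoads=*/false, AllocaPoint);
}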
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index ae2ae3a..db127c3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -14,12 +14,14 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
@@ -46,10 +48,6 @@
using namespace llvm;
-// Print tracing output
-static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
- cl::init(false));
-
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
cl::init(false));
@@ -74,6 +72,12 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
cl::location(ClobberNonLive),
cl::Hidden);
+static cl::opt<bool> UseDeoptBundles("rs4gc-use-deopt-bundles", cl::Hidden,
+ cl::init(false));
+static cl::opt<bool>
+ AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
+ cl::Hidden, cl::init(true));
+
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -88,10 +92,10 @@ struct RewriteStatepointsForGC : public ModulePass {
Changed |= runOnFunction(F);
if (Changed) {
- // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn
+ // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
// returns true for at least one function in the module. Since at least
// one function changed, we know that the precondition is satisfied.
- stripDereferenceabilityInfo(M);
+ stripNonValidAttributes(M);
}
return Changed;
@@ -108,15 +112,16 @@ struct RewriteStatepointsForGC : public ModulePass {
/// dereferenceability that are no longer valid/correct after
/// RewriteStatepointsForGC has run. This is because semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
- /// heap. stripDereferenceabilityInfo (conservatively) restores correctness
+ /// heap. stripNonValidAttributes (conservatively) restores correctness
/// by erasing all attributes in the module that externally imply
/// dereferenceability.
- ///
- void stripDereferenceabilityInfo(Module &M);
+ /// Similar reasoning also applies to the noalias attributes. gc.statepoint
+ /// can touch the entire heap including noalias objects.
+ void stripNonValidAttributes(Module &M);
- // Helpers for stripDereferenceabilityInfo
- void stripDereferenceabilityInfoFromBody(Function &F);
- void stripDereferenceabilityInfoFromPrototype(Function &F);
+ // Helpers for stripNonValidAttributes
+ void stripNonValidAttributesFromBody(Function &F);
+ void stripNonValidAttributesFromPrototype(Function &F);
};
} // namespace
@@ -160,15 +165,16 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
typedef DenseMap<Value *, Value *> DefiningValueMapTy;
-typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
-typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
+typedef DenseSet<Value *> StatepointLiveSetTy;
+typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
+ RematerializedValueMapTy;
struct PartiallyConstructedSafepointRecord {
- /// The set of values known to be live accross this safepoint
- StatepointLiveSetTy liveset;
+ /// The set of values known to be live across this safepoint
+ StatepointLiveSetTy LiveSet;
/// Mapping from live pointers to a base-defining-value
- DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+ DenseMap<Value *, Value *> PointerToBase;
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
@@ -179,12 +185,26 @@ struct PartiallyConstructedSafepointRecord {
Instruction *UnwindToken;
/// Record live values we are rematerialized instead of relocating.
- /// They are not included into 'liveset' field.
+ /// They are not included into 'LiveSet' field.
/// Maps rematerialized copy to it's original value.
RematerializedValueMapTy RematerializedValues;
};
}
+static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
+ assert(UseDeoptBundles && "Should not be called otherwise!");
+
+ Optional<OperandBundleUse> DeoptBundle = CS.getOperandBundle("deopt");
+
+ if (!DeoptBundle.hasValue()) {
+ assert(AllowStatepointWithNoDeoptInfo &&
+ "Found non-leaf call without deopt info!");
+ return None;
+ }
+
+ return DeoptBundle.getValue().Inputs;
+}
+
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
GCPtrLivenessData &Data);
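Editor's note: GetDeoptBundleOperands above is the read side of the new -rs4gc-use-deopt-bundles mode; deopt state arrives attached to the call as a "deopt" operand bundle rather than as trailing statepoint arguments. A minimal consumer sketch, assuming LLVM 3.8-era headers; countDeoptState is hypothetical.

#include "llvm/ADT/Optional.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Returns how many deopt operands are attached to the call, or 0 if the
// frontend attached no "deopt" bundle at all.
static unsigned countDeoptState(ImmutableCallSite CS) {
  Optional<OperandBundleUse> Bundle = CS.getOperandBundle("deopt");
  if (!Bundle.hasValue())
    return 0;
  return Bundle.getValue().Inputs.size();   // ArrayRef<Use> of deopt values
}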
@@ -195,10 +215,10 @@ static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
StatepointLiveSetTy &out);
// TODO: Once we can get to the GCStrategy, this becomes
-// Optional<bool> isGCManagedPointer(const Value *V) const override {
+// Optional<bool> isGCManagedPointer(const Type *Ty) const override {
-static bool isGCPointerType(const Type *T) {
- if (const PointerType *PT = dyn_cast<PointerType>(T))
+static bool isGCPointerType(Type *T) {
+ if (auto *PT = dyn_cast<PointerType>(T))
// For the sake of this example GC, we arbitrarily pick addrspace(1) as our
// GC managed heap. We know that a pointer into this heap needs to be
// updated and that no other pointer does.
@@ -233,9 +253,8 @@ static bool containsGCPtrType(Type *Ty) {
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
- return std::any_of(
- ST->subtypes().begin(), ST->subtypes().end(),
- [](Type *SubType) { return containsGCPtrType(SubType); });
+ return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
+ containsGCPtrType);
return false;
}
@@ -247,7 +266,7 @@ static bool isUnhandledGCPointerType(Type *Ty) {
}
#endif
-static bool order_by_name(llvm::Value *a, llvm::Value *b) {
+static bool order_by_name(Value *a, Value *b) {
if (a->hasName() && b->hasName()) {
return -1 == a->getName().compare(b->getName());
} else if (a->hasName() && !b->hasName()) {
@@ -260,6 +279,13 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) {
}
}
+// Return the name of the value suffixed with the provided suffix, or if the
+// value didn't have a name, the default value specified.
+static std::string suffixed_name_or(Value *V, StringRef Suffix,
+ StringRef DefaultName) {
+ return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
+}
+
// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
@@ -269,30 +295,56 @@ static void analyzeParsePointLiveness(
const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
Instruction *inst = CS.getInstruction();
- StatepointLiveSetTy liveset;
- findLiveSetAtInst(inst, OriginalLivenessData, liveset);
+ StatepointLiveSetTy LiveSet;
+ findLiveSetAtInst(inst, OriginalLivenessData, LiveSet);
if (PrintLiveSet) {
// Note: This output is used by several of the test cases
- // The order of elemtns in a set is not stable, put them in a vec and sort
+ // The order of elements in a set is not stable, put them in a vec and sort
// by name
- SmallVector<Value *, 64> temp;
- temp.insert(temp.end(), liveset.begin(), liveset.end());
- std::sort(temp.begin(), temp.end(), order_by_name);
+ SmallVector<Value *, 64> Temp;
+ Temp.insert(Temp.end(), LiveSet.begin(), LiveSet.end());
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
errs() << "Live Variables:\n";
- for (Value *V : temp) {
- errs() << " " << V->getName(); // no newline
- V->dump();
- }
+ for (Value *V : Temp)
+ dbgs() << " " << V->getName() << " " << *V << "\n";
}
if (PrintLiveSetSize) {
errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
- errs() << "Number live values: " << liveset.size() << "\n";
+ errs() << "Number live values: " << LiveSet.size() << "\n";
+ }
+ result.LiveSet = LiveSet;
+}
+
+static bool isKnownBaseResult(Value *V);
+namespace {
+/// A single base defining value - An immediate base defining value for an
+/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
+/// For instructions which have multiple pointer [vector] inputs or that
+/// transition between vector and scalar types, there is no immediate base
+/// defining value. The 'base defining value' for 'Def' is the transitive
+/// closure of this relation stopping at the first instruction which has no
+/// immediate base defining value. The b.d.v. might itself be a base pointer,
+/// but it can also be an arbitrary derived pointer.
+struct BaseDefiningValueResult {
+ /// Contains the value which is the base defining value.
+ Value * const BDV;
+ /// True if the base defining value is also known to be an actual base
+ /// pointer.
+ const bool IsKnownBase;
+ BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
+ : BDV(BDV), IsKnownBase(IsKnownBase) {
+#ifndef NDEBUG
+ // Check consistency between new and old means of checking whether a BDV is
+ // a base.
+ bool MustBeBase = isKnownBaseResult(BDV);
+ assert(!MustBeBase || MustBeBase == IsKnownBase);
+#endif
}
- result.liveset = liveset;
+};
}
-static Value *findBaseDefiningValue(Value *I);
+static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
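Editor's note: the BaseDefiningValueResult struct above replaces the old std::pair<Value *, bool> return; it carries the BDV plus an IsKnownBase flag that is cross-checked against isKnownBaseResult in asserts builds. A hedged sketch of the intended consumption pattern, written as if it lived inside this same file (baseOrNull is hypothetical).

// Not the pass's code: just how the result is meant to be read.
static llvm::Value *baseOrNull(llvm::Value *Derived) {
  BaseDefiningValueResult Res = findBaseDefiningValue(Derived);
  if (Res.IsKnownBase)
    return Res.BDV;      // already a base pointer, usable as-is
  // Otherwise Res.BDV is a PHI/select/vector op; findBasePointer must
  // manufacture a parallel instruction that yields the real base.
  return nullptr;
}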
@@ -303,8 +355,8 @@ static Value *findBaseDefiningValue(Value *I);
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the later, the return pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
-static std::pair<Value *, bool>
-findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
+static BaseDefiningValueResult
+findBaseDefiningValueOfVector(Value *I) {
assert(I->getType()->isVectorTy() &&
cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
@@ -314,7 +366,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
- return std::make_pair(I, true);
+ return BaseDefiningValueResult(I, true);
// We shouldn't see the address of a global as a vector value?
assert(!isa<GlobalVariable>(I) &&
@@ -325,7 +377,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
if (isa<UndefValue>(I))
// utterly meaningless, but useful for dealing with partially optimized
// code.
- return std::make_pair(I, true);
+ return BaseDefiningValueResult(I, true);
// Due to inheritance, this must be _after_ the global variable and undef
// checks
@@ -333,31 +385,17 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
assert(Con->isNullValue() && "null is the only case which makes sense");
- return std::make_pair(Con, true);
+ return BaseDefiningValueResult(Con, true);
}
if (isa<LoadInst>(I))
- return std::make_pair(I, true);
-
- // For an insert element, we might be able to look through it if we know
- // something about the indexes.
- if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
- if (Index) {
- Value *InsertIndex = IEI->getOperand(2);
- // This index is inserting the value, look for its BDV
- if (InsertIndex == Index)
- return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false);
- // Both constant, and can't be equal per above. This insert is definitely
- // not relevant, look back at the rest of the vector and keep trying.
- if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
- return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
- }
-
+ return BaseDefiningValueResult(I, true);
+
+ if (isa<InsertElementInst>(I))
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
- return std::make_pair(IEI, false);
- }
+ return BaseDefiningValueResult(I, false);
if (isa<ShuffleVectorInst>(I))
// We don't know whether this vector contains entirely base pointers or
@@ -365,105 +403,62 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
// duplicate code as needed to construct a parallel vector of bases.
// TODO: There a number of local optimizations which could be applied here
// for particular sufflevector patterns.
- return std::make_pair(I, false);
+ return BaseDefiningValueResult(I, false);
// A PHI or Select is a base defining value. The outer findBasePointer
// algorithm is responsible for constructing a base value for this BDV.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"unknown vector instruction - no base found for vector element");
- return std::make_pair(I, false);
+ return BaseDefiningValueResult(I, false);
}
-static bool isKnownBaseResult(Value *V);
-
/// Helper function for findBasePointer - Will return a value which either a)
-/// defines the base pointer for the input or b) blocks the simple search
-/// (i.e. a PHI or Select of two derived pointers)
-static Value *findBaseDefiningValue(Value *I) {
+/// defines the base pointer for the input, b) blocks the simple search
+/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
+/// from pointer to vector type or back.
+static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
if (I->getType()->isVectorTy())
- return findBaseDefiningValueOfVector(I).first;
+ return findBaseDefiningValueOfVector(I);
assert(I->getType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
- // This case is a bit of a hack - it only handles extracts from vectors which
- // trivially contain only base pointers or cases where we can directly match
- // the index of the original extract element to an insertion into the vector.
- // See note inside the function for how to improve this.
- if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
- Value *VectorOperand = EEI->getVectorOperand();
- Value *Index = EEI->getIndexOperand();
- std::pair<Value *, bool> pair =
- findBaseDefiningValueOfVector(VectorOperand, Index);
- Value *VectorBase = pair.first;
- if (VectorBase->getType()->isPointerTy())
- // We found a BDV for this specific element with the vector. This is an
- // optimization, but in practice it covers most of the useful cases
- // created via scalarization.
- return VectorBase;
- else {
- assert(VectorBase->getType()->isVectorTy());
- if (pair.second)
- // If the entire vector returned is known to be entirely base pointers,
- // then the extractelement is valid base for this value.
- return EEI;
- else {
- // Otherwise, we have an instruction which potentially produces a
- // derived pointer and we need findBasePointers to clone code for us
- // such that we can create an instruction which produces the
- // accompanying base pointer.
- // Note: This code is currently rather incomplete. We don't currently
- // support the general form of shufflevector of insertelement.
- // Conceptually, these are just 'base defining values' of the same
- // variety as phi or select instructions. We need to update the
- // findBasePointers algorithm to insert new 'base-only' versions of the
- // original instructions. This is relative straight forward to do, but
- // the case which would motivate the work hasn't shown up in real
- // workloads yet.
- assert((isa<PHINode>(VectorBase) || isa<SelectInst>(VectorBase)) &&
- "need to extend findBasePointers for generic vector"
- "instruction cases");
- return VectorBase;
- }
- }
- }
-
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
// We should have never reached here if this argument isn't an gc value
- return I;
+ return BaseDefiningValueResult(I, true);
if (isa<GlobalVariable>(I))
// base case
- return I;
+ return BaseDefiningValueResult(I, true);
// inlining could possibly introduce phi node that contains
// undef if callee has multiple returns
if (isa<UndefValue>(I))
// utterly meaningless, but useful for dealing with
// partially optimized code.
- return I;
+ return BaseDefiningValueResult(I, true);
// Due to inheritance, this must be _after_ the global variable and undef
// checks
- if (Constant *Con = dyn_cast<Constant>(I)) {
+ if (isa<Constant>(I)) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
- // Note: Finding a constant base for something marked for relocation
- // doesn't really make sense. The most likely case is either a) some
- // screwed up the address space usage or b) your validating against
- // compiled C++ code w/o the proper separation. The only real exception
- // is a null pointer. You could have generic code written to index of
- // off a potentially null value and have proven it null. We also use
- // null pointers in dead paths of relocation phis (which we might later
- // want to find a base pointer for).
- assert(isa<ConstantPointerNull>(Con) &&
- "null is the only case which makes sense");
- return Con;
+ // Note: Even for frontends which don't have constant references, we can
+ // see constants appearing after optimizations. A simple example is
+ // specialization of an address computation on null feeding into a merge
+ // point where the actual use of the now-constant input is protected by
+ // another null check. (e.g. test4 in constants.ll)
+ return BaseDefiningValueResult(I, true);
}
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
+ // If stripping pointer casts changes the address space there is an
+ // addrspacecast in between.
+ assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
+ cast<PointerType>(CI->getType())->getAddressSpace() &&
+ "unsupported addrspacecast");
// If we find a cast instruction here, it means we've found a cast which is
// not simply a pointer cast (i.e. an inttoptr). We don't know how to
// handle int->ptr conversion.
@@ -472,7 +467,9 @@ static Value *findBaseDefiningValue(Value *I) {
}
if (isa<LoadInst>(I))
- return I; // The value loaded is an gc base itself
+ // The value loaded is a gc base itself
+ return BaseDefiningValueResult(I, true);
+
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
// The base of this GEP is the base
@@ -480,14 +477,11 @@ static Value *findBaseDefiningValue(Value *I) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
- case Intrinsic::experimental_gc_result_ptr:
default:
// fall through to general call handling
break;
case Intrinsic::experimental_gc_statepoint:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_int:
- llvm_unreachable("these don't produce pointers");
+ llvm_unreachable("statepoints don't produce pointers");
case Intrinsic::experimental_gc_relocate: {
// Rerunning safepoint insertion after safepoints are already
// inserted is not supported. It could probably be made to work,
@@ -506,17 +500,17 @@ static Value *findBaseDefiningValue(Value *I) {
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return I;
+ return BaseDefiningValueResult(I, true);
// I have absolutely no idea how to implement this part yet. It's not
- // neccessarily hard, I just haven't really looked at it yet.
+ // necessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
if (isa<AtomicCmpXchgInst>(I))
// A CAS is effectively a atomic store and load combined under a
// predicate. From the perspective of base pointers, we just treat it
// like a load.
- return I;
+ return BaseDefiningValueResult(I, true);
assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
"binary ops which don't apply to pointers");
@@ -525,34 +519,41 @@ static Value *findBaseDefiningValue(Value *I) {
// stack, but in either case, this is simply a field load. As a result,
// this is a defining definition of the base just like a load is.
if (isa<ExtractValueInst>(I))
- return I;
+ return BaseDefiningValueResult(I, true);
// We should never see an insert vector since that would require we be
// tracing back a struct value not a pointer value.
assert(!isa<InsertValueInst>(I) &&
"Base pointer for a struct is meaningless");
+ // An extractelement produces a base result exactly when its input does.
+ // We may need to insert a parallel instruction to extract the appropriate
+ // element out of the base vector corresponding to the input. Given this,
+ // it's analogous to the phi and select case even though it's not a merge.
+ if (isa<ExtractElementInst>(I))
+ // Note: There are a lot of obvious peephole cases here. These are deliberately
+ // handled after the main base pointer inference algorithm to make writing
+ // test cases to exercise that code easier.
+ return BaseDefiningValueResult(I, false);
+
// The last two cases here don't return a base pointer. Instead, they
- // return a value which dynamically selects from amoung several base
+ // return a value which dynamically selects from among several base
// derived pointers (each with it's own base potentially). It's the job of
// the caller to resolve these.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"missing instruction case in findBaseDefiningValing");
- return I;
+ return BaseDefiningValueResult(I, false);
}
/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
Value *&Cached = Cache[I];
if (!Cached) {
- Cached = findBaseDefiningValue(I);
+ Cached = findBaseDefiningValue(I).BDV;
+ DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
+ << Cached->getName() << "\n");
}
assert(Cache[I] != nullptr);
-
- if (TraceLSP) {
- dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
- << "\n";
- }
return Cached;
}
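Editor's note: the cached-BDV trace above now uses the standard DEBUG/dbgs() facility instead of the removed -trace-rewrite-statepoints flag. A minimal sketch of that idiom; the DEBUG_TYPE string is an assumption about this file's debug type.

#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "rewrite-statepoints-for-gc"   // assumed pass debug type

// Emitted only in +Asserts builds when opt is run with -debug or
// -debug-only=rewrite-statepoints-for-gc.
static void traceCachedBDV(const llvm::Value *From, const llvm::Value *To) {
  DEBUG(llvm::dbgs() << "fBDV-cached: " << From->getName() << " -> "
                     << To->getName() << "\n");
}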
@@ -572,7 +573,9 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
/// is it known to be a base pointer? Or do we need to continue searching.
static bool isKnownBaseResult(Value *V) {
- if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
+ if (!isa<PHINode>(V) && !isa<SelectInst>(V) &&
+ !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
+ !isa<ShuffleVectorInst>(V)) {
// no recursion possible
return true;
}
@@ -587,17 +590,19 @@ static bool isKnownBaseResult(Value *V) {
return false;
}
-// TODO: find a better name for this
namespace {
-class PhiState {
+/// Models the state of a single base defining value in the findBasePointer
+/// algorithm for determining where a new instruction is needed to propagate
+/// the base of this BDV.
+class BDVState {
public:
enum Status { Unknown, Base, Conflict };
- PhiState(Status s, Value *b = nullptr) : status(s), base(b) {
+ BDVState(Status s, Value *b = nullptr) : status(s), base(b) {
assert(status != Base || b);
}
- PhiState(Value *b) : status(Base), base(b) {}
- PhiState() : status(Unknown), base(nullptr) {}
+ explicit BDVState(Value *b) : status(Base), base(b) {}
+ BDVState() : status(Unknown), base(nullptr) {}
Status getStatus() const { return status; }
Value *getBase() const { return base; }
@@ -606,72 +611,80 @@ public:
bool isUnknown() const { return getStatus() == Unknown; }
bool isConflict() const { return getStatus() == Conflict; }
- bool operator==(const PhiState &other) const {
+ bool operator==(const BDVState &other) const {
return base == other.base && status == other.status;
}
- bool operator!=(const PhiState &other) const { return !(*this == other); }
+ bool operator!=(const BDVState &other) const { return !(*this == other); }
- void dump() {
- errs() << status << " (" << base << " - "
- << (base ? base->getName() : "nullptr") << "): ";
+ LLVM_DUMP_METHOD
+ void dump() const { print(dbgs()); dbgs() << '\n'; }
+
+ void print(raw_ostream &OS) const {
+ switch (status) {
+ case Unknown:
+ OS << "U";
+ break;
+ case Base:
+ OS << "B";
+ break;
+ case Conflict:
+ OS << "C";
+ break;
+ };
+ OS << " (" << base << " - "
+ << (base ? base->getName() : "nullptr") << "): ";
}
private:
Status status;
- Value *base; // non null only if status == base
+ AssertingVH<Value> base; // non null only if status == base
};
+}
-typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
-// Values of type PhiState form a lattice, and this is a helper
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
+ State.print(OS);
+ return OS;
+}
+#endif
+
+namespace {
+// Values of type BDVState form a lattice, and this is a helper
// class that implementes the meet operation. The meat of the meet
-// operation is implemented in MeetPhiStates::pureMeet
-class MeetPhiStates {
+// operation is implemented in MeetBDVStates::pureMeet
+class MeetBDVStates {
public:
- // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
- explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
- : phiStates(phiStates) {}
-
- // Destructively meet the current result with the base V. V can
- // either be a merge instruction (SelectInst / PHINode), in which
- // case its status is looked up in the phiStates map; or a regular
- // SSA value, in which case it is assumed to be a base.
- void meetWith(Value *V) {
- PhiState otherState = getStateForBDV(V);
- assert((MeetPhiStates::pureMeet(otherState, currentResult) ==
- MeetPhiStates::pureMeet(currentResult, otherState)) &&
- "math is wrong: meet does not commute!");
- currentResult = MeetPhiStates::pureMeet(otherState, currentResult);
+ /// Initializes the currentResult to the TOP state so that if can be met with
+ /// any other state to produce that state.
+ MeetBDVStates() {}
+
+ // Destructively meet the current result with the given BDVState
+ void meetWith(BDVState otherState) {
+ currentResult = meet(otherState, currentResult);
}
- PhiState getResult() const { return currentResult; }
+ BDVState getResult() const { return currentResult; }
private:
- const ConflictStateMapTy &phiStates;
- PhiState currentResult;
-
- /// Return a phi state for a base defining value. We'll generate a new
- /// base state for known bases and expect to find a cached state otherwise
- PhiState getStateForBDV(Value *baseValue) {
- if (isKnownBaseResult(baseValue)) {
- return PhiState(baseValue);
- } else {
- return lookupFromMap(baseValue);
- }
- }
+ BDVState currentResult;
- PhiState lookupFromMap(Value *V) {
- auto I = phiStates.find(V);
- assert(I != phiStates.end() && "lookup failed!");
- return I->second;
+ /// Perform a meet operation on two elements of the BDVState lattice.
+ static BDVState meet(BDVState LHS, BDVState RHS) {
+ assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) &&
+ "math is wrong: meet does not commute!");
+ BDVState Result = pureMeet(LHS, RHS);
+ DEBUG(dbgs() << "meet of " << LHS << " with " << RHS
+ << " produced " << Result << "\n");
+ return Result;
}
- static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) {
+ static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) {
switch (stateA.getStatus()) {
- case PhiState::Unknown:
+ case BDVState::Unknown:
return stateB;
- case PhiState::Base:
+ case BDVState::Base:
assert(stateA.getBase() && "can't be null");
if (stateB.isUnknown())
return stateA;
@@ -681,18 +694,20 @@ private:
assert(stateA == stateB && "equality broken!");
return stateA;
}
- return PhiState(PhiState::Conflict);
+ return BDVState(BDVState::Conflict);
}
assert(stateB.isConflict() && "only three states!");
- return PhiState(PhiState::Conflict);
+ return BDVState(BDVState::Conflict);
- case PhiState::Conflict:
+ case BDVState::Conflict:
return stateA;
}
llvm_unreachable("only three states!");
}
};
}
+
+
/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
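Editor's note: the renamed MeetBDVStates above still implements the same three-point lattice: Unknown is the top element, Conflict the bottom, and two distinct Base values meet to Conflict. A self-contained, non-LLVM illustration of that meet (names here are illustrative only).

enum class Status { Unknown, Base, Conflict };

struct State {
  Status S;
  const void *Base;   // stands in for the base Value*
};

static State meet(State A, State B) {
  if (A.S == Status::Unknown) return B;       // Unknown is the identity (TOP)
  if (B.S == Status::Unknown) return A;
  if (A.S == Status::Conflict || B.S == Status::Conflict)
    return {Status::Conflict, nullptr};       // Conflict absorbs (BOTTOM)
  // Both Base: equal bases stay Base, distinct bases become Conflict.
  return A.Base == B.Base ? A : State{Status::Conflict, nullptr};
}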
@@ -723,171 +738,252 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
//
// Note: A simpler form of this would be to add the conflict form of all
// PHIs without running the optimistic algorithm. This would be
- // analougous to pessimistic data flow and would likely lead to an
+ // analogous to pessimistic data flow and would likely lead to an
// overall worse solution.
- ConflictStateMapTy states;
- states[def] = PhiState();
- // Recursively fill in all phis & selects reachable from the initial one
- // for which we don't already know a definite base value for
- // TODO: This should be rewritten with a worklist
- bool done = false;
- while (!done) {
- done = true;
- // Since we're adding elements to 'states' as we run, we can't keep
- // iterators into the set.
- SmallVector<Value *, 16> Keys;
- Keys.reserve(states.size());
- for (auto Pair : states) {
- Value *V = Pair.first;
- Keys.push_back(V);
- }
- for (Value *v : Keys) {
- assert(!isKnownBaseResult(v) && "why did it get added?");
- if (PHINode *phi = dyn_cast<PHINode>(v)) {
- assert(phi->getNumIncomingValues() > 0 &&
- "zero input phis are illegal");
- for (Value *InVal : phi->incoming_values()) {
- Value *local = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
- }
- } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) {
- Value *local = findBaseOrBDV(sel->getTrueValue(), cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
- local = findBaseOrBDV(sel->getFalseValue(), cache);
- if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
- states[local] = PhiState();
- done = false;
- }
+#ifndef NDEBUG
+ auto isExpectedBDVType = [](Value *BDV) {
+ return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
+ isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV);
+ };
+#endif
+
+ // Once populated, will contain a mapping from each potentially non-base BDV
+ // to a lattice value (described above) which corresponds to that BDV.
+ // We use the order of insertion (DFS over the def/use graph) to provide a
+ // stable deterministic ordering for visiting DenseMaps (which are unordered)
+ // below. This is important for deterministic compilation.
+ MapVector<Value *, BDVState> States;
+
+ // Recursively fill in all base defining values reachable from the initial
+ // one for which we don't already know a definite base value for
+ /* scope */ {
+ SmallVector<Value*, 16> Worklist;
+ Worklist.push_back(def);
+ States.insert(std::make_pair(def, BDVState()));
+ while (!Worklist.empty()) {
+ Value *Current = Worklist.pop_back_val();
+ assert(!isKnownBaseResult(Current) && "why did it get added?");
+
+ auto visitIncomingValue = [&](Value *InVal) {
+ Value *Base = findBaseOrBDV(InVal, cache);
+ if (isKnownBaseResult(Base))
+ // Known bases won't need new instructions introduced and can be
+ // ignored safely
+ return;
+ assert(isExpectedBDVType(Base) && "the only non-base values "
+ "we see should be base defining values");
+ if (States.insert(std::make_pair(Base, BDVState())).second)
+ Worklist.push_back(Base);
+ };
+ if (PHINode *Phi = dyn_cast<PHINode>(Current)) {
+ for (Value *InVal : Phi->incoming_values())
+ visitIncomingValue(InVal);
+ } else if (SelectInst *Sel = dyn_cast<SelectInst>(Current)) {
+ visitIncomingValue(Sel->getTrueValue());
+ visitIncomingValue(Sel->getFalseValue());
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
+ visitIncomingValue(EE->getVectorOperand());
+ } else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
+ visitIncomingValue(IE->getOperand(0)); // vector operand
+ visitIncomingValue(IE->getOperand(1)); // scalar operand
+ } else {
+ // There is one known class of instructions we know we don't handle.
+ assert(isa<ShuffleVectorInst>(Current));
+ llvm_unreachable("unimplemented instruction case");
}
}
}
- if (TraceLSP) {
- errs() << "States after initialization:\n";
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
- state.dump();
- v->dump();
- }
+#ifndef NDEBUG
+ DEBUG(dbgs() << "States after initialization:\n");
+ for (auto Pair : States) {
+ DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
+#endif
- // TODO: come back and revisit the state transitions around inputs which
- // have reached conflict state. The current version seems too conservative.
+ // Return a phi state for a base defining value. We'll generate a new
+ // base state for known bases and expect to find a cached state otherwise.
+ auto getStateForBDV = [&](Value *baseValue) {
+ if (isKnownBaseResult(baseValue))
+ return BDVState(baseValue);
+ auto I = States.find(baseValue);
+ assert(I != States.end() && "lookup failed!");
+ return I->second;
+ };
bool progress = true;
while (progress) {
#ifndef NDEBUG
- size_t oldSize = states.size();
+ const size_t oldSize = States.size();
#endif
progress = false;
- // We're only changing keys in this loop, thus safe to keep iterators
- for (auto Pair : states) {
- MeetPhiStates calculateMeet(states);
- Value *v = Pair.first;
- assert(!isKnownBaseResult(v) && "why did it get added?");
- if (SelectInst *select = dyn_cast<SelectInst>(v)) {
- calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache));
- calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache));
- } else
- for (Value *Val : cast<PHINode>(v)->incoming_values())
- calculateMeet.meetWith(findBaseOrBDV(Val, cache));
-
- PhiState oldState = states[v];
- PhiState newState = calculateMeet.getResult();
+ // We're only changing values in this loop, thus safe to keep iterators.
+ // Since this is computing a fixed point, the order of visit does not
+ // effect the result. TODO: We could use a worklist here and make this run
+ // much faster.
+ for (auto Pair : States) {
+ Value *BDV = Pair.first;
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+
+ // Given an input value for the current instruction, return a BDVState
+ // instance which represents the BDV of that value.
+ auto getStateForInput = [&](Value *V) mutable {
+ Value *BDV = findBaseOrBDV(V, cache);
+ return getStateForBDV(BDV);
+ };
+
+ MeetBDVStates calculateMeet;
+ if (SelectInst *select = dyn_cast<SelectInst>(BDV)) {
+ calculateMeet.meetWith(getStateForInput(select->getTrueValue()));
+ calculateMeet.meetWith(getStateForInput(select->getFalseValue()));
+ } else if (PHINode *Phi = dyn_cast<PHINode>(BDV)) {
+ for (Value *Val : Phi->incoming_values())
+ calculateMeet.meetWith(getStateForInput(Val));
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
+ // The 'meet' for an extractelement is slightly trivial, but it's still
+ // useful in that it drives us to conflict if our input is.
+ calculateMeet.meetWith(getStateForInput(EE->getVectorOperand()));
+ } else {
+ // Given there's an inherent type mismatch between the operands, this will
+ // *always* produce Conflict.
+ auto *IE = cast<InsertElementInst>(BDV);
+ calculateMeet.meetWith(getStateForInput(IE->getOperand(0)));
+ calculateMeet.meetWith(getStateForInput(IE->getOperand(1)));
+ }
+
+ BDVState oldState = States[BDV];
+ BDVState newState = calculateMeet.getResult();
if (oldState != newState) {
progress = true;
- states[v] = newState;
+ States[BDV] = newState;
}
}
- assert(oldSize <= states.size());
- assert(oldSize == states.size() || progress);
+ assert(oldSize == States.size() &&
+ "fixed point shouldn't be adding any new nodes to state");
}
- if (TraceLSP) {
- errs() << "States after meet iteration:\n";
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
- state.dump();
- v->dump();
- }
+#ifndef NDEBUG
+ DEBUG(dbgs() << "States after meet iteration:\n");
+ for (auto Pair : States) {
+ DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
-
+#endif
+
// Insert Phis for all conflicts
- // We want to keep naming deterministic in the loop that follows, so
- // sort the keys before iteration. This is useful in allowing us to
- // write stable tests. Note that there is no invalidation issue here.
- SmallVector<Value *, 16> Keys;
- Keys.reserve(states.size());
- for (auto Pair : states) {
- Value *V = Pair.first;
- Keys.push_back(V);
- }
- std::sort(Keys.begin(), Keys.end(), order_by_name);
// TODO: adjust naming patterns to avoid this order of iteration dependency
- for (Value *V : Keys) {
- Instruction *v = cast<Instruction>(V);
- PhiState state = states[V];
- assert(!isKnownBaseResult(v) && "why did it get added?");
- assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
- if (!state.isConflict())
+ for (auto Pair : States) {
+ Instruction *I = cast<Instruction>(Pair.first);
+ BDVState State = Pair.second;
+ assert(!isKnownBaseResult(I) && "why did it get added?");
+ assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
+
+ // extractelement instructions are a bit special in that we may need to
+ // insert an extract even when we know an exact base for the instruction.
+ // The problem is that we need to convert from a vector base to a scalar
+ // base for the particular index we're interested in.
+ if (State.isBase() && isa<ExtractElementInst>(I) &&
+ isa<VectorType>(State.getBase()->getType())) {
+ auto *EE = cast<ExtractElementInst>(I);
+ // TODO: In many cases, the new instruction is just EE itself. We should
+ // exploit this, but can't do it here since it would break the invariant
+ // about the BDV not being known to be a base.
+ auto *BaseInst = ExtractElementInst::Create(State.getBase(),
+ EE->getIndexOperand(),
+ "base_ee", EE);
+ BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
+ States[I] = BDVState(BDVState::Base, BaseInst);
+ }
+
+ // Since we're joining a vector and scalar base, they can never be the
+ // same. As a result, we should always see insert element having reached
+ // the conflict state.
+ if (isa<InsertElementInst>(I)) {
+ assert(State.isConflict());
+ }
+
+ if (!State.isConflict())
continue;
- if (isa<PHINode>(v)) {
- int num_preds =
- std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
- assert(num_preds > 0 && "how did we reach here");
- PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
- // Add metadata marking this as a base value
- auto *const_1 = ConstantInt::get(
- Type::getInt32Ty(
- v->getParent()->getParent()->getParent()->getContext()),
- 1);
- auto MDConst = ConstantAsMetadata::get(const_1);
- MDNode *md = MDNode::get(
- v->getParent()->getParent()->getParent()->getContext(), MDConst);
- phi->setMetadata("is_base_value", md);
- states[v] = PhiState(PhiState::Conflict, phi);
+ /// Create and insert a new instruction which will represent the base of
+ /// the given instruction 'I'.
+ auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* {
+ if (isa<PHINode>(I)) {
+ BasicBlock *BB = I->getParent();
+ int NumPreds = std::distance(pred_begin(BB), pred_end(BB));
+ assert(NumPreds > 0 && "how did we reach here");
+ std::string Name = suffixed_name_or(I, ".base", "base_phi");
+ return PHINode::Create(I->getType(), NumPreds, Name, I);
+ } else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
+ // The undef will be replaced later
+ UndefValue *Undef = UndefValue::get(Sel->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_select");
+ return SelectInst::Create(Sel->getCondition(), Undef,
+ Undef, Name, Sel);
+ } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
+ UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_ee");
+ return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
+ EE);
+ } else {
+ auto *IE = cast<InsertElementInst>(I);
+ UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType());
+ UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_ie");
+ return InsertElementInst::Create(VecUndef, ScalarUndef,
+ IE->getOperand(2), Name, IE);
+ }
+
+ };
+ Instruction *BaseInst = MakeBaseInstPlaceholder(I);
+ // Add metadata marking this as a base value
+ BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
+ States[I] = BDVState(BDVState::Conflict, BaseInst);
+ }
+
+ // Returns an instruction which produces the base pointer for a given
+ // instruction. The instruction is assumed to be an input to one of the BDVs
+ // seen in the inference algorithm above. As such, we must either already
+ // know its base defining value is a base, or have inserted a new
+ // instruction to propagate the base of its BDV and have entered that newly
+ // introduced instruction into the state table. In either case, we are
+ // assured to be able to determine an instruction which produces its base
+ // pointer.
+ auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
+ Value *BDV = findBaseOrBDV(Input, cache);
+ Value *Base = nullptr;
+ if (isKnownBaseResult(BDV)) {
+ Base = BDV;
} else {
- SelectInst *sel = cast<SelectInst>(v);
- // The undef will be replaced later
- UndefValue *undef = UndefValue::get(sel->getType());
- SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
- undef, "base_select", sel);
- // Add metadata marking this as a base value
- auto *const_1 = ConstantInt::get(
- Type::getInt32Ty(
- v->getParent()->getParent()->getParent()->getContext()),
- 1);
- auto MDConst = ConstantAsMetadata::get(const_1);
- MDNode *md = MDNode::get(
- v->getParent()->getParent()->getParent()->getContext(), MDConst);
- basesel->setMetadata("is_base_value", md);
- states[v] = PhiState(PhiState::Conflict, basesel);
+ // Either conflict or base.
+ assert(States.count(BDV));
+ Base = States[BDV].getBase();
}
- }
+ assert(Base && "can't be null");
+ // The cast is needed since base traversal may strip away bitcasts
+ if (Base->getType() != Input->getType() &&
+ InsertPt) {
+ Base = new BitCastInst(Base, Input->getType(), "cast",
+ InsertPt);
+ }
+ return Base;
+ };
- // Fixup all the inputs of the new PHIs
- for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
+ // Fixup all the inputs of the new PHIs. Visit order needs to be
+ // deterministic and predictable because we're naming newly created
+ // instructions.
+ for (auto Pair : States) {
+ Instruction *BDV = cast<Instruction>(Pair.first);
+ BDVState State = Pair.second;
- assert(!isKnownBaseResult(v) && "why did it get added?");
- assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
- if (!state.isConflict())
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+ assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
+ if (!State.isConflict())
continue;
- if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
- PHINode *phi = cast<PHINode>(v);
+ if (PHINode *basephi = dyn_cast<PHINode>(State.getBase())) {
+ PHINode *phi = cast<PHINode>(BDV);
unsigned NumPHIValues = phi->getNumIncomingValues();
for (unsigned i = 0; i < NumPHIValues; i++) {
Value *InVal = phi->getIncomingValue(i);
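Editor's note: one structural point of the rewrite above is that States moved from DenseMap to MapVector precisely so the naming loops iterate in discovery (insertion) order rather than hash order. A small sketch of the difference, assuming LLVM ADT headers; demoStableOrder is hypothetical.

#include <utility>
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"

// MapVector iterates keys in insertion order, so output (and any names
// derived from it) is stable run-to-run, unlike a DenseMap walk.
static void demoStableOrder(llvm::ArrayRef<llvm::Value *> DiscoveryOrder) {
  llvm::MapVector<llvm::Value *, int> States;
  for (llvm::Value *V : DiscoveryOrder)
    States.insert(std::make_pair(V, 0));   // duplicate keys keep the first value
  for (auto &Pair : States)
    llvm::outs() << Pair.first->getName() << "\n";
}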
@@ -906,104 +1002,145 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
if (blockIndex != -1) {
Value *oldBase = basephi->getIncomingValue(blockIndex);
basephi->addIncoming(oldBase, InBB);
+
#ifndef NDEBUG
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
-
- // In essense this assert states: the only way two
+ Value *Base = getBaseForInput(InVal, nullptr);
+ // In essence this assert states: the only way two
// values incoming from the same basic block may be
// different is by being different bitcasts of the same
// value. A cleanup that remains TODO is changing
// findBaseOrBDV to return an llvm::Value of the correct
// type (and still remain pure). This will remove the
// need to add bitcasts.
- assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
+ assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() &&
"sanity -- findBaseOrBDV should be pure!");
#endif
continue;
}
- // Find either the defining value for the PHI or the normal base for
- // a non-phi node
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
- assert(base && "can't be null");
- // Must use original input BB since base may not be Instruction
- // The cast is needed since base traversal may strip away bitcasts
- if (base->getType() != basephi->getType()) {
- base = new BitCastInst(base, basephi->getType(), "cast",
- InBB->getTerminator());
- }
- basephi->addIncoming(base, InBB);
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast in the incoming block.
+ // TODO: Need to split critical edges if insertion is needed
+ Value *Base = getBaseForInput(InVal, InBB->getTerminator());
+ basephi->addIncoming(Base, InBB);
}
assert(basephi->getNumIncomingValues() == NumPHIValues);
- } else {
- SelectInst *basesel = cast<SelectInst>(state.getBase());
- SelectInst *sel = cast<SelectInst>(v);
+ } else if (SelectInst *BaseSel = dyn_cast<SelectInst>(State.getBase())) {
+ SelectInst *Sel = cast<SelectInst>(BDV);
// Operand 1 & 2 are true, false path respectively. TODO: refactor to
// something more safe and less hacky.
for (int i = 1; i <= 2; i++) {
- Value *InVal = sel->getOperand(i);
- // Find either the defining value for the PHI or the normal base for
- // a non-phi node
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
- assert(base && "can't be null");
- // Must use original input BB since base may not be Instruction
- // The cast is needed since base traversal may strip away bitcasts
- if (base->getType() != basesel->getType()) {
- base = new BitCastInst(base, basesel->getType(), "cast", basesel);
- }
- basesel->setOperand(i, base);
+ Value *InVal = Sel->getOperand(i);
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast.
+ Value *Base = getBaseForInput(InVal, BaseSel);
+ BaseSel->setOperand(i, Base);
}
+ } else if (auto *BaseEE = dyn_cast<ExtractElementInst>(State.getBase())) {
+ Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
+ // Find the instruction which produces the base for each input. We may
+ // need to insert a bitcast.
+ Value *Base = getBaseForInput(InVal, BaseEE);
+ BaseEE->setOperand(0, Base);
+ } else {
+ auto *BaseIE = cast<InsertElementInst>(State.getBase());
+ auto *BdvIE = cast<InsertElementInst>(BDV);
+ auto UpdateOperand = [&](int OperandIdx) {
+ Value *InVal = BdvIE->getOperand(OperandIdx);
+ Value *Base = getBaseForInput(InVal, BaseIE);
+ BaseIE->setOperand(OperandIdx, Base);
+ };
+ UpdateOperand(0); // vector operand
+ UpdateOperand(1); // scalar operand
+ }
+
+ }
+
+ // Now that we're done with the algorithm, see if we can optimize the
+ // results slightly by reducing the number of new instructions needed.
+ // Arguably, this should be integrated into the algorithm above, but
+  // doing it as a post-process step is easier to reason about for the moment.
+ DenseMap<Value *, Value *> ReverseMap;
+ SmallPtrSet<Instruction *, 16> NewInsts;
+ SmallSetVector<AssertingVH<Instruction>, 16> Worklist;
+  // Note: We need to visit the states in a deterministic order.  We use the
+  // keys we sorted above for this purpose.  Note that we are papering over a
+  // bigger problem with the algorithm above - its visit order is not
+  // deterministic.  A larger change is needed to fix this.
+ for (auto Pair : States) {
+ auto *BDV = Pair.first;
+ auto State = Pair.second;
+ Value *Base = State.getBase();
+ assert(BDV && Base);
+ assert(!isKnownBaseResult(BDV) && "why did it get added?");
+ assert(isKnownBaseResult(Base) &&
+ "must be something we 'know' is a base pointer");
+ if (!State.isConflict())
+ continue;
+
+ ReverseMap[Base] = BDV;
+ if (auto *BaseI = dyn_cast<Instruction>(Base)) {
+ NewInsts.insert(BaseI);
+ Worklist.insert(BaseI);
+ }
+ }
+ auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI,
+ Value *Replacement) {
+ // Add users which are new instructions (excluding self references)
+ for (User *U : BaseI->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (NewInsts.count(UI) && UI != BaseI)
+ Worklist.insert(UI);
+ // Then do the actual replacement
+ NewInsts.erase(BaseI);
+ ReverseMap.erase(BaseI);
+ BaseI->replaceAllUsesWith(Replacement);
+ assert(States.count(BDV));
+ assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI);
+ States[BDV] = BDVState(BDVState::Conflict, Replacement);
+ BaseI->eraseFromParent();
+ };
+ const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
+ while (!Worklist.empty()) {
+ Instruction *BaseI = Worklist.pop_back_val();
+ assert(NewInsts.count(BaseI));
+ Value *Bdv = ReverseMap[BaseI];
+ if (auto *BdvI = dyn_cast<Instruction>(Bdv))
+ if (BaseI->isIdenticalTo(BdvI)) {
+ DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
+ ReplaceBaseInstWith(Bdv, BaseI, Bdv);
+ continue;
+ }
+ if (Value *V = SimplifyInstruction(BaseI, DL)) {
+ DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
+ ReplaceBaseInstWith(Bdv, BaseI, V);
+ continue;
}
}
// Cache all of our results so we can cheaply reuse them
// NOTE: This is actually two caches: one of the base defining value
// relation and one of the base pointer relation! FIXME
- for (auto item : states) {
- Value *v = item.first;
- Value *base = item.second.getBase();
- assert(v && base);
- assert(!isKnownBaseResult(v) && "why did it get added?");
-
- if (TraceLSP) {
- std::string fromstr =
- cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "")
- : "none";
- errs() << "Updating base value cache"
- << " for: " << (v->hasName() ? v->getName() : "")
- << " from: " << fromstr
- << " to: " << (base->hasName() ? base->getName() : "") << "\n";
- }
-
- assert(isKnownBaseResult(base) &&
- "must be something we 'know' is a base pointer");
- if (cache.count(v)) {
+ for (auto Pair : States) {
+ auto *BDV = Pair.first;
+ Value *base = Pair.second.getBase();
+ assert(BDV && base);
+
+ std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none";
+ DEBUG(dbgs() << "Updating base value cache"
+ << " for: " << BDV->getName()
+ << " from: " << fromstr
+ << " to: " << base->getName() << "\n");
+
+ if (cache.count(BDV)) {
      // Once we transition from the BDV relation being stored in the cache to
// the base relation being stored, it must be stable
- assert((!isKnownBaseResult(cache[v]) || cache[v] == base) &&
+ assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) &&
"base relation should be stable");
}
- cache[v] = base;
+ cache[BDV] = base;
}
- assert(cache.find(def) != cache.end());
+ assert(cache.count(def));
return cache[def];
}
@@ -1024,7 +1161,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
// pointer was a base pointer.
static void
findBasePointers(const StatepointLiveSetTy &live,
- DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
+ DenseMap<Value *, Value *> &PointerToBase,
DominatorTree *DT, DefiningValueMapTy &DVCache) {
// For the naming of values inserted to be deterministic - which makes for
// much cleaner and more stable tests - we need to assign an order to the
@@ -1043,7 +1180,7 @@ findBasePointers(const StatepointLiveSetTy &live,
// If you see this trip and like to live really dangerously, the code should
// be correct, just with idioms the verifier can't handle. You can try
- // disabling the verifier at your own substaintial risk.
+ // disabling the verifier at your own substantial risk.
assert(!isa<ConstantPointerNull>(base) &&
"the relocation code needs adjustment to handle the relocation of "
"a null pointer constant without causing false positives in the "
@@ -1056,8 +1193,8 @@ findBasePointers(const StatepointLiveSetTy &live,
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
const CallSite &CS,
PartiallyConstructedSafepointRecord &result) {
- DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
- findBasePointers(result.liveset, PointerToBase, &DT, DVCache);
+ DenseMap<Value *, Value *> PointerToBase;
+ findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
if (PrintBasePointers) {
// Note: Need to print these in a stable order since this is checked in
@@ -1071,8 +1208,11 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
std::sort(Temp.begin(), Temp.end(), order_by_name);
for (Value *Ptr : Temp) {
Value *Base = PointerToBase[Ptr];
- errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
- << "\n";
+ errs() << " derived ";
+ Ptr->printAsOperand(errs(), false);
+ errs() << " base ";
+ Base->printAsOperand(errs(), false);
+      errs() << "\n";
}
}
@@ -1086,10 +1226,10 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
PartiallyConstructedSafepointRecord &result);
static void recomputeLiveInValues(
- Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
- // again. The old values are still live and will help it stablize quickly.
+ // again. The old values are still live and will help it stabilize quickly.
GCPtrLivenessData RevisedLivenessData;
computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
@@ -1099,69 +1239,66 @@ static void recomputeLiveInValues(
}
}
-// When inserting gc.relocate calls, we need to ensure there are no uses
-// of the original value between the gc.statepoint and the gc.relocate call.
-// One case which can arise is a phi node starting one of the successor blocks.
-// We also need to be able to insert the gc.relocates only on the path which
-// goes through the statepoint. We might need to split an edge to make this
-// possible.
+// When inserting gc.relocate and gc.result calls, we need to ensure there are
+// no uses of the original value / return value between the gc.statepoint and
+// the gc.relocate / gc.result call. One case which can arise is a phi node
+// starting one of the successor blocks. We also need to be able to insert the
+// gc.relocates only on the path which goes through the statepoint. We might
+// need to split an edge to make this possible.
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
DominatorTree &DT) {
BasicBlock *Ret = BB;
- if (!BB->getUniquePredecessor()) {
- Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, &DT);
- }
+ if (!BB->getUniquePredecessor())
+ Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);
- // Now that 'ret' has unique predecessor we can safely remove all phi nodes
+  // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes
// from it
FoldSingleEntryPHINodes(Ret);
- assert(!isa<PHINode>(Ret->begin()));
+ assert(!isa<PHINode>(Ret->begin()) &&
+ "All PHI nodes should have been removed!");
- // At this point, we can safely insert a gc.relocate as the first instruction
- // in Ret if needed.
+ // At this point, we can safely insert a gc.relocate or gc.result as the first
+ // instruction in Ret if needed.
return Ret;
}
-static int find_index(ArrayRef<Value *> livevec, Value *val) {
- auto itr = std::find(livevec.begin(), livevec.end(), val);
- assert(livevec.end() != itr);
- size_t index = std::distance(livevec.begin(), itr);
- assert(index < livevec.size());
- return index;
-}
-
-// Create new attribute set containing only attributes which can be transfered
+// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
static AttributeSet legalizeCallAttributes(AttributeSet AS) {
- AttributeSet ret;
+ AttributeSet Ret;
for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
- unsigned index = AS.getSlotIndex(Slot);
+ unsigned Index = AS.getSlotIndex(Slot);
- if (index == AttributeSet::ReturnIndex ||
- index == AttributeSet::FunctionIndex) {
+ if (Index == AttributeSet::ReturnIndex ||
+ Index == AttributeSet::FunctionIndex) {
- for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end;
- ++it) {
- Attribute attr = *it;
+ for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
// Do not allow certain attributes - just skip them
// Safepoint can not be read only or read none.
- if (attr.hasAttribute(Attribute::ReadNone) ||
- attr.hasAttribute(Attribute::ReadOnly))
+ if (Attr.hasAttribute(Attribute::ReadNone) ||
+ Attr.hasAttribute(Attribute::ReadOnly))
+ continue;
+
+ // These attributes control the generation of the gc.statepoint call /
+ // invoke itself; and once the gc.statepoint is in place, they're of no
+ // use.
+ if (Attr.hasAttribute("statepoint-num-patch-bytes") ||
+ Attr.hasAttribute("statepoint-id"))
continue;
- ret = ret.addAttributes(
- AS.getContext(), index,
- AttributeSet::get(AS.getContext(), index, AttrBuilder(attr)));
+ Ret = Ret.addAttributes(
+ AS.getContext(), Index,
+ AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr)));
}
}
// Just skip parameter attributes for now
}
- return ret;
+ return Ret;
}
/// Helper function to place all gc relocates necessary for the given
@@ -1173,225 +1310,290 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) {
/// statepointToken - statepoint instruction to which relocates should be
/// bound.
/// Builder - LLVM IR builder to be used to construct new calls.
-static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables,
+static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
const int LiveStart,
- ArrayRef<llvm::Value *> BasePtrs,
+ ArrayRef<Value *> BasePtrs,
Instruction *StatepointToken,
IRBuilder<> Builder) {
- SmallVector<Instruction *, 64> NewDefs;
- NewDefs.reserve(LiveVariables.size());
+ if (LiveVariables.empty())
+ return;
- Module *M = StatepointToken->getParent()->getParent()->getParent();
+ auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
+ auto ValIt = std::find(LiveVec.begin(), LiveVec.end(), Val);
+ assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
+ size_t Index = std::distance(LiveVec.begin(), ValIt);
+ assert(Index < LiveVec.size() && "Bug in std::find?");
+ return Index;
+ };
- for (unsigned i = 0; i < LiveVariables.size(); i++) {
- // We generate a (potentially) unique declaration for every pointer type
- // combination. This results is some blow up the function declarations in
- // the IR, but removes the need for argument bitcasts which shrinks the IR
- // greatly and makes it much more readable.
- SmallVector<Type *, 1> Types; // one per 'any' type
- // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid
- // cases where the actual value's type mangling is not supported by llvm. A
- // bitcast is added later to convert gc_relocate to the actual value's type.
- Types.push_back(Type::getInt8PtrTy(M->getContext(), 1));
- Value *GCRelocateDecl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_gc_relocate, Types);
+ // All gc_relocate are set to i8 addrspace(1)* type. We originally generated
+ // unique declarations for each pointer type, but this proved problematic
+ // because the intrinsic mangling code is incomplete and fragile. Since
+  // we're moving towards a single unified pointer type anyway, we can just
+ // cast everything to an i8* of the right address space. A bitcast is added
+ // later to convert gc_relocate to the actual value's type.
+ Module *M = StatepointToken->getModule();
+ auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
+ Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
+ Value *GCRelocateDecl =
+ Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+ for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
Value *BaseIdx =
- ConstantInt::get(Type::getInt32Ty(M->getContext()),
- LiveStart + find_index(LiveVariables, BasePtrs[i]));
- Value *LiveIdx = ConstantInt::get(
- Type::getInt32Ty(M->getContext()),
- LiveStart + find_index(LiveVariables, LiveVariables[i]));
+ Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
+ Value *LiveIdx = Builder.getInt32(LiveStart + i);
// only specify a debug name if we can give a useful one
- Value *Reloc = Builder.CreateCall(
+ CallInst *Reloc = Builder.CreateCall(
GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
- LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated"
- : "");
+ suffixed_name_or(LiveVariables[i], ".relocated", ""));
// Trick CodeGen into thinking there are lots of free registers at this
// fake call.
- cast<CallInst>(Reloc)->setCallingConv(CallingConv::Cold);
+ Reloc->setCallingConv(CallingConv::Cold);
+ }
+}
- NewDefs.push_back(cast<Instruction>(Reloc));
+namespace {
+
+/// This struct is used to defer RAUWs and `eraseFromParent`s. Using this
+/// avoids having to worry about keeping around dangling pointers to Values.
+class DeferredReplacement {
+ AssertingVH<Instruction> Old;
+ AssertingVH<Instruction> New;
+
+public:
+ explicit DeferredReplacement(Instruction *Old, Instruction *New) :
+ Old(Old), New(New) {
+ assert(Old != New && "Not allowed!");
}
- assert(NewDefs.size() == LiveVariables.size() &&
- "missing or extra redefinition at safepoint");
+
+ /// Does the task represented by this instance.
+ void doReplacement() {
+ Instruction *OldI = Old;
+ Instruction *NewI = New;
+
+ assert(OldI != NewI && "Disallowed at construction?!");
+
+ Old = nullptr;
+ New = nullptr;
+
+ if (NewI)
+ OldI->replaceAllUsesWith(NewI);
+ OldI->eraseFromParent();
+ }
+};
}
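
The DeferredReplacement helper above captures a common idiom: destructive updates (RAUW plus eraseFromParent) are queued while other side tables still hold raw Instruction pointers, and are applied only once those tables are dropped. The following standalone sketch shows the same idea in plain C++ with hypothetical types; it is illustrative only and uses no LLVM APIs.

    #include <iostream>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in for an IR instruction.
    struct Inst {
      std::string Name;
      explicit Inst(std::string N) : Name(std::move(N)) {}
    };

    int main() {
      // Owning storage; the side table below holds raw pointers into it.
      std::vector<std::unique_ptr<Inst>> Func;
      Func.push_back(std::make_unique<Inst>("old.call"));
      Func.push_back(std::make_unique<Inst>("other.inst"));

      // A side table that must remain valid while rewriting (cf. the live
      // sets held by PartiallyConstructedSafepointRecord).
      std::vector<Inst *> SideTable = {Func[0].get(), Func[1].get()};

      // Queue the destructive update instead of performing it immediately.
      std::vector<std::pair<std::string, std::string>> Deferred;
      Deferred.emplace_back("old.call", "statepoint_token");

      // Reading the side table is still safe: nothing has been erased yet.
      for (Inst *I : SideTable)
        std::cout << "still valid: " << I->Name << "\n";

      // Only after the side table is dropped do we apply the queued updates
      // (the analogue of calling doReplacement() on each DeferredReplacement).
      SideTable.clear();
      for (auto &P : Deferred)
        for (auto &I : Func)
          if (I->Name == P.first)
            I->Name = P.second; // stand-in for RAUW + eraseFromParent

      for (auto &I : Func)
        std::cout << "after rewrite: " << I->Name << "\n";
    }
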
static void
-makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
- const SmallVectorImpl<llvm::Value *> &basePtrs,
- const SmallVectorImpl<llvm::Value *> &liveVariables,
- Pass *P,
- PartiallyConstructedSafepointRecord &result) {
- assert(basePtrs.size() == liveVariables.size());
- assert(isStatepoint(CS) &&
+makeStatepointExplicitImpl(const CallSite CS, /* to replace */
+ const SmallVectorImpl<Value *> &BasePtrs,
+ const SmallVectorImpl<Value *> &LiveVariables,
+ PartiallyConstructedSafepointRecord &Result,
+ std::vector<DeferredReplacement> &Replacements) {
+ assert(BasePtrs.size() == LiveVariables.size());
+ assert((UseDeoptBundles || isStatepoint(CS)) &&
"This method expects to be rewriting a statepoint");
- BasicBlock *BB = CS.getInstruction()->getParent();
- assert(BB);
- Function *F = BB->getParent();
- assert(F && "must be set");
- Module *M = F->getParent();
- (void)M;
- assert(M && "must be set");
-
- // We're not changing the function signature of the statepoint since the gc
- // arguments go into the var args section.
- Function *gc_statepoint_decl = CS.getCalledFunction();
-
  // Then go ahead and use the builder to actually do the inserts. We insert
// immediately before the previous instruction under the assumption that all
// arguments will be available here. We can't insert afterwards since we may
// be replacing a terminator.
- Instruction *insertBefore = CS.getInstruction();
- IRBuilder<> Builder(insertBefore);
- // Copy all of the arguments from the original statepoint - this includes the
- // target, call args, and deopt args
- SmallVector<llvm::Value *, 64> args;
- args.insert(args.end(), CS.arg_begin(), CS.arg_end());
- // TODO: Clear the 'needs rewrite' flag
-
- // add all the pointers to be relocated (gc arguments)
- // Capture the start of the live variable list for use in the gc_relocates
- const int live_start = args.size();
- args.insert(args.end(), liveVariables.begin(), liveVariables.end());
+ Instruction *InsertBefore = CS.getInstruction();
+ IRBuilder<> Builder(InsertBefore);
+
+ ArrayRef<Value *> GCArgs(LiveVariables);
+ uint64_t StatepointID = 0xABCDEF00;
+ uint32_t NumPatchBytes = 0;
+ uint32_t Flags = uint32_t(StatepointFlags::None);
+
+ ArrayRef<Use> CallArgs;
+ ArrayRef<Use> DeoptArgs;
+ ArrayRef<Use> TransitionArgs;
+
+ Value *CallTarget = nullptr;
+
+ if (UseDeoptBundles) {
+ CallArgs = {CS.arg_begin(), CS.arg_end()};
+ DeoptArgs = GetDeoptBundleOperands(CS);
+ // TODO: we don't fill in TransitionArgs or Flags in this branch, but we
+ // could have an operand bundle for that too.
+ AttributeSet OriginalAttrs = CS.getAttributes();
+
+ Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "statepoint-id");
+ if (AttrID.isStringAttribute())
+ AttrID.getValueAsString().getAsInteger(10, StatepointID);
+
+ Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
+ AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
+ if (AttrNumPatchBytes.isStringAttribute())
+ AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);
+
+ CallTarget = CS.getCalledValue();
+ } else {
+ // This branch will be gone soon, and we will soon only support the
+ // UseDeoptBundles == true configuration.
+ Statepoint OldSP(CS);
+ StatepointID = OldSP.getID();
+ NumPatchBytes = OldSP.getNumPatchBytes();
+ Flags = OldSP.getFlags();
+
+ CallArgs = {OldSP.arg_begin(), OldSP.arg_end()};
+ DeoptArgs = {OldSP.vm_state_begin(), OldSP.vm_state_end()};
+ TransitionArgs = {OldSP.gc_transition_args_begin(),
+ OldSP.gc_transition_args_end()};
+ CallTarget = OldSP.getCalledValue();
+ }
// Create the statepoint given all the arguments
- Instruction *token = nullptr;
- AttributeSet return_attributes;
+ Instruction *Token = nullptr;
+ AttributeSet ReturnAttrs;
if (CS.isCall()) {
- CallInst *toReplace = cast<CallInst>(CS.getInstruction());
- CallInst *call =
- Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token");
- call->setTailCall(toReplace->isTailCall());
- call->setCallingConv(toReplace->getCallingConv());
+ CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
+ CallInst *Call = Builder.CreateGCStatepointCall(
+ StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
+ TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
+
+ Call->setTailCall(ToReplace->isTailCall());
+ Call->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
- // In case if we can handle this set of sttributes - set up function attrs
+ AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+    // If we can handle this set of attributes, set up function attrs
// directly on statepoint and return attrs later for gc_result intrinsic.
- call->setAttributes(new_attrs.getFnAttributes());
- return_attributes = new_attrs.getRetAttributes();
+ Call->setAttributes(NewAttrs.getFnAttributes());
+ ReturnAttrs = NewAttrs.getRetAttributes();
- token = call;
+ Token = Call;
    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete)
- BasicBlock::iterator next(toReplace);
- assert(BB->end() != next && "not a terminator, must have next");
- next++;
- Instruction *IP = &*(next);
- Builder.SetInsertPoint(IP);
- Builder.SetCurrentDebugLocation(IP->getDebugLoc());
-
+ assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
+ Builder.SetInsertPoint(ToReplace->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
} else {
- InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());
+ InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
// Insert the new invoke into the old block. We'll remove the old one in a
// moment at which point this will become the new terminator for the
// original block.
- InvokeInst *invoke = InvokeInst::Create(
- gc_statepoint_decl, toReplace->getNormalDest(),
- toReplace->getUnwindDest(), args, "", toReplace->getParent());
- invoke->setCallingConv(toReplace->getCallingConv());
+ InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
+ StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
+ ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
+ GCArgs, "statepoint_token");
+
+ Invoke->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
- // In case if we can handle this set of sttributes - set up function attrs
+ AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+    // If we can handle this set of attributes, set up function attrs
// directly on statepoint and return attrs later for gc_result intrinsic.
- invoke->setAttributes(new_attrs.getFnAttributes());
- return_attributes = new_attrs.getRetAttributes();
+ Invoke->setAttributes(NewAttrs.getFnAttributes());
+ ReturnAttrs = NewAttrs.getRetAttributes();
- token = invoke;
+ Token = Invoke;
// Generate gc relocates in exceptional path
- BasicBlock *unwindBlock = toReplace->getUnwindDest();
- assert(!isa<PHINode>(unwindBlock->begin()) &&
- unwindBlock->getUniquePredecessor() &&
+ BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
+ assert(!isa<PHINode>(UnwindBlock->begin()) &&
+ UnwindBlock->getUniquePredecessor() &&
"can't safely insert in this block!");
- Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
- Builder.SetInsertPoint(IP);
- Builder.SetCurrentDebugLocation(toReplace->getDebugLoc());
+ Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
- // Extract second element from landingpad return value. We will attach
- // exceptional gc relocates to it.
- const unsigned idx = 1;
- Instruction *exceptional_token =
- cast<Instruction>(Builder.CreateExtractValue(
- unwindBlock->getLandingPadInst(), idx, "relocate_token"));
- result.UnwindToken = exceptional_token;
+ // Attach exceptional gc relocates to the landingpad.
+ Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
+ Result.UnwindToken = ExceptionalToken;
- // Just throw away return value. We will use the one we got for normal
- // block.
- (void)CreateGCRelocates(liveVariables, live_start, basePtrs,
- exceptional_token, Builder);
+ const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
+ CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken,
+ Builder);
// Generate gc relocates and returns for normal block
- BasicBlock *normalDest = toReplace->getNormalDest();
- assert(!isa<PHINode>(normalDest->begin()) &&
- normalDest->getUniquePredecessor() &&
+ BasicBlock *NormalDest = ToReplace->getNormalDest();
+ assert(!isa<PHINode>(NormalDest->begin()) &&
+ NormalDest->getUniquePredecessor() &&
"can't safely insert in this block!");
- IP = &*(normalDest->getFirstInsertionPt());
- Builder.SetInsertPoint(IP);
+ Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());
    // gc relocates will be generated later as if it were a regular call
    // statepoint
}
- assert(token);
-
- // Take the name of the original value call if it had one.
- token->takeName(CS.getInstruction());
+ assert(Token && "Should be set in one of the above branches!");
+
+ if (UseDeoptBundles) {
+ Token->setName("statepoint_token");
+ if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
+ StringRef Name =
+ CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
+ CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
+ GCResult->setAttributes(CS.getAttributes().getRetAttributes());
+
+ // We cannot RAUW or delete CS.getInstruction() because it could be in the
+ // live set of some other safepoint, in which case that safepoint's
+ // PartiallyConstructedSafepointRecord will hold a raw pointer to this
+ // llvm::Instruction. Instead, we defer the replacement and deletion to
+ // after the live sets have been made explicit in the IR, and we no longer
+ // have raw pointers to worry about.
+ Replacements.emplace_back(CS.getInstruction(), GCResult);
+ } else {
+ Replacements.emplace_back(CS.getInstruction(), nullptr);
+ }
+ } else {
+ assert(!CS.getInstruction()->hasNUsesOrMore(2) &&
+ "only valid use before rewrite is gc.result");
+ assert(!CS.getInstruction()->hasOneUse() ||
+ isGCResult(cast<Instruction>(*CS.getInstruction()->user_begin())));
-// The GCResult is already inserted, we just need to find it
-#ifndef NDEBUG
- Instruction *toReplace = CS.getInstruction();
- assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
- "only valid use before rewrite is gc.result");
- assert(!toReplace->hasOneUse() ||
- isGCResult(cast<Instruction>(*toReplace->user_begin())));
-#endif
+ // Take the name of the original statepoint token if there was one.
+ Token->takeName(CS.getInstruction());
- // Update the gc.result of the original statepoint (if any) to use the newly
- // inserted statepoint. This is safe to do here since the token can't be
- // considered a live reference.
- CS.getInstruction()->replaceAllUsesWith(token);
+ // Update the gc.result of the original statepoint (if any) to use the newly
+ // inserted statepoint. This is safe to do here since the token can't be
+ // considered a live reference.
+ CS.getInstruction()->replaceAllUsesWith(Token);
+ CS.getInstruction()->eraseFromParent();
+ }
- result.StatepointToken = token;
+ Result.StatepointToken = Token;
// Second, create a gc.relocate for every live variable
- CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
+ const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
+ CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder);
}
namespace {
-struct name_ordering {
- Value *base;
- Value *derived;
- bool operator()(name_ordering const &a, name_ordering const &b) {
- return -1 == a.derived->getName().compare(b.derived->getName());
+struct NameOrdering {
+ Value *Base;
+ Value *Derived;
+
+ bool operator()(NameOrdering const &a, NameOrdering const &b) {
+ return -1 == a.Derived->getName().compare(b.Derived->getName());
}
};
}
-static void stablize_order(SmallVectorImpl<Value *> &basevec,
- SmallVectorImpl<Value *> &livevec) {
- assert(basevec.size() == livevec.size());
-
- SmallVector<name_ordering, 64> temp;
- for (size_t i = 0; i < basevec.size(); i++) {
- name_ordering v;
- v.base = basevec[i];
- v.derived = livevec[i];
- temp.push_back(v);
- }
- std::sort(temp.begin(), temp.end(), name_ordering());
- for (size_t i = 0; i < basevec.size(); i++) {
- basevec[i] = temp[i].base;
- livevec[i] = temp[i].derived;
+
+static void StabilizeOrder(SmallVectorImpl<Value *> &BaseVec,
+ SmallVectorImpl<Value *> &LiveVec) {
+ assert(BaseVec.size() == LiveVec.size());
+
+ SmallVector<NameOrdering, 64> Temp;
+ for (size_t i = 0; i < BaseVec.size(); i++) {
+ NameOrdering v;
+ v.Base = BaseVec[i];
+ v.Derived = LiveVec[i];
+ Temp.push_back(v);
+ }
+
+ std::sort(Temp.begin(), Temp.end(), NameOrdering());
+ for (size_t i = 0; i < BaseVec.size(); i++) {
+ BaseVec[i] = Temp[i].Base;
+ LiveVec[i] = Temp[i].Derived;
}
}
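
StabilizeOrder's zip/sort/unzip step is worth seeing in isolation: sorting the parallel base/derived vectors by the derived value's name gives the statepoint a deterministic operand order, which keeps test diffs stable. Below is a minimal standalone sketch of the same technique using plain std::string names; it is illustrative only (the pass sorts llvm::Value names through the NameOrdering comparator above).

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      // Parallel vectors, as in StabilizeOrder: BaseVec[i] is the base of LiveVec[i].
      std::vector<std::string> BaseVec = {"base2", "base1", "base3"};
      std::vector<std::string> LiveVec = {"derived.z", "derived.a", "derived.m"};

      // Zip the pairs, sort by the derived name, then unzip, so the pairing is
      // preserved while the emitted order becomes deterministic.
      std::vector<std::pair<std::string, std::string>> Temp;
      for (size_t i = 0; i < BaseVec.size(); ++i)
        Temp.emplace_back(BaseVec[i], LiveVec[i]);

      std::sort(Temp.begin(), Temp.end(),
                [](const std::pair<std::string, std::string> &A,
                   const std::pair<std::string, std::string> &B) {
                  return A.second < B.second;
                });

      for (size_t i = 0; i < BaseVec.size(); ++i) {
        BaseVec[i] = Temp[i].first;
        LiveVec[i] = Temp[i].second;
      }

      for (size_t i = 0; i < LiveVec.size(); ++i)
        std::cout << "derived " << LiveVec[i] << " base " << BaseVec[i] << "\n";
    }
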
@@ -1401,40 +1603,39 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec,
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the caller's responsibility.
static void
-makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
- PartiallyConstructedSafepointRecord &result) {
- auto liveset = result.liveset;
- auto PointerToBase = result.PointerToBase;
+makeStatepointExplicit(DominatorTree &DT, const CallSite &CS,
+ PartiallyConstructedSafepointRecord &Result,
+ std::vector<DeferredReplacement> &Replacements) {
+ const auto &LiveSet = Result.LiveSet;
+ const auto &PointerToBase = Result.PointerToBase;
// Convert to vector for efficient cross referencing.
- SmallVector<Value *, 64> basevec, livevec;
- livevec.reserve(liveset.size());
- basevec.reserve(liveset.size());
- for (Value *L : liveset) {
- livevec.push_back(L);
-
- assert(PointerToBase.find(L) != PointerToBase.end());
- Value *base = PointerToBase[L];
- basevec.push_back(base);
+ SmallVector<Value *, 64> BaseVec, LiveVec;
+ LiveVec.reserve(LiveSet.size());
+ BaseVec.reserve(LiveSet.size());
+ for (Value *L : LiveSet) {
+ LiveVec.push_back(L);
+ assert(PointerToBase.count(L));
+ Value *Base = PointerToBase.find(L)->second;
+ BaseVec.push_back(Base);
}
- assert(livevec.size() == basevec.size());
+ assert(LiveVec.size() == BaseVec.size());
// To make the output IR slightly more stable (for use in diffs), ensure a
// fixed order of the values in the safepoint (by sorting the value name).
// The order is otherwise meaningless.
- stablize_order(basevec, livevec);
+ StabilizeOrder(BaseVec, LiveVec);
// Do the actual rewriting and delete the old statepoint
- makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
- CS.getInstruction()->eraseFromParent();
+ makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
}
// Helper function for the relocationViaAlloca.
-// It receives iterator to the statepoint gc relocates and emits store to the
-// assigned
-// location (via allocaMap) for the each one of them.
-// Add visited values into the visitedLiveValues set we will later use them
-// for sanity check.
+//
+// It receives an iterator to the statepoint gc relocates and emits a store to
+// the assigned location (via allocaMap) for each one of them.  It adds the
+// visited values into the visitedLiveValues set, which we will later use
+// for sanity checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseMap<Value *, Value *> &AllocaMap,
@@ -1459,13 +1660,15 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
Value *Alloca = AllocaMap[OriginalValue];
// Emit store into the related alloca
- // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to
+    // All gc_relocates are i8 addrspace(1)* typed, so they must be bitcast to
    // the correct type according to the alloca.
- assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator");
+ assert(RelocatedValue->getNextNode() &&
+ "Should always have one since it's not a terminator");
IRBuilder<> Builder(RelocatedValue->getNextNode());
Value *CastedRelocatedValue =
- Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
- RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : "");
+ Builder.CreateBitCast(RelocatedValue,
+ cast<AllocaInst>(Alloca)->getAllocatedType(),
+ suffixed_name_or(RelocatedValue, ".casted", ""));
StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
@@ -1501,10 +1704,10 @@ insertRematerializationStores(
}
}
-/// do all the relocation update via allocas and mem2reg
+/// Do all the relocation update via allocas and mem2reg
static void relocationViaAlloca(
Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
- ArrayRef<struct PartiallyConstructedSafepointRecord> Records) {
+ ArrayRef<PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
// record initial number of (static) allocas; we'll check we have the same
// number when we get done.
@@ -1531,15 +1734,12 @@ static void relocationViaAlloca(
PromotableAllocas.push_back(Alloca);
};
- // emit alloca for each live gc pointer
- for (unsigned i = 0; i < Live.size(); i++) {
- emitAllocaFor(Live[i]);
- }
-
- // emit allocas for rematerialized values
- for (size_t i = 0; i < Records.size(); i++) {
- const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+ // Emit alloca for each live gc pointer
+ for (Value *V : Live)
+ emitAllocaFor(V);
+ // Emit allocas for rematerialized values
+ for (const auto &Info : Records)
for (auto RematerializedValuePair : Info.RematerializedValues) {
Value *OriginalValue = RematerializedValuePair.second;
if (AllocaMap.count(OriginalValue) != 0)
@@ -1548,20 +1748,17 @@ static void relocationViaAlloca(
emitAllocaFor(OriginalValue);
++NumRematerializedValues;
}
- }
// The next two loops are part of the same conceptual operation. We need to
// insert a store to the alloca after the original def and at each
// redefinition. We need to insert a load before each use. These are split
// into distinct loops for performance reasons.
- // update gc pointer after each statepoint
- // either store a relocated value or null (if no relocated value found for
- // this gc pointer and it is not a gc_result)
- // this must happen before we update the statepoint with load of alloca
- // otherwise we lose the link between statepoint and old def
- for (size_t i = 0; i < Records.size(); i++) {
- const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+ // Update gc pointer after each statepoint: either store a relocated value or
+ // null (if no relocated value was found for this gc pointer and it is not a
+  // gc_result). This must happen before we update the statepoint with a load of
+  // the alloca; otherwise we lose the link between the statepoint and the old def.
+ for (const auto &Info : Records) {
Value *Statepoint = Info.StatepointToken;
// This will be used for consistency check
@@ -1582,7 +1779,7 @@ static void relocationViaAlloca(
VisitedLiveValues);
if (ClobberNonLive) {
- // As a debuging aid, pretend that an unrelocated pointer becomes null at
+ // As a debugging aid, pretend that an unrelocated pointer becomes null at
// the gc.statepoint. This will turn some subtle GC problems into
// slightly easier to debug SEGVs. Note that on large IR files with
// lots of gc.statepoints this is extremely costly both memory and time
@@ -1612,23 +1809,22 @@ static void relocationViaAlloca(
// Insert the clobbering stores. These may get intermixed with the
// gc.results and gc.relocates, but that's fine.
if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
- InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
- InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+ InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
+ InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
} else {
- BasicBlock::iterator Next(cast<CallInst>(Statepoint));
- Next++;
- InsertClobbersAt(Next);
+ InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
}
}
}
- // update use with load allocas and add store for gc_relocated
+
+  // Update uses with loads from the allocas and add stores for the gc_relocated values.
for (auto Pair : AllocaMap) {
Value *Def = Pair.first;
Value *Alloca = Pair.second;
- // we pre-record the uses of allocas so that we dont have to worry about
- // later update
- // that change the user information.
+    // We pre-record the uses of allocas so that we don't have to worry about
+    // later updates that change the user information.
+
SmallVector<Instruction *, 20> Uses;
// PERF: trade a linear scan for repeated reallocation
Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
@@ -1663,9 +1859,9 @@ static void relocationViaAlloca(
}
}
- // emit store for the initial gc value
- // store must be inserted after load, otherwise store will be in alloca's
- // use list and an extra load will be inserted before it
+    // Emit a store for the initial gc value.  The store must be inserted after
+    // the load; otherwise the store will be in the alloca's use list and an
+    // extra load will be inserted before it.
StoreInst *Store = new StoreInst(Def, Alloca);
if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
@@ -1688,14 +1884,13 @@ static void relocationViaAlloca(
assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
"we must have the same allocas with lives");
if (!PromotableAllocas.empty()) {
- // apply mem2reg to promote alloca to SSA
+ // Apply mem2reg to promote alloca to SSA
PromoteMemToReg(PromotableAllocas, DT);
}
#ifndef NDEBUG
- for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
- I++)
- if (isa<AllocaInst>(*I))
+ for (auto &I : F.getEntryBlock())
+ if (isa<AllocaInst>(I))
InitialAllocaNum--;
assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
@@ -1719,28 +1914,27 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
// No values to hold live, might as well not insert the empty holder
return;
- Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
+ Module *M = CS.getInstruction()->getModule();
// Use a dummy vararg function to actually hold the values live
Function *Func = cast<Function>(M->getOrInsertFunction(
"__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
if (CS.isCall()) {
// For call safepoints insert dummy calls right after safepoint
- BasicBlock::iterator Next(CS.getInstruction());
- Next++;
- Holders.push_back(CallInst::Create(Func, Values, "", Next));
+ Holders.push_back(CallInst::Create(Func, Values, "",
+ &*++CS.getInstruction()->getIterator()));
return;
}
  // For invoke safepoints insert dummy calls both in normal and
// exceptional destination blocks
auto *II = cast<InvokeInst>(CS.getInstruction());
Holders.push_back(CallInst::Create(
- Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
+ Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
Holders.push_back(CallInst::Create(
- Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
+ Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
}
static void findLiveReferences(
- Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
GCPtrLivenessData OriginalLivenessData;
computeLiveInValues(DT, F, OriginalLivenessData);
@@ -1751,12 +1945,12 @@ static void findLiveReferences(
}
}
-/// Remove any vector of pointers from the liveset by scalarizing them over the
-/// statepoint instruction. Adds the scalarized pieces to the liveset. It
-/// would be preferrable to include the vector in the statepoint itself, but
+/// Remove any vector of pointers from the live set by scalarizing them over the
+/// statepoint instruction. Adds the scalarized pieces to the live set. It
+/// would be preferable to include the vector in the statepoint itself, but
/// the lowering code currently does not handle that. Extending it would be
/// slightly non-trivial since it requires a format change. Given how rare
-/// such cases are (for the moment?) scalarizing is an acceptable comprimise.
+/// such cases are (for the moment?) scalarizing is an acceptable compromise.
static void splitVectorValues(Instruction *StatepointInst,
StatepointLiveSetTy &LiveSet,
DenseMap<Value *, Value *>& PointerToBase,
@@ -1887,7 +2081,7 @@ static void splitVectorValues(Instruction *StatepointInst,
// Helper function for "rematerializeLiveValues". It walks the use chain
// starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
// values are visited (currently GEPs and casts). Returns true if it
-// sucessfully reached "BaseValue" and false otherwise.
+// successfully reached "BaseValue" and false otherwise.
// Fills "ChainToBase" array with all visited values. "BaseValue" is not
// recorded.
static bool findRematerializableChainToBasePointer(
@@ -1907,16 +2101,12 @@ static bool findRematerializableChainToBasePointer(
}
if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
- Value *Def = CI->stripPointerCasts();
-
- // This two checks are basically similar. First one is here for the
- // consistency with findBasePointers logic.
- assert(!isa<CastInst>(Def) && "not a pointer cast found");
if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
return false;
ChainToBase.push_back(CI);
- return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+ return findRematerializableChainToBasePointer(ChainToBase,
+ CI->getOperand(0), BaseValue);
}
// Not supported instruction in the chain
@@ -1957,8 +2147,8 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
return Cost;
}
-// From the statepoint liveset pick values that are cheaper to recompute then to
-// relocate. Remove this values from the liveset, rematerialize them after
+// From the statepoint live set pick values that are cheaper to recompute than
+// to relocate. Remove these values from the live set, rematerialize them after
// the statepoint and record them in the "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallSite CS,
@@ -1970,10 +2160,10 @@ static void rematerializeLiveValues(CallSite CS,
  // We cannot do this in the following loop due to iterator invalidation.
SmallVector<Value *, 32> LiveValuesToBeDeleted;
- for (Value *LiveValue: Info.liveset) {
+ for (Value *LiveValue: Info.LiveSet) {
    // For each live pointer find its defining chain
SmallVector<Instruction *, 3> ChainToBase;
- assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+ assert(Info.PointerToBase.count(LiveValue));
bool FoundChain =
findRematerializableChainToBasePointer(ChainToBase,
LiveValue,
@@ -2059,9 +2249,9 @@ static void rematerializeLiveValues(CallSite CS,
InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
Instruction *NormalInsertBefore =
- Invoke->getNormalDest()->getFirstInsertionPt();
+ &*Invoke->getNormalDest()->getFirstInsertionPt();
Instruction *UnwindInsertBefore =
- Invoke->getUnwindDest()->getFirstInsertionPt();
+ &*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue =
rematerializeChain(NormalInsertBefore);
@@ -2075,22 +2265,23 @@ static void rematerializeLiveValues(CallSite CS,
  // Remove rematerialized values from the live set
for (auto LiveValue: LiveValuesToBeDeleted) {
- Info.liveset.erase(LiveValue);
+ Info.LiveSet.erase(LiveValue);
}
}
-static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
- SmallVectorImpl<CallSite> &toUpdate) {
+static bool insertParsePoints(Function &F, DominatorTree &DT,
+ TargetTransformInfo &TTI,
+ SmallVectorImpl<CallSite> &ToUpdate) {
#ifndef NDEBUG
// sanity check the input
- std::set<CallSite> uniqued;
- uniqued.insert(toUpdate.begin(), toUpdate.end());
- assert(uniqued.size() == toUpdate.size() && "no duplicates please!");
+ std::set<CallSite> Uniqued;
+ Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
+ assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
- for (size_t i = 0; i < toUpdate.size(); i++) {
- CallSite &CS = toUpdate[i];
+ for (CallSite CS : ToUpdate) {
assert(CS.getInstruction()->getParent()->getParent() == &F);
- assert(isStatepoint(CS) && "expected to already be a deopt statepoint");
+ assert((UseDeoptBundles || isStatepoint(CS)) &&
+ "expected to already be a deopt statepoint");
}
#endif
@@ -2098,50 +2289,45 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// the top of the successor blocks. See the comment on
  // normalizeForInvokeSafepoint for exactly what is needed. Note that this step
// may restructure the CFG.
- for (CallSite CS : toUpdate) {
+ for (CallSite CS : ToUpdate) {
if (!CS.isInvoke())
continue;
- InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
- normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
- DT);
- normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
- DT);
+ auto *II = cast<InvokeInst>(CS.getInstruction());
+ normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
+ normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
}
// A list of dummy calls added to the IR to keep various values obviously
// live in the IR. We'll remove all of these when done.
- SmallVector<CallInst *, 64> holders;
+ SmallVector<CallInst *, 64> Holders;
// Insert a dummy call with all of the arguments to the vm_state we'll need
// for the actual safepoint insertion. This ensures reference arguments in
// the deopt argument list are considered live through the safepoint (and
// thus makes sure they get relocated.)
- for (size_t i = 0; i < toUpdate.size(); i++) {
- CallSite &CS = toUpdate[i];
- Statepoint StatepointCS(CS);
-
+ for (CallSite CS : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
- for (Use &U : StatepointCS.vm_state_args()) {
- Value *Arg = cast<Value>(&U);
+
+ iterator_range<const Use *> DeoptStateRange =
+ UseDeoptBundles
+ ? iterator_range<const Use *>(GetDeoptBundleOperands(CS))
+ : iterator_range<const Use *>(Statepoint(CS).vm_state_args());
+
+ for (Value *Arg : DeoptStateRange) {
assert(!isUnhandledGCPointerType(Arg->getType()) &&
"support for FCA unimplemented");
if (isHandledGCPointerType(Arg->getType()))
DeoptValues.push_back(Arg);
}
- insertUseHolderAfter(CS, DeoptValues, holders);
- }
- SmallVector<struct PartiallyConstructedSafepointRecord, 64> records;
- records.reserve(toUpdate.size());
- for (size_t i = 0; i < toUpdate.size(); i++) {
- struct PartiallyConstructedSafepointRecord info;
- records.push_back(info);
+ insertUseHolderAfter(CS, DeoptValues, Holders);
}
- assert(records.size() == toUpdate.size());
- // A) Identify all gc pointers which are staticly live at the given call
+ SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
+
+ // A) Identify all gc pointers which are statically live at the given call
// site.
- findLiveReferences(F, DT, P, toUpdate, records);
+ findLiveReferences(F, DT, ToUpdate, Records);
// B) Find the base pointers for each live pointer
/* scope for caching */ {
@@ -2150,10 +2336,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// large numbers of duplicate base_phis.
DefiningValueMapTy DVCache;
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
- findBasePointers(DT, DVCache, CS, info);
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &info = Records[i];
+ findBasePointers(DT, DVCache, ToUpdate[i], info);
}
} // end of cache scope
@@ -2170,63 +2355,75 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// the base pointers which were identified for that safepoint. We'll then
// ask liveness for _every_ base inserted to see what is now live. Then we
// remove the dummy calls.
- holders.reserve(holders.size() + records.size());
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
+ Holders.reserve(Holders.size() + Records.size());
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
SmallVector<Value *, 128> Bases;
- for (auto Pair : info.PointerToBase) {
+ for (auto Pair : Info.PointerToBase)
Bases.push_back(Pair.second);
- }
- insertUseHolderAfter(CS, Bases, holders);
+
+ insertUseHolderAfter(ToUpdate[i], Bases, Holders);
}
// By selecting base pointers, we've effectively inserted new uses. Thus, we
// need to rerun liveness. We may *also* have inserted new defs, but that's
// not the key issue.
- recomputeLiveInValues(F, DT, P, toUpdate, records);
+ recomputeLiveInValues(F, DT, ToUpdate, Records);
if (PrintBasePointers) {
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
+ for (auto &Info : Records) {
errs() << "Base Pairs: (w/Relocation)\n";
- for (auto Pair : info.PointerToBase) {
- errs() << " derived %" << Pair.first->getName() << " base %"
- << Pair.second->getName() << "\n";
+ for (auto Pair : Info.PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+ errs() << "\n";
}
}
}
- for (size_t i = 0; i < holders.size(); i++) {
- holders[i]->eraseFromParent();
- holders[i] = nullptr;
- }
- holders.clear();
+
+ // It is possible that non-constant live variables have a constant base. For
+ // example, a GEP with a variable offset from a global. In this case we can
+ // remove it from the liveset. We already don't add constants to the liveset
+ // because we assume they won't move at runtime and the GC doesn't need to be
+ // informed about them. The same reasoning applies if the base is constant.
+ // Note that the relocation placement code relies on this filtering for
+ // correctness as it expects the base to be in the liveset, which isn't true
+ // if the base is constant.
+ for (auto &Info : Records)
+ for (auto &BasePair : Info.PointerToBase)
+ if (isa<Constant>(BasePair.second))
+ Info.LiveSet.erase(BasePair.first);
+
+ for (CallInst *CI : Holders)
+ CI->eraseFromParent();
+
+ Holders.clear();
  // Do a limited scalarization of any vector values live at the safepoint which
// contain pointers. This enables this pass to run after vectorization at
// the cost of some possible performance loss. TODO: it would be nice to
// natively support vectors all the way through the backend so we don't need
// to scalarize here.
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- Instruction *statepoint = toUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(statepoint), info.liveset,
- info.PointerToBase, DT);
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+ Instruction *Statepoint = ToUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+ Info.PointerToBase, DT);
}
// In order to reduce live set of statepoint we might choose to rematerialize
- // some values instead of relocating them. This is purelly an optimization and
+ // some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
- TargetTransformInfo &TTI =
- P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ for (size_t i = 0; i < Records.size(); i++)
+ rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
-
- rematerializeLiveValues(CS, info, TTI);
- }
+ // We need this to safely RAUW and delete call or invoke return values that
+ // may themselves be live over a statepoint. For details, please see usage in
+ // makeStatepointExplicitImpl.
+ std::vector<DeferredReplacement> Replacements;
// Now run through and replace the existing statepoints with new ones with
// the live variables listed. We do not yet update uses of the values being
@@ -2234,61 +2431,77 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// survive to the last iteration of this loop. (By construction, the
  // previous statepoint cannot be a live variable, thus we can and do remove
// the old statepoint calls as we go.)
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
- CallSite &CS = toUpdate[i];
- makeStatepointExplicit(DT, CS, P, info);
+ for (size_t i = 0; i < Records.size(); i++)
+ makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
+
+  ToUpdate.clear(); // prevent accidental use of invalid CallSites
+
+ for (auto &PR : Replacements)
+ PR.doReplacement();
+
+ Replacements.clear();
+
+ for (auto &Info : Records) {
+    // These live sets may contain stale Value pointers, since we replaced calls
+ // with operand bundles with calls wrapped in gc.statepoint, and some of
+ // those calls may have been def'ing live gc pointers. Clear these out to
+ // avoid accidentally using them.
+ //
+ // TODO: We should create a separate data structure that does not contain
+ // these live sets, and migrate to using that data structure from this point
+ // onward.
+ Info.LiveSet.clear();
+ Info.PointerToBase.clear();
}
- toUpdate.clear(); // prevent accident use of invalid CallSites
// Do all the fixups of the original live variables to their relocated selves
- SmallVector<Value *, 128> live;
- for (size_t i = 0; i < records.size(); i++) {
- struct PartiallyConstructedSafepointRecord &info = records[i];
+ SmallVector<Value *, 128> Live;
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+
// We can't simply save the live set from the original insertion. One of
// the live values might be the result of a call which needs a safepoint.
// That Value* no longer exists and we need to use the new gc_result.
- // Thankfully, the liveset is embedded in the statepoint (and updated), so
+ // Thankfully, the live set is embedded in the statepoint (and updated), so
// we just grab that.
- Statepoint statepoint(info.StatepointToken);
- live.insert(live.end(), statepoint.gc_args_begin(),
- statepoint.gc_args_end());
+ Statepoint Statepoint(Info.StatepointToken);
+ Live.insert(Live.end(), Statepoint.gc_args_begin(),
+ Statepoint.gc_args_end());
#ifndef NDEBUG
// Do some basic sanity checks on our liveness results before performing
// relocation. Relocation can and will turn mistakes in liveness results
// into non-sensical code which is must harder to debug.
// TODO: It would be nice to test consistency as well
- assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+ assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
"statepoint must be reachable or liveness is meaningless");
- for (Value *V : statepoint.gc_args()) {
+ for (Value *V : Statepoint.gc_args()) {
if (!isa<Instruction>(V))
        // Non-instruction values trivially dominate all possible uses
continue;
- auto LiveInst = cast<Instruction>(V);
+ auto *LiveInst = cast<Instruction>(V);
assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
"unreachable values should never be live");
- assert(DT.dominates(LiveInst, info.StatepointToken) &&
+ assert(DT.dominates(LiveInst, Info.StatepointToken) &&
"basic SSA liveness expectation violated by liveness analysis");
}
#endif
}
- unique_unsorted(live);
+ unique_unsorted(Live);
#ifndef NDEBUG
// sanity check
- for (auto ptr : live) {
- assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");
- }
+ for (auto *Ptr : Live)
+ assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
#endif
- relocationViaAlloca(F, DT, live, records);
- return !records.empty();
+ relocationViaAlloca(F, DT, Live, Records);
+ return !Records.empty();
}
// Handles both return values and arguments for Functions and CallSites.
template <typename AttrHolder>
-static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
- unsigned Index) {
+static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
+ unsigned Index) {
AttrBuilder R;
if (AH.getDereferenceableBytes(Index))
R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
@@ -2296,6 +2509,8 @@ static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
if (AH.getDereferenceableOrNullBytes(Index))
R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
AH.getDereferenceableOrNullBytes(Index)));
+ if (AH.doesNotAlias(Index))
+ R.addAttribute(Attribute::NoAlias);
if (!R.empty())
AH.setAttributes(AH.getAttributes().removeAttributes(
@@ -2303,25 +2518,25 @@ static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
}
void
-RewriteStatepointsForGC::stripDereferenceabilityInfoFromPrototype(Function &F) {
+RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
LLVMContext &Ctx = F.getContext();
for (Argument &A : F.args())
if (isa<PointerType>(A.getType()))
- RemoveDerefAttrAtIndex(Ctx, F, A.getArgNo() + 1);
+ RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
if (isa<PointerType>(F.getReturnType()))
- RemoveDerefAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
}
-void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
+void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
if (F.empty())
return;
LLVMContext &Ctx = F.getContext();
MDBuilder Builder(Ctx);
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
bool IsImmutableTBAA =
@@ -2344,9 +2559,9 @@ void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
if (CallSite CS = CallSite(&I)) {
for (int i = 0, e = CS.arg_size(); i != e; i++)
if (isa<PointerType>(CS.getArgument(i)->getType()))
- RemoveDerefAttrAtIndex(Ctx, CS, i + 1);
+ RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
if (isa<PointerType>(CS.getType()))
- RemoveDerefAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
}
}
}
@@ -2365,17 +2580,17 @@ static bool shouldRewriteStatepointsIn(Function &F) {
return false;
}
-void RewriteStatepointsForGC::stripDereferenceabilityInfo(Module &M) {
+void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
#ifndef NDEBUG
assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) &&
"precondition!");
#endif
for (Function &F : M)
- stripDereferenceabilityInfoFromPrototype(F);
+ stripNonValidAttributesFromPrototype(F);
for (Function &F : M)
- stripDereferenceabilityInfoFromBody(F);
+ stripNonValidAttributesFromBody(F);
}
bool RewriteStatepointsForGC::runOnFunction(Function &F) {
@@ -2389,15 +2604,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
return false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ auto NeedsRewrite = [](Instruction &I) {
+ if (UseDeoptBundles) {
+ if (ImmutableCallSite CS = ImmutableCallSite(&I))
+ return !callsGCLeafFunction(CS);
+ return false;
+ }
+
+ return isStatepoint(I);
+ };
  // Gather all the statepoints which need to be rewritten. Be careful to only
// consider those in reachable code since we need to ask dominance queries
// when rewriting. We'll delete the unreachable ones in a moment.
SmallVector<CallSite, 64> ParsePointNeeded;
bool HasUnreachableStatepoint = false;
- for (Instruction &I : inst_range(F)) {
+ for (Instruction &I : instructions(F)) {
// TODO: only the ones with the flag set!
- if (isStatepoint(I)) {
+ if (NeedsRewrite(I)) {
if (DT.isReachableFromEntry(I.getParent()))
ParsePointNeeded.push_back(CallSite(&I));
else
@@ -2428,7 +2655,38 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
FoldSingleEntryPHINodes(&BB);
}
- MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+ // Before we start introducing relocations, we want to tweak the IR a bit to
+ // avoid unfortunate code generation effects. The main example is that we
+ // want to try to make sure the comparison feeding a branch is after any
+ // safepoints. Otherwise, we end up with a comparison of pre-relocation
+ // values feeding a branch after relocation. This is semantically correct,
+ // but results in extra register pressure since both the pre-relocation and
+ // post-relocation copies must be available in registers. For code without
+ // relocations this is handled elsewhere, but teaching the scheduler to
+ // reverse the transform we're about to do would be slightly complex.
+ // Note: This may extend the live range of the inputs to the icmp and thus
+ // increase the live set of any statepoint we move over. This is profitable
+ // as long as all statepoints are in rare blocks. If we had in-register
+ // lowering for live values this would be a much safer transform.
+ auto getConditionInst = [](TerminatorInst *TI) -> Instruction* {
+ if (auto *BI = dyn_cast<BranchInst>(TI))
+ if (BI->isConditional())
+ return dyn_cast<Instruction>(BI->getCondition());
+ // TODO: Extend this to handle switches
+ return nullptr;
+ };
+ for (BasicBlock &BB : F) {
+ TerminatorInst *TI = BB.getTerminator();
+ if (auto *Cond = getConditionInst(TI))
+ // TODO: Handle more than just ICmps here. We should be able to move
+ // most instructions without side effects or memory access.
+ if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
+ MadeChange = true;
+ Cond->moveBefore(TI);
+ }
+ }
+
+ MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
return MadeChange;
}
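The comment block above explains why the pass sinks a single-use compare down next to the branch it feeds before relocations are introduced. A minimal standalone sketch of just that step, using the same API calls as the loop above (the helper name is hypothetical, not part of the pass):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Move a single-use ICmp that feeds a conditional branch so it sits
    // immediately before the terminator; a statepoint inserted earlier in the
    // block then no longer has to keep the compare's pre-relocation operands
    // live across it.
    static bool sinkBranchConditions(Function &F) {
      bool Changed = false;
      for (BasicBlock &BB : F) {
        auto *BI = dyn_cast<BranchInst>(BB.getTerminator());
        if (!BI || !BI->isConditional())
          continue;
        auto *Cond = dyn_cast<ICmpInst>(BI->getCondition());
        if (Cond && Cond->hasOneUse()) {
          Cond->moveBefore(BI);
          Changed = true;
        }
      }
      return Changed;
    }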
@@ -2461,7 +2719,7 @@ static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
"support for FCA unimplemented");
if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
// The choice to exclude all things constant here is slightly subtle.
- // There are two idependent reasons:
+ // There are two independent reasons:
// - We assume that things which are constant (from LLVM's definition)
// do not move at runtime. For example, the address of a global
  //   variable is fixed, even though its contents may not be.
@@ -2599,7 +2857,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
} // while( !worklist.empty() )
#ifndef NDEBUG
- // Sanity check our ouput against SSA properties. This helps catch any
+ // Sanity check our output against SSA properties. This helps catch any
// missing kills during the above iteration.
for (BasicBlock &BB : F) {
checkBasicSSA(DT, Data, BB);
@@ -2620,7 +2878,7 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
  // call result is not live (normal), nor are its arguments
// (unless they're used again later). This adjustment is
// specifically what we need to relocate
- BasicBlock::reverse_iterator rend(Inst);
+ BasicBlock::reverse_iterator rend(Inst->getIterator());
computeLiveInValues(BB->rbegin(), rend, LiveOut);
LiveOut.erase(Inst);
Out.insert(LiveOut.begin(), LiveOut.end());
@@ -2669,5 +2927,5 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
assert(Updated.count(KVPair.first) && "record for non-live value");
#endif
- Info.liveset = Updated;
+ Info.LiveSet = Updated;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 4d3a708..2fca803 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
@@ -479,6 +480,13 @@ private:
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
+ void visitFuncletPadInst(FuncletPadInst &FPI) {
+ markAnythingOverdefined(&FPI);
+ }
+ void visitCatchSwitchInst(CatchSwitchInst &CPI) {
+ markAnythingOverdefined(&CPI);
+ visitTerminatorInst(CPI);
+ }
// Instructions that cannot be folded away.
void visitStoreInst (StoreInst &I);
@@ -539,9 +547,9 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
- if (isa<InvokeInst>(TI)) {
- // Invoke instructions successors are always executable.
- Succs[0] = Succs[1] = true;
+ // The successors of unwinding instructions are always executable.
+ if (TI.isExceptional()) {
+ Succs.assign(TI.getNumSuccessors(), true);
return;
}
@@ -605,8 +613,8 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
return BI->getSuccessor(CI->isZero()) == To;
}
- // Invoke instructions successors are always executable.
- if (isa<InvokeInst>(TI))
+ // The successors of unwinding instructions are always executable.
+ if (TI->isExceptional())
return true;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
@@ -630,7 +638,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
- llvm_unreachable(nullptr);
+ llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
// visit Implementations - Something changed in this instruction, either an
@@ -1126,7 +1134,7 @@ CallOverdefined:
// entry block executable and merge in the actual arguments to the call into
// the formal arguments of the function.
if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
- MarkBlockExecutable(F->begin());
+ MarkBlockExecutable(&F->front());
// Propagate information from this call site into the callee.
CallSite::arg_iterator CAI = CS.arg_begin();
@@ -1135,17 +1143,17 @@ CallOverdefined:
// If this argument is byval, and if the function is not readonly, there
// will be an implicit copy formed of the input aggregate.
if (AI->hasByValAttr() && !F->onlyReadsMemory()) {
- markOverdefined(AI);
+ markOverdefined(&*AI);
continue;
}
if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal CallArg = getStructValueState(*CAI, i);
- mergeInValue(getStructValueState(AI, i), AI, CallArg);
+ mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg);
}
} else {
- mergeInValue(AI, getValueState(*CAI));
+ mergeInValue(&*AI, getValueState(*CAI));
}
}
}
@@ -1246,18 +1254,18 @@ void SCCPSolver::Solve() {
/// even if X isn't defined.
bool SCCPSolver::ResolvedUndefsIn(Function &F) {
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!BBExecutable.count(BB))
+ if (!BBExecutable.count(&*BB))
continue;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (Instruction &I : *BB) {
// Look for instructions which produce undef values.
- if (I->getType()->isVoidTy()) continue;
+ if (I.getType()->isVoidTy()) continue;
- if (StructType *STy = dyn_cast<StructType>(I->getType())) {
+ if (StructType *STy = dyn_cast<StructType>(I.getType())) {
// Only a few things that can be structs matter for undef.
// Tracked calls must never be marked overdefined in ResolvedUndefsIn.
- if (CallSite CS = CallSite(I))
+ if (CallSite CS = CallSite(&I))
if (Function *F = CS.getCalledFunction())
if (MRVFunctionsTracked.count(F))
continue;
@@ -1270,14 +1278,14 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Send the results of everything else to overdefined. We could be
// more precise than this but it isn't worth bothering.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- LatticeVal &LV = getStructValueState(I, i);
+ LatticeVal &LV = getStructValueState(&I, i);
if (LV.isUndefined())
- markOverdefined(LV, I);
+ markOverdefined(LV, &I);
}
continue;
}
- LatticeVal &LV = getValueState(I);
+ LatticeVal &LV = getValueState(&I);
if (!LV.isUndefined()) continue;
// extractvalue is safe; check here because the argument is a struct.
@@ -1287,24 +1295,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Compute the operand LatticeVals, for convenience below.
// Anything taking a struct is conservatively assumed to require
// overdefined markings.
- if (I->getOperand(0)->getType()->isStructTy()) {
- markOverdefined(I);
+ if (I.getOperand(0)->getType()->isStructTy()) {
+ markOverdefined(&I);
return true;
}
- LatticeVal Op0LV = getValueState(I->getOperand(0));
+ LatticeVal Op0LV = getValueState(I.getOperand(0));
LatticeVal Op1LV;
- if (I->getNumOperands() == 2) {
- if (I->getOperand(1)->getType()->isStructTy()) {
- markOverdefined(I);
+ if (I.getNumOperands() == 2) {
+ if (I.getOperand(1)->getType()->isStructTy()) {
+ markOverdefined(&I);
return true;
}
- Op1LV = getValueState(I->getOperand(1));
+ Op1LV = getValueState(I.getOperand(1));
}
      // If this is an instruction whose result is defined even if the input is
// not fully defined, propagate the information.
- Type *ITy = I->getType();
- switch (I->getOpcode()) {
+ Type *ITy = I.getType();
+ switch (I.getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Trunc:
@@ -1318,9 +1326,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::FRem:
// Floating-point binary operation: be conservative.
if (Op0LV.isUndefined() && Op1LV.isUndefined())
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
else
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::ZExt:
case Instruction::SExt:
@@ -1332,7 +1340,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
case Instruction::SIToFP:
case Instruction::UIToFP:
// undef -> 0; some outputs are impossible
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Mul:
case Instruction::And:
@@ -1341,7 +1349,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
break;
// undef * X -> 0. X could be zero.
// undef & X -> 0. X could be zero.
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Or:
@@ -1349,7 +1357,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (Op0LV.isUndefined() && Op1LV.isUndefined())
break;
// undef | X -> -1. X could be -1.
- markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ markForcedConstant(&I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::Xor:
@@ -1357,7 +1365,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// necessary, but we try to be nice to people who expect this
// behavior in simple cases
if (Op0LV.isUndefined() && Op1LV.isUndefined()) {
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
}
// undef ^ X -> undef
@@ -1373,7 +1381,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::AShr:
@@ -1381,7 +1389,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (Op1LV.isUndefined()) break;
// undef >>a X -> all ones
- markForcedConstant(I, Constant::getAllOnesValue(ITy));
+ markForcedConstant(&I, Constant::getAllOnesValue(ITy));
return true;
case Instruction::LShr:
case Instruction::Shl:
@@ -1391,17 +1399,17 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// undef << X -> 0
// undef >> X -> 0
- markForcedConstant(I, Constant::getNullValue(ITy));
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::Select:
- Op1LV = getValueState(I->getOperand(1));
+ Op1LV = getValueState(I.getOperand(1));
// undef ? X : Y -> X or Y. There could be commonality between X/Y.
if (Op0LV.isUndefined()) {
if (!Op1LV.isConstant()) // Pick the constant one if there is any.
- Op1LV = getValueState(I->getOperand(2));
+ Op1LV = getValueState(I.getOperand(2));
} else if (Op1LV.isUndefined()) {
// c ? undef : undef -> undef. No change.
- Op1LV = getValueState(I->getOperand(2));
+ Op1LV = getValueState(I.getOperand(2));
if (Op1LV.isUndefined())
break;
// Otherwise, c ? undef : x -> x.
@@ -1410,9 +1418,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
}
if (Op1LV.isConstant())
- markForcedConstant(I, Op1LV.getConstant());
+ markForcedConstant(&I, Op1LV.getConstant());
else
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::Load:
// A load here means one of two things: a load of undef from a global,
@@ -1421,9 +1429,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
break;
case Instruction::ICmp:
// X == undef -> undef. Other comparisons get more complicated.
- if (cast<ICmpInst>(I)->isEquality())
+ if (cast<ICmpInst>(&I)->isEquality())
break;
- markOverdefined(I);
+ markOverdefined(&I);
return true;
case Instruction::Call:
case Instruction::Invoke: {
@@ -1432,19 +1440,19 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// 2. It could be constant-foldable.
// Because of the way we solve return values, tracked calls must
// never be marked overdefined in ResolvedUndefsIn.
- if (Function *F = CallSite(I).getCalledFunction())
+ if (Function *F = CallSite(&I).getCalledFunction())
if (TrackedRetVals.count(F))
break;
// If the call is constant-foldable, we mark it overdefined because
// we do not know what return values are valid.
- markOverdefined(I);
+ markOverdefined(&I);
return true;
}
default:
// If we don't know what should happen here, conservatively mark it
// overdefined.
- markOverdefined(I);
+ markOverdefined(&I);
return true;
}
}
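Each forced-constant choice in the switch above is legal only because some concrete value of the undef operand produces that constant for every value of the other operand. A tiny self-contained check of the two cases called out in the comments (purely illustrative, not code from the pass):

    #include <cassert>
    #include <cstdint>

    int main() {
      // undef & X -> 0:  choosing undef == 0 yields 0 for any X.
      // undef | X -> -1: choosing undef == ~0 yields all-ones for any X.
      for (unsigned X = 0; X <= 0xFF; ++X) {
        assert((uint8_t)(0x00 & X) == 0x00);
        assert((uint8_t)(0xFF | X) == 0xFF);
      }
      return 0;
    }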
@@ -1462,7 +1470,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// false.
if (isa<UndefValue>(BI->getCondition())) {
BI->setCondition(ConstantInt::getFalse(BI->getContext()));
- markEdgeExecutable(BB, TI->getSuccessor(1));
+ markEdgeExecutable(&*BB, TI->getSuccessor(1));
return true;
}
@@ -1484,7 +1492,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
SI->setCondition(SI->case_begin().getCaseValue());
- markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor());
+ markEdgeExecutable(&*BB, SI->case_begin().getCaseSuccessor());
return true;
}
@@ -1506,6 +1514,7 @@ namespace {
struct SCCP : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
@@ -1541,11 +1550,10 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
while (EndInst != BB->begin()) {
// Delete the next to last instruction.
- BasicBlock::iterator I = EndInst;
- Instruction *Inst = --I;
+ Instruction *Inst = &*--EndInst->getIterator();
if (!Inst->use_empty())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (isa<LandingPadInst>(Inst)) {
+ if (Inst->isEHPad()) {
EndInst = Inst;
continue;
}
@@ -1568,11 +1576,11 @@ bool SCCP::runOnFunction(Function &F) {
SCCPSolver Solver(DL, TLI);
// Mark the first block of the function as being executable.
- Solver.MarkBlockExecutable(F.begin());
+ Solver.MarkBlockExecutable(&F.front());
// Mark all arguments to the function as being overdefined.
- for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI)
- Solver.markAnythingOverdefined(AI);
+ for (Argument &AI : F.args())
+ Solver.markAnythingOverdefined(&AI);
// Solve for constants.
bool ResolvedUndefs = true;
@@ -1589,8 +1597,8 @@ bool SCCP::runOnFunction(Function &F) {
// as we cannot modify the CFG of the function.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (!Solver.isBlockExecutable(BB)) {
- DeleteInstructionInBlock(BB);
+ if (!Solver.isBlockExecutable(&*BB)) {
+ DeleteInstructionInBlock(&*BB);
MadeChanges = true;
continue;
}
@@ -1599,7 +1607,7 @@ bool SCCP::runOnFunction(Function &F) {
// constants if we have found them to be of constant values.
//
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
- Instruction *Inst = BI++;
+ Instruction *Inst = &*BI++;
if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
continue;
@@ -1713,36 +1721,34 @@ bool IPSCCP::runOnModule(Module &M) {
// If this is a strong or ODR definition of this function, then we can
// propagate information about its result into callsites of it.
if (!F->mayBeOverridden())
- Solver.AddTrackedFunction(F);
+ Solver.AddTrackedFunction(&*F);
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
if (F->hasLocalLinkage()) {
- if (AddressIsTaken(F))
- AddressTakenFunctions.insert(F);
+ if (AddressIsTaken(&*F))
+ AddressTakenFunctions.insert(&*F);
else {
- Solver.AddArgumentTrackedFunction(F);
+ Solver.AddArgumentTrackedFunction(&*F);
continue;
}
}
// Assume the function is called.
- Solver.MarkBlockExecutable(F->begin());
+ Solver.MarkBlockExecutable(&F->front());
// Assume nothing about the incoming arguments.
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI)
- Solver.markAnythingOverdefined(AI);
+ for (Argument &AI : F->args())
+ Solver.markAnythingOverdefined(&AI);
}
// Loop over global variables. We inform the solver about any internal global
// variables that do not have their 'addresses taken'. If they don't have
// their addresses taken, we can propagate constants through them.
- for (Module::global_iterator G = M.global_begin(), E = M.global_end();
- G != E; ++G)
- if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G))
- Solver.TrackValueOfGlobalVariable(G);
+ for (GlobalVariable &G : M.globals())
+ if (!G.isConstant() && G.hasLocalLinkage() && !AddressIsTaken(&G))
+ Solver.TrackValueOfGlobalVariable(&G);
// Solve for constants.
bool ResolvedUndefs = true;
@@ -1763,7 +1769,10 @@ bool IPSCCP::runOnModule(Module &M) {
SmallVector<BasicBlock*, 512> BlocksToErase;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (Solver.isBlockExecutable(F->begin())) {
+ if (F->isDeclaration())
+ continue;
+
+ if (Solver.isBlockExecutable(&F->front())) {
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
if (AI->use_empty() || AI->getType()->isStructTy()) continue;
@@ -1771,7 +1780,7 @@ bool IPSCCP::runOnModule(Module &M) {
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
- LatticeVal IV = Solver.getLatticeValueFor(AI);
+ LatticeVal IV = Solver.getLatticeValueFor(&*AI);
if (IV.isOverdefined()) continue;
Constant *CST = IV.isConstant() ?
@@ -1786,28 +1795,27 @@ bool IPSCCP::runOnModule(Module &M) {
}
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- if (!Solver.isBlockExecutable(BB)) {
- DeleteInstructionInBlock(BB);
+ if (!Solver.isBlockExecutable(&*BB)) {
+ DeleteInstructionInBlock(&*BB);
MadeChanges = true;
TerminatorInst *TI = BB->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = TI->getSuccessor(i);
+ for (BasicBlock *Succ : TI->successors()) {
if (!Succ->empty() && isa<PHINode>(Succ->begin()))
- TI->getSuccessor(i)->removePredecessor(BB);
+ Succ->removePredecessor(&*BB);
}
if (!TI->use_empty())
TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
TI->eraseFromParent();
- new UnreachableInst(M.getContext(), BB);
+ new UnreachableInst(M.getContext(), &*BB);
if (&*BB != &F->front())
- BlocksToErase.push_back(BB);
+ BlocksToErase.push_back(&*BB);
continue;
}
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
- Instruction *Inst = BI++;
+ Instruction *Inst = &*BI++;
if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
continue;
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index 947513a..a7361b5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -23,12 +23,12 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -37,8 +37,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -53,9 +51,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#if __cplusplus >= 201103L && !defined(NDEBUG)
// We only use this for a debug check in C++11
@@ -63,6 +61,7 @@
#endif
using namespace llvm;
+using namespace llvm::sroa;
#define DEBUG_TYPE "sroa"
@@ -77,11 +76,6 @@ STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
-/// Hidden option to force the pass to not use DomTree and mem2reg, instead
-/// forming SSA values through the SSAUpdater infrastructure.
-static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false),
- cl::Hidden);
-
/// Hidden option to enable randomly shuffling the slices to help uncover
/// instability in their order.
static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
@@ -205,7 +199,6 @@ template <typename T> struct isPodLike;
template <> struct isPodLike<Slice> { static const bool value = true; };
}
-namespace {
/// \brief Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
@@ -213,7 +206,7 @@ namespace {
/// for the slices used and we reflect that in this structure. The uses are
/// stored, sorted by increasing beginning offset and with unsplittable slices
/// starting at a particular offset before splittable slices.
-class AllocaSlices {
+class llvm::sroa::AllocaSlices {
public:
/// \brief Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
@@ -253,281 +246,10 @@ public:
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
}
- // Forward declare an iterator to befriend it.
+ // Forward declare the iterator and range accessor for walking the
+ // partitions.
class partition_iterator;
-
- /// \brief A partition of the slices.
- ///
- /// An ephemeral representation for a range of slices which can be viewed as
- /// a partition of the alloca. This range represents a span of the alloca's
- /// memory which cannot be split, and provides access to all of the slices
- /// overlapping some part of the partition.
- ///
- /// Objects of this type are produced by traversing the alloca's slices, but
- /// are only ephemeral and not persistent.
- class Partition {
- private:
- friend class AllocaSlices;
- friend class AllocaSlices::partition_iterator;
-
- /// \brief The begining and ending offsets of the alloca for this partition.
- uint64_t BeginOffset, EndOffset;
-
- /// \brief The start end end iterators of this partition.
- iterator SI, SJ;
-
- /// \brief A collection of split slice tails overlapping the partition.
- SmallVector<Slice *, 4> SplitTails;
-
- /// \brief Raw constructor builds an empty partition starting and ending at
- /// the given iterator.
- Partition(iterator SI) : SI(SI), SJ(SI) {}
-
- public:
- /// \brief The start offset of this partition.
- ///
- /// All of the contained slices start at or after this offset.
- uint64_t beginOffset() const { return BeginOffset; }
-
- /// \brief The end offset of this partition.
- ///
- /// All of the contained slices end at or before this offset.
- uint64_t endOffset() const { return EndOffset; }
-
- /// \brief The size of the partition.
- ///
- /// Note that this can never be zero.
- uint64_t size() const {
- assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
- return EndOffset - BeginOffset;
- }
-
- /// \brief Test whether this partition contains no slices, and merely spans
- /// a region occupied by split slices.
- bool empty() const { return SI == SJ; }
-
- /// \name Iterate slices that start within the partition.
- /// These may be splittable or unsplittable. They have a begin offset >= the
- /// partition begin offset.
- /// @{
- // FIXME: We should probably define a "concat_iterator" helper and use that
- // to stitch together pointee_iterators over the split tails and the
- // contiguous iterators of the partition. That would give a much nicer
- // interface here. We could then additionally expose filtered iterators for
- // split, unsplit, and unsplittable splices based on the usage patterns.
- iterator begin() const { return SI; }
- iterator end() const { return SJ; }
- /// @}
-
- /// \brief Get the sequence of split slice tails.
- ///
- /// These tails are of slices which start before this partition but are
- /// split and overlap into the partition. We accumulate these while forming
- /// partitions.
- ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
- };
-
- /// \brief An iterator over partitions of the alloca's slices.
- ///
- /// This iterator implements the core algorithm for partitioning the alloca's
- /// slices. It is a forward iterator as we don't support backtracking for
- /// efficiency reasons, and re-use a single storage area to maintain the
- /// current set of split slices.
- ///
- /// It is templated on the slice iterator type to use so that it can operate
- /// with either const or non-const slice iterators.
- class partition_iterator
- : public iterator_facade_base<partition_iterator,
- std::forward_iterator_tag, Partition> {
- friend class AllocaSlices;
-
- /// \brief Most of the state for walking the partitions is held in a class
- /// with a nice interface for examining them.
- Partition P;
-
- /// \brief We need to keep the end of the slices to know when to stop.
- AllocaSlices::iterator SE;
-
- /// \brief We also need to keep track of the maximum split end offset seen.
- /// FIXME: Do we really?
- uint64_t MaxSplitSliceEndOffset;
-
- /// \brief Sets the partition to be empty at given iterator, and sets the
- /// end iterator.
- partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
- : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
- // If not already at the end, advance our state to form the initial
- // partition.
- if (SI != SE)
- advance();
- }
-
- /// \brief Advance the iterator to the next partition.
- ///
- /// Requires that the iterator not be at the end of the slices.
- void advance() {
- assert((P.SI != SE || !P.SplitTails.empty()) &&
- "Cannot advance past the end of the slices!");
-
- // Clear out any split uses which have ended.
- if (!P.SplitTails.empty()) {
- if (P.EndOffset >= MaxSplitSliceEndOffset) {
- // If we've finished all splits, this is easy.
- P.SplitTails.clear();
- MaxSplitSliceEndOffset = 0;
- } else {
- // Remove the uses which have ended in the prior partition. This
- // cannot change the max split slice end because we just checked that
- // the prior partition ended prior to that max.
- P.SplitTails.erase(
- std::remove_if(
- P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
- P.SplitTails.end());
- assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() == MaxSplitSliceEndOffset;
- }) &&
- "Could not find the current max split slice offset!");
- assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() <= MaxSplitSliceEndOffset;
- }) &&
- "Max split slice end offset is not actually the max!");
- }
- }
-
- // If P.SI is already at the end, then we've cleared the split tail and
- // now have an end iterator.
- if (P.SI == SE) {
- assert(P.SplitTails.empty() && "Failed to clear the split slices!");
- return;
- }
-
- // If we had a non-empty partition previously, set up the state for
- // subsequent partitions.
- if (P.SI != P.SJ) {
- // Accumulate all the splittable slices which started in the old
- // partition into the split list.
- for (Slice &S : P)
- if (S.isSplittable() && S.endOffset() > P.EndOffset) {
- P.SplitTails.push_back(&S);
- MaxSplitSliceEndOffset =
- std::max(S.endOffset(), MaxSplitSliceEndOffset);
- }
-
- // Start from the end of the previous partition.
- P.SI = P.SJ;
-
- // If P.SI is now at the end, we at most have a tail of split slices.
- if (P.SI == SE) {
- P.BeginOffset = P.EndOffset;
- P.EndOffset = MaxSplitSliceEndOffset;
- return;
- }
-
- // If the we have split slices and the next slice is after a gap and is
- // not splittable immediately form an empty partition for the split
- // slices up until the next slice begins.
- if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
- !P.SI->isSplittable()) {
- P.BeginOffset = P.EndOffset;
- P.EndOffset = P.SI->beginOffset();
- return;
- }
- }
-
- // OK, we need to consume new slices. Set the end offset based on the
- // current slice, and step SJ past it. The beginning offset of the
- // parttion is the beginning offset of the next slice unless we have
- // pre-existing split slices that are continuing, in which case we begin
- // at the prior end offset.
- P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
- P.EndOffset = P.SI->endOffset();
- ++P.SJ;
-
- // There are two strategies to form a partition based on whether the
- // partition starts with an unsplittable slice or a splittable slice.
- if (!P.SI->isSplittable()) {
- // When we're forming an unsplittable region, it must always start at
- // the first slice and will extend through its end.
- assert(P.BeginOffset == P.SI->beginOffset());
-
- // Form a partition including all of the overlapping slices with this
- // unsplittable slice.
- while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
- if (!P.SJ->isSplittable())
- P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
- ++P.SJ;
- }
-
- // We have a partition across a set of overlapping unsplittable
- // partitions.
- return;
- }
-
- // If we're starting with a splittable slice, then we need to form
- // a synthetic partition spanning it and any other overlapping splittable
- // splices.
- assert(P.SI->isSplittable() && "Forming a splittable partition!");
-
- // Collect all of the overlapping splittable slices.
- while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
- P.SJ->isSplittable()) {
- P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
- ++P.SJ;
- }
-
- // Back upiP.EndOffset if we ended the span early when encountering an
- // unsplittable slice. This synthesizes the early end offset of
- // a partition spanning only splittable slices.
- if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
- assert(!P.SJ->isSplittable());
- P.EndOffset = P.SJ->beginOffset();
- }
- }
-
- public:
- bool operator==(const partition_iterator &RHS) const {
- assert(SE == RHS.SE &&
- "End iterators don't match between compared partition iterators!");
-
- // The observed positions of partitions is marked by the P.SI iterator and
- // the emptyness of the split slices. The latter is only relevant when
- // P.SI == SE, as the end iterator will additionally have an empty split
- // slices list, but the prior may have the same P.SI and a tail of split
- // slices.
- if (P.SI == RHS.P.SI &&
- P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
- assert(P.SJ == RHS.P.SJ &&
- "Same set of slices formed two different sized partitions!");
- assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
- "Same slice position with differently sized non-empty split "
- "slice tails!");
- return true;
- }
- return false;
- }
-
- partition_iterator &operator++() {
- advance();
- return *this;
- }
-
- Partition &operator*() { return P; }
- };
-
- /// \brief A forward range over the partitions of the alloca's slices.
- ///
- /// This accesses an iterator range over the partitions of the alloca's
- /// slices. It computes these partitions on the fly based on the overlapping
- /// offsets of the slices and the ability to split them. It will visit "empty"
- /// partitions to cover regions of the alloca only accessed via split
- /// slices.
- iterator_range<partition_iterator> partitions() {
- return make_range(partition_iterator(begin(), end()),
- partition_iterator(end(), end()));
- }
+ iterator_range<partition_iterator> partitions();
/// \brief Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
@@ -595,6 +317,280 @@ private:
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
};
+
+/// \brief A partition of the slices.
+///
+/// An ephemeral representation for a range of slices which can be viewed as
+/// a partition of the alloca. This range represents a span of the alloca's
+/// memory which cannot be split, and provides access to all of the slices
+/// overlapping some part of the partition.
+///
+/// Objects of this type are produced by traversing the alloca's slices, but
+/// are only ephemeral and not persistent.
+class llvm::sroa::Partition {
+private:
+ friend class AllocaSlices;
+ friend class AllocaSlices::partition_iterator;
+
+ typedef AllocaSlices::iterator iterator;
+
+ /// \brief The beginning and ending offsets of the alloca for this
+ /// partition.
+ uint64_t BeginOffset, EndOffset;
+
+ /// \brief The start and end iterators of this partition.
+ iterator SI, SJ;
+
+ /// \brief A collection of split slice tails overlapping the partition.
+ SmallVector<Slice *, 4> SplitTails;
+
+ /// \brief Raw constructor builds an empty partition starting and ending at
+ /// the given iterator.
+ Partition(iterator SI) : SI(SI), SJ(SI) {}
+
+public:
+ /// \brief The start offset of this partition.
+ ///
+ /// All of the contained slices start at or after this offset.
+ uint64_t beginOffset() const { return BeginOffset; }
+
+ /// \brief The end offset of this partition.
+ ///
+ /// All of the contained slices end at or before this offset.
+ uint64_t endOffset() const { return EndOffset; }
+
+ /// \brief The size of the partition.
+ ///
+ /// Note that this can never be zero.
+ uint64_t size() const {
+ assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
+ return EndOffset - BeginOffset;
+ }
+
+ /// \brief Test whether this partition contains no slices, and merely spans
+ /// a region occupied by split slices.
+ bool empty() const { return SI == SJ; }
+
+ /// \name Iterate slices that start within the partition.
+ /// These may be splittable or unsplittable. They have a begin offset >= the
+ /// partition begin offset.
+ /// @{
+ // FIXME: We should probably define a "concat_iterator" helper and use that
+ // to stitch together pointee_iterators over the split tails and the
+ // contiguous iterators of the partition. That would give a much nicer
+ // interface here. We could then additionally expose filtered iterators for
+ // split, unsplit, and unsplittable slices based on the usage patterns.
+ iterator begin() const { return SI; }
+ iterator end() const { return SJ; }
+ /// @}
+
+ /// \brief Get the sequence of split slice tails.
+ ///
+ /// These tails are of slices which start before this partition but are
+ /// split and overlap into the partition. We accumulate these while forming
+ /// partitions.
+ ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
+};
+
+/// \brief An iterator over partitions of the alloca's slices.
+///
+/// This iterator implements the core algorithm for partitioning the alloca's
+/// slices. It is a forward iterator as we don't support backtracking for
+/// efficiency reasons, and re-use a single storage area to maintain the
+/// current set of split slices.
+///
+/// It is templated on the slice iterator type to use so that it can operate
+/// with either const or non-const slice iterators.
+class AllocaSlices::partition_iterator
+ : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
+ Partition> {
+ friend class AllocaSlices;
+
+ /// \brief Most of the state for walking the partitions is held in a class
+ /// with a nice interface for examining them.
+ Partition P;
+
+ /// \brief We need to keep the end of the slices to know when to stop.
+ AllocaSlices::iterator SE;
+
+ /// \brief We also need to keep track of the maximum split end offset seen.
+ /// FIXME: Do we really?
+ uint64_t MaxSplitSliceEndOffset;
+
+ /// \brief Sets the partition to be empty at given iterator, and sets the
+ /// end iterator.
+ partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
+ : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
+ // If not already at the end, advance our state to form the initial
+ // partition.
+ if (SI != SE)
+ advance();
+ }
+
+ /// \brief Advance the iterator to the next partition.
+ ///
+ /// Requires that the iterator not be at the end of the slices.
+ void advance() {
+ assert((P.SI != SE || !P.SplitTails.empty()) &&
+ "Cannot advance past the end of the slices!");
+
+ // Clear out any split uses which have ended.
+ if (!P.SplitTails.empty()) {
+ if (P.EndOffset >= MaxSplitSliceEndOffset) {
+ // If we've finished all splits, this is easy.
+ P.SplitTails.clear();
+ MaxSplitSliceEndOffset = 0;
+ } else {
+ // Remove the uses which have ended in the prior partition. This
+ // cannot change the max split slice end because we just checked that
+ // the prior partition ended prior to that max.
+ P.SplitTails.erase(
+ std::remove_if(
+ P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
+ P.SplitTails.end());
+ assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) {
+ return S->endOffset() == MaxSplitSliceEndOffset;
+ }) &&
+ "Could not find the current max split slice offset!");
+ assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
+ [&](Slice *S) {
+ return S->endOffset() <= MaxSplitSliceEndOffset;
+ }) &&
+ "Max split slice end offset is not actually the max!");
+ }
+ }
+
+ // If P.SI is already at the end, then we've cleared the split tail and
+ // now have an end iterator.
+ if (P.SI == SE) {
+ assert(P.SplitTails.empty() && "Failed to clear the split slices!");
+ return;
+ }
+
+ // If we had a non-empty partition previously, set up the state for
+ // subsequent partitions.
+ if (P.SI != P.SJ) {
+ // Accumulate all the splittable slices which started in the old
+ // partition into the split list.
+ for (Slice &S : P)
+ if (S.isSplittable() && S.endOffset() > P.EndOffset) {
+ P.SplitTails.push_back(&S);
+ MaxSplitSliceEndOffset =
+ std::max(S.endOffset(), MaxSplitSliceEndOffset);
+ }
+
+ // Start from the end of the previous partition.
+ P.SI = P.SJ;
+
+ // If P.SI is now at the end, we at most have a tail of split slices.
+ if (P.SI == SE) {
+ P.BeginOffset = P.EndOffset;
+ P.EndOffset = MaxSplitSliceEndOffset;
+ return;
+ }
+
+ // If we have split slices and the next slice is after a gap and is
+ // not splittable, immediately form an empty partition for the split
+ // slices up until the next slice begins.
+ if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
+ !P.SI->isSplittable()) {
+ P.BeginOffset = P.EndOffset;
+ P.EndOffset = P.SI->beginOffset();
+ return;
+ }
+ }
+
+ // OK, we need to consume new slices. Set the end offset based on the
+ // current slice, and step SJ past it. The beginning offset of the
+ // partition is the beginning offset of the next slice unless we have
+ // pre-existing split slices that are continuing, in which case we begin
+ // at the prior end offset.
+ P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
+ P.EndOffset = P.SI->endOffset();
+ ++P.SJ;
+
+ // There are two strategies to form a partition based on whether the
+ // partition starts with an unsplittable slice or a splittable slice.
+ if (!P.SI->isSplittable()) {
+ // When we're forming an unsplittable region, it must always start at
+ // the first slice and will extend through its end.
+ assert(P.BeginOffset == P.SI->beginOffset());
+
+ // Form a partition including all of the overlapping slices with this
+ // unsplittable slice.
+ while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
+ if (!P.SJ->isSplittable())
+ P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
+ ++P.SJ;
+ }
+
+ // We have a partition across a set of overlapping unsplittable
+ // partitions.
+ return;
+ }
+
+ // If we're starting with a splittable slice, then we need to form
+ // a synthetic partition spanning it and any other overlapping splittable
+ // slices.
+ assert(P.SI->isSplittable() && "Forming a splittable partition!");
+
+ // Collect all of the overlapping splittable slices.
+ while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
+ P.SJ->isSplittable()) {
+ P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
+ ++P.SJ;
+ }
+
+ // Back up P.EndOffset if we ended the span early when encountering an
+ // unsplittable slice. This synthesizes the early end offset of
+ // a partition spanning only splittable slices.
+ if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
+ assert(!P.SJ->isSplittable());
+ P.EndOffset = P.SJ->beginOffset();
+ }
+ }
+
+public:
+ bool operator==(const partition_iterator &RHS) const {
+ assert(SE == RHS.SE &&
+ "End iterators don't match between compared partition iterators!");
+
+ // The observed position of a partition is marked by the P.SI iterator and
+ // the emptiness of the split slices. The latter is only relevant when
+ // P.SI == SE, as the end iterator will additionally have an empty split
+ // slices list, but the prior may have the same P.SI and a tail of split
+ // slices.
+ if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
+ assert(P.SJ == RHS.P.SJ &&
+ "Same set of slices formed two different sized partitions!");
+ assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
+ "Same slice position with differently sized non-empty split "
+ "slice tails!");
+ return true;
+ }
+ return false;
+ }
+
+ partition_iterator &operator++() {
+ advance();
+ return *this;
+ }
+
+ Partition &operator*() { return P; }
+};
+
+/// \brief A forward range over the partitions of the alloca's slices.
+///
+/// This accesses an iterator range over the partitions of the alloca's
+/// slices. It computes these partitions on the fly based on the overlapping
+/// offsets of the slices and the ability to split them. It will visit "empty"
+/// partitions to cover regions of the alloca only accessed via split
+/// slices.
+iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
+ return make_range(partition_iterator(begin(), end()),
+ partition_iterator(end(), end()));
}
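For readers following the refactor, a rough usage sketch of the range accessor defined above (the free function is hypothetical and assumes the surrounding SROA.cpp context; AllocaSlices, Partition, and Slice are the classes from this file):

    // Visit every partition of an alloca's slices, including "empty"
    // partitions that only span split slice tails.
    static void walkPartitions(const DataLayout &DL, AllocaInst &AI) {
      AllocaSlices AS(DL, AI);
      for (Partition &P : AS.partitions()) {
        // Each partition covers [P.beginOffset(), P.endOffset()) bytes.
        for (Slice &S : P) {
          // Slices starting inside the partition; split tails overlapping it
          // are available separately via P.splitSliceTails().
          (void)S;
        }
      }
    }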
static Value *foldSelectInst(SelectInst &SI) {
@@ -1072,217 +1068,6 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-namespace {
-/// \brief Implementation of LoadAndStorePromoter for promoting allocas.
-///
-/// This subclass of LoadAndStorePromoter adds overrides to handle promoting
-/// the loads and stores of an alloca instruction, as well as updating its
-/// debug information. This is used when a domtree is unavailable and thus
-/// mem2reg in its full form can't be used to handle promotion of allocas to
-/// scalar values.
-class AllocaPromoter : public LoadAndStorePromoter {
- AllocaInst &AI;
- DIBuilder &DIB;
-
- SmallVector<DbgDeclareInst *, 4> DDIs;
- SmallVector<DbgValueInst *, 4> DVIs;
-
-public:
- AllocaPromoter(ArrayRef<const Instruction *> Insts,
- SSAUpdater &S,
- AllocaInst &AI, DIBuilder &DIB)
- : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
-
- void run(const SmallVectorImpl<Instruction *> &Insts) {
- // Retain the debug information attached to the alloca for use when
- // rewriting loads and stores.
- if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
- if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
- for (User *U : DINode->users())
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
- DDIs.push_back(DDI);
- else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
- DVIs.push_back(DVI);
- }
- }
-
- LoadAndStorePromoter::run(Insts);
-
- // While we have the debug information, clear it off of the alloca. The
- // caller takes care of deleting the alloca.
- while (!DDIs.empty())
- DDIs.pop_back_val()->eraseFromParent();
- while (!DVIs.empty())
- DVIs.pop_back_val()->eraseFromParent();
- }
-
- bool
- isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction *> &Insts) const override {
- Value *Ptr;
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- Ptr = LI->getOperand(0);
- else
- Ptr = cast<StoreInst>(I)->getPointerOperand();
-
- // Only used to detect cycles, which will be rare and quickly found as
- // we're walking up a chain of defs rather than down through uses.
- SmallPtrSet<Value *, 4> Visited;
-
- do {
- if (Ptr == &AI)
- return true;
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
- Ptr = BCI->getOperand(0);
- else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
- Ptr = GEPI->getPointerOperand();
- else
- return false;
-
- } while (Visited.insert(Ptr).second);
-
- return false;
- }
-
- void updateDebugInfo(Instruction *Inst) const override {
- for (DbgDeclareInst *DDI : DDIs)
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- for (DbgValueInst *DVI : DVIs) {
- Value *Arg = nullptr;
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- // If an argument is zero extended then use argument directly. The ZExt
- // may be zapped by an optimization pass in future.
- if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- Arg = dyn_cast<Argument>(ZExt->getOperand(0));
- else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- Arg = dyn_cast<Argument>(SExt->getOperand(0));
- if (!Arg)
- Arg = SI->getValueOperand();
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Arg = LI->getPointerOperand();
- } else {
- continue;
- }
- DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
- DVI->getExpression(), DVI->getDebugLoc(),
- Inst);
- }
- }
-};
-} // end anon namespace
-
-namespace {
-/// \brief An optimization pass providing Scalar Replacement of Aggregates.
-///
-/// This pass takes allocations which can be completely analyzed (that is, they
-/// don't escape) and tries to turn them into scalar SSA values. There are
-/// a few steps to this process.
-///
-/// 1) It takes allocations of aggregates and analyzes the ways in which they
-/// are used to try to split them into smaller allocations, ideally of
-/// a single scalar data type. It will split up memcpy and memset accesses
-/// as necessary and try to isolate individual scalar accesses.
-/// 2) It will transform accesses into forms which are suitable for SSA value
-/// promotion. This can be replacing a memset with a scalar store of an
-/// integer value, or it can involve speculating operations on a PHI or
-/// select to be a PHI or select of the results.
-/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
-/// onto insert and extract operations on a vector value, and convert them to
-/// this form. By doing so, it will enable promotion of vector aggregates to
-/// SSA vector values.
-class SROA : public FunctionPass {
- const bool RequiresDomTree;
-
- LLVMContext *C;
- DominatorTree *DT;
- AssumptionCache *AC;
-
- /// \brief Worklist of alloca instructions to simplify.
- ///
- /// Each alloca in the function is added to this. Each new alloca formed gets
- /// added to it as well to recursively simplify unless that alloca can be
- /// directly promoted. Finally, each time we rewrite a use of an alloca other
- /// the one being actively rewritten, we add it back onto the list if not
- /// already present to ensure it is re-visited.
- SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist;
-
- /// \brief A collection of instructions to delete.
- /// We try to batch deletions to simplify code and make things a bit more
- /// efficient.
- SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts;
-
- /// \brief Post-promotion worklist.
- ///
- /// Sometimes we discover an alloca which has a high probability of becoming
- /// viable for SROA after a round of promotion takes place. In those cases,
- /// the alloca is enqueued here for re-processing.
- ///
- /// Note that we have to be very careful to clear allocas out of this list in
- /// the event they are deleted.
- SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist;
-
- /// \brief A collection of alloca instructions we can directly promote.
- std::vector<AllocaInst *> PromotableAllocas;
-
- /// \brief A worklist of PHIs to speculate prior to promoting allocas.
- ///
- /// All of these PHIs have been checked for the safety of speculation and by
- /// being speculated will allow promoting allocas currently in the promotable
- /// queue.
- SetVector<PHINode *, SmallVector<PHINode *, 2>> SpeculatablePHIs;
-
- /// \brief A worklist of select instructions to speculate prior to promoting
- /// allocas.
- ///
- /// All of these select instructions have been checked for the safety of
- /// speculation and by being speculated will allow promoting allocas
- /// currently in the promotable queue.
- SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
-
-public:
- SROA(bool RequiresDomTree = true)
- : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
- DT(nullptr) {
- initializeSROAPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- const char *getPassName() const override { return "SROA"; }
- static char ID;
-
-private:
- friend class PHIOrSelectSpeculator;
- friend class AllocaSliceRewriter;
-
- bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
- AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P);
- bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
- bool runOnAlloca(AllocaInst &AI);
- void clobberUse(Use &U);
- void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
- bool promoteAllocas(Function &F);
-};
-}
-
-char SROA::ID = 0;
-
-FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
- return new SROA(RequiresDomTree);
-}
-
-INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
- false)
-
/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
static Type *findCommonType(AllocaSlices::const_iterator B,
@@ -1373,7 +1158,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// Ensure that there are no instructions between the PHI and the load that
// could store.
- for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
+ for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
@@ -1934,10 +1719,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
/// \brief Test whether the given slice use can be promoted to a vector.
///
-/// This function is called to test each entry in a partioning which is slated
+/// This function is called to test each entry in a partition which is slated
/// for a single slice.
-static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
- const Slice &S, VectorType *Ty,
+static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
+ VectorType *Ty,
uint64_t ElementSize,
const DataLayout &DL) {
// First validate the slice offsets.
@@ -2012,8 +1797,7 @@ static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
/// SSA value. We only can ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
-static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P,
- const DataLayout &DL) {
+static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Collect the candidate types for vector-based promotion. Also track whether
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
@@ -2130,7 +1914,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
// We can't reasonably handle cases where the load or store extends past
- // the end of the aloca's type and into its padding.
+ // the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
@@ -2199,7 +1983,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
/// This is a quick test to check whether we can rewrite the integer loads and
/// stores to a particular alloca into wider loads and stores and be able to
/// promote the resulting alloca.
-static bool isIntegerWideningViable(AllocaSlices::Partition &P, Type *AllocaTy,
+static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
const DataLayout &DL) {
uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
// Don't create integer types larger than the maximum bitwidth.
@@ -2368,14 +2152,14 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
return V;
}
-namespace {
/// \brief Visitor to rewrite instructions using a particular slice of an alloca
/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
-class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
+class llvm::sroa::AllocaSliceRewriter
+ : public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
@@ -2583,9 +2367,19 @@ private:
V = convertValue(DL, IRB, V, IntTy);
assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
- if (Offset > 0 || NewEndOffset < NewAllocaEndOffset)
- V = extractInteger(DL, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- "extract");
+ if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
+ IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
+ V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
+ }
+ // It is possible that the extracted type is not the load type. This
+ // happens if there is a load past the end of the alloca, and as
+ // a consequence the slice is narrower but still a candidate for integer
+ // lowering. To handle this case, we just zero extend the extracted
+ // integer.
+ assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
+ "Can only handle an extract for an overly wide load");
+ if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
+ V = IRB.CreateZExt(V, LI.getType());
return V;
}
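A minimal sketch of the widening step added above, assuming the usual LLVM headers; the helper name, the i16/i32 scenario, and the variable names are illustrative and not part of the patch. It covers a load (e.g. i32) that reads past the end of a smaller alloca (e.g. i16), so the slice supplies only the low bits and the remainder is zero-filled:

#include <cassert>
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Widen a value extracted for a narrow slice back to the type of an overly
// wide load; as in the patch above, the missing high bits are zero-extended.
static llvm::Value *widenToLoadType(llvm::IRBuilder<> &B,
                                    llvm::Value *Extracted, llvm::LoadInst &LI) {
  auto *LoadTy = llvm::cast<llvm::IntegerType>(LI.getType());
  auto *SliceTy = llvm::cast<llvm::IntegerType>(Extracted->getType());
  assert(LoadTy->getBitWidth() >= SliceTy->getBitWidth() &&
         "only an overly wide load needs widening");
  return LoadTy->getBitWidth() > SliceTy->getBitWidth()
             ? B.CreateZExt(Extracted, LoadTy)
             : Extracted;
}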
@@ -2648,7 +2442,7 @@ private:
DL.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
- IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
+ IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
@@ -3126,7 +2920,7 @@ private:
// dominate the PHI.
IRBuilderTy PtrBuilder(IRB);
if (isa<PHINode>(OldPtr))
- PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
+ PtrBuilder.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
else
PtrBuilder.SetInsertPoint(OldPtr);
PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
@@ -3169,7 +2963,6 @@ private:
return true;
}
};
-}
namespace {
/// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3181,8 +2974,6 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
- const DataLayout &DL;
-
/// Queue of pointer uses to analyze and potentially rewrite.
SmallVector<Use *, 8> Queue;
@@ -3194,8 +2985,6 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
Use *U;
public:
- AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
-
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
@@ -3711,7 +3500,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
return true;
}),
Stores.end());
- // Now we have to go *back* through all te stores, because a later store may
+ // Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
@@ -3773,7 +3562,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
"Cannot represent alloca access size using 64-bit integers!");
Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
- IRB.SetInsertPoint(BasicBlock::iterator(LI));
+ IRB.SetInsertPoint(LI);
DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
@@ -3825,7 +3614,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
Value *StoreBasePtr = SI->getPointerOperand();
- IRB.SetInsertPoint(BasicBlock::iterator(SI));
+ IRB.SetInsertPoint(SI);
DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
@@ -3914,7 +3703,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
if (SplitLoads) {
PLoad = (*SplitLoads)[Idx];
} else {
- IRB.SetInsertPoint(BasicBlock::iterator(LI));
+ IRB.SetInsertPoint(LI);
PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
@@ -3924,7 +3713,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
// And store this partition.
- IRB.SetInsertPoint(BasicBlock::iterator(SI));
+ IRB.SetInsertPoint(SI);
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
@@ -3972,7 +3761,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Mark the original store as dead now that we've split it up and kill its
// slice. Note that we leave the original load in place unless this store
- // was its ownly use. It may in turn be split up if it is an alloca load
+ // was its only use. It may in turn be split up if it is an alloca load
// for some other alloca, but it may be a normal load. This may introduce
// redundant loads, but where those can be merged the rest of the optimizer
// should handle the merging, and this uncovers SSA splits which is more
@@ -4024,7 +3813,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P) {
+ Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
@@ -4230,12 +4019,11 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
// Migrate debug information from the old alloca to the new alloca(s)
- // and the individial partitions.
+ // and the individual partitions.
if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
auto *Var = DbgDecl->getVariable();
auto *Expr = DbgDecl->getExpression();
- DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
- /*AllowUnresolved*/ false);
+ DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
bool IsSplit = Pieces.size() > 1;
for (auto Piece : Pieces) {
// Create a piece expression describing the new partition or reuse AI's
@@ -4308,7 +4096,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter(DL);
+ AggLoadStoreRewriter AggRewriter;
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
@@ -4388,107 +4176,29 @@ void SROA::deleteDeadInstructions(
}
}
-static void enqueueUsersInWorklist(Instruction &I,
- SmallVectorImpl<Instruction *> &Worklist,
- SmallPtrSetImpl<Instruction *> &Visited) {
- for (User *U : I.users())
- if (Visited.insert(cast<Instruction>(U)).second)
- Worklist.push_back(cast<Instruction>(U));
-}
-
/// \brief Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
-/// If there is a domtree available, we attempt to promote using the full power
-/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
-/// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occurred.
+/// This function returns whether any promotion occurred.
bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
NumPromoted += PromotableAllocas.size();
- if (DT && !ForceSSAUpdater) {
- DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
- PromotableAllocas.clear();
- return true;
- }
-
- DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
- SSAUpdater SSA;
- DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
- SmallVector<Instruction *, 64> Insts;
-
- // We need a worklist to walk the uses of each alloca.
- SmallVector<Instruction *, 8> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- SmallVector<Instruction *, 32> DeadInsts;
-
- for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
- AllocaInst *AI = PromotableAllocas[Idx];
- Insts.clear();
- Worklist.clear();
- Visited.clear();
-
- enqueueUsersInWorklist(*AI, Worklist, Visited);
-
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
-
- // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
- // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
- // leading to them) here. Eventually it should use them to optimize the
- // scalar values produced.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end);
- II->eraseFromParent();
- continue;
- }
-
- // Push the loads and stores we find onto the list. SROA will already
- // have validated that all loads and stores are viable candidates for
- // promotion.
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- assert(LI->getType() == AI->getAllocatedType());
- Insts.push_back(LI);
- continue;
- }
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
- Insts.push_back(SI);
- continue;
- }
-
- // For everything else, we know that only no-op bitcasts and GEPs will
- // make it this far, just recurse through them and recall them for later
- // removal.
- DeadInsts.push_back(I);
- enqueueUsersInWorklist(*I, Worklist, Visited);
- }
- AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
- while (!DeadInsts.empty())
- DeadInsts.pop_back_val()->eraseFromParent();
- AI->eraseFromParent();
- }
-
+ DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+ PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
PromotableAllocas.clear();
return true;
}
-bool SROA::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
+PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
+ AssumptionCache &RunAC) {
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DT = &RunDT;
+ AC = &RunAC;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
@@ -4527,12 +4237,55 @@ bool SROA::runOnFunction(Function &F) {
PostPromotionWorklist.clear();
} while (!Worklist.empty());
- return Changed;
+ // FIXME: Even when promoting allocas we should preserve some abstract set of
+ // CFG-specific analyses.
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
-void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AssumptionCacheTracker>();
- if (RequiresDomTree)
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
+PreservedAnalyses SROA::run(Function &F, AnalysisManager<Function> *AM) {
+ return runImpl(F, AM->getResult<DominatorTreeAnalysis>(F),
+ AM->getResult<AssumptionAnalysis>(F));
}
+
+/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
+///
+/// This is in the llvm namespace purely to allow it to be a friend of the \c
+/// SROA pass.
+class llvm::sroa::SROALegacyPass : public FunctionPass {
+ /// The SROA implementation.
+ SROA Impl;
+
+public:
+ SROALegacyPass() : FunctionPass(ID) {
+ initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ auto PA = Impl.runImpl(
+ F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+ return !PA.areAllPreserved();
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ const char *getPassName() const override { return "SROA"; }
+ static char ID;
+};
+
+char SROALegacyPass::ID = 0;
+
+FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); }
+
+INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
+ "Scalar Replacement Of Aggregates", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
+ false, false)
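For reference, a small usage sketch of the legacy wrapper registered above, scheduled through the old pass manager; this is illustrative and not part of the patch, and the function name is an assumption:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h"

// Run SROA over every defined function in M via the legacy pass manager.
// The required DominatorTreeWrapperPass and AssumptionCacheTracker analyses
// are scheduled automatically from getAnalysisUsage().
static void runSROAOnModule(llvm::Module &M) {
  llvm::legacy::FunctionPassManager FPM(&M);
  FPM.add(llvm::createSROAPass()); // note: the factory no longer takes a flag
  FPM.doInitialization();
  for (llvm::Function &F : M)
    if (!F.isDeclaration())
      FPM.run(F);
  FPM.doFinalization();
}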
diff --git a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
deleted file mode 100644
index c8dfa54..0000000
--- a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
+++ /dev/null
@@ -1,777 +0,0 @@
-//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SampleProfileLoader transformation. This pass
-// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
-// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
-// profile information in the given profile.
-//
-// This pass generates branch weight annotations on the IR:
-//
-// - prof: Represents branch weights. This annotation is added to branches
-// to indicate the weights of each edge coming out of the branch.
-// The weight of each edge is the weight of the target block for
-// that edge. The weight of a block B is computed as the maximum
-// number of samples found in B.
-//
-//===----------------------------------------------------------------------===//
-
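To make the annotation concrete, here is a sketch of how such weights end up on the IR (illustrative only, not taken from this file; TI and the weight values are assumptions). The loader below does this with the per-successor edge weights computed by the propagation phase:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

// Attach branch weights (one per successor) to a terminator as !prof metadata.
static void annotateBranch(llvm::TerminatorInst *TI,
                           llvm::ArrayRef<uint32_t> Weights) {
  llvm::MDBuilder MDB(TI->getContext());
  TI->setMetadata(llvm::LLVMContext::MD_prof,
                  MDB.createBranchWeights(Weights));
}
// Resulting IR, roughly:
//   br i1 %cmp, label %then, label %else, !prof !0
//   !0 = !{!"branch_weights", i32 90, i32 10}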
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/ProfileData/SampleProfReader.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cctype>
-
-using namespace llvm;
-using namespace sampleprof;
-
-#define DEBUG_TYPE "sample-profile"
-
-// Command line option to specify the file to read samples from. This is
-// mainly used for debugging.
-static cl::opt<std::string> SampleProfileFile(
- "sample-profile-file", cl::init(""), cl::value_desc("filename"),
- cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
-static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
- "sample-profile-max-propagate-iterations", cl::init(100),
- cl::desc("Maximum number of iterations to go through when propagating "
- "sample block/edge weights through the CFG."));
-
-namespace {
-typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
-typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
-typedef std::pair<BasicBlock *, BasicBlock *> Edge;
-typedef DenseMap<Edge, unsigned> EdgeWeightMap;
-typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
-
-/// \brief Sample profile pass.
-///
-/// This pass reads profile data from the file specified by
-/// -sample-profile-file and annotates every affected function with the
-/// profile information found in that file.
-class SampleProfileLoader : public FunctionPass {
-public:
- // Class identification, replacement for typeinfo
- static char ID;
-
- SampleProfileLoader(StringRef Name = SampleProfileFile)
- : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr),
- Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) {
- initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
- }
-
- bool doInitialization(Module &M) override;
-
- void dump() { Reader->dump(); }
-
- const char *getPassName() const override { return "Sample profile pass"; }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTree>();
- }
-
-protected:
- unsigned getFunctionLoc(Function &F);
- bool emitAnnotations(Function &F);
- unsigned getInstWeight(Instruction &I);
- unsigned getBlockWeight(BasicBlock *BB);
- void printEdgeWeight(raw_ostream &OS, Edge E);
- void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
- void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
- bool computeBlockWeights(Function &F);
- void findEquivalenceClasses(Function &F);
- void findEquivalencesFor(BasicBlock *BB1,
- SmallVector<BasicBlock *, 8> Descendants,
- DominatorTreeBase<BasicBlock> *DomTree);
- void propagateWeights(Function &F);
- unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
- void buildEdges(Function &F);
- bool propagateThroughEdges(Function &F);
-
- /// \brief Line number for the function header. Used to compute absolute
- /// line numbers from the relative line numbers found in the profile.
- unsigned HeaderLineno;
-
- /// \brief Map basic blocks to their computed weights.
- ///
- /// The weight of a basic block is defined to be the maximum
- /// of all the instruction weights in that block.
- BlockWeightMap BlockWeights;
-
- /// \brief Map edges to their computed weights.
- ///
- /// Edge weights are computed by propagating basic block weights in
- /// SampleProfile::propagateWeights.
- EdgeWeightMap EdgeWeights;
-
- /// \brief Set of visited blocks during propagation.
- SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
-
- /// \brief Set of visited edges during propagation.
- SmallSet<Edge, 128> VisitedEdges;
-
- /// \brief Equivalence classes for block weights.
- ///
- /// Two blocks BB1 and BB2 are in the same equivalence class if they
- /// dominate and post-dominate each other, and they are in the same loop
- /// nest. When this happens, the two blocks are guaranteed to execute
- /// the same number of times.
- EquivalenceClassMap EquivalenceClass;
-
- /// \brief Dominance, post-dominance and loop information.
- DominatorTree *DT;
- PostDominatorTree *PDT;
- LoopInfo *LI;
-
- /// \brief Predecessors for each basic block in the CFG.
- BlockEdgeMap Predecessors;
-
- /// \brief Successors for each basic block in the CFG.
- BlockEdgeMap Successors;
-
- /// \brief LLVM context holding the debug data we need.
- LLVMContext *Ctx;
-
- /// \brief Profile reader object.
- std::unique_ptr<SampleProfileReader> Reader;
-
- /// \brief Samples collected for the body of this function.
- FunctionSamples *Samples;
-
- /// \brief Name of the profile file to load.
- StringRef Filename;
-
- /// \brief Flag indicating whether the profile input loaded successfully.
- bool ProfileIsValid;
-};
-}
-
-/// \brief Print the weight of edge \p E on stream \p OS.
-///
-/// \param OS Stream to emit the output to.
-/// \param E Edge to print.
-void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
- OS << "weight[" << E.first->getName() << "->" << E.second->getName()
- << "]: " << EdgeWeights[E] << "\n";
-}
-
-/// \brief Print the equivalence class of block \p BB on stream \p OS.
-///
-/// \param OS Stream to emit the output to.
-/// \param BB Block to print.
-void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
- BasicBlock *BB) {
- BasicBlock *Equiv = EquivalenceClass[BB];
- OS << "equivalence[" << BB->getName()
- << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
-}
-
-/// \brief Print the weight of block \p BB on stream \p OS.
-///
-/// \param OS Stream to emit the output to.
-/// \param BB Block to print.
-void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
- OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
-}
-
-/// \brief Get the weight for an instruction.
-///
-/// The "weight" of an instruction \p Inst is the number of samples
-/// collected on that instruction at runtime. To retrieve it, we
-/// need to compute the line number of \p Inst relative to the start of its
-/// function. We use HeaderLineno to compute the offset. We then
-/// look up the samples collected for \p Inst using BodySamples.
-///
-/// \param Inst Instruction to query.
-///
-/// \returns The profiled weight of I.
-unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
- DebugLoc DLoc = Inst.getDebugLoc();
- if (!DLoc)
- return 0;
-
- unsigned Lineno = DLoc.getLine();
- if (Lineno < HeaderLineno)
- return 0;
-
- const DILocation *DIL = DLoc;
- int LOffset = Lineno - HeaderLineno;
- unsigned Discriminator = DIL->getDiscriminator();
- unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
- DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
- << " (line offset: " << LOffset << "." << Discriminator
- << " - weight: " << Weight << ")\n");
- return Weight;
-}
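A worked instance of the lookup above, with illustrative numbers: if the function header is at source line 10 (so HeaderLineno == 10) and an instruction carries debug location 14.2 (line 14, discriminator 2), then LOffset is 4 and the weight is Samples->samplesAt(4, 2), i.e. the samples recorded in the profile at line offset 4 with discriminator 2.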
-
-/// \brief Compute the weight of a basic block.
-///
-/// The weight of basic block \p BB is the maximum weight of all the
-/// instructions in BB. The weight of \p BB is computed and cached in
-/// the BlockWeights map.
-///
-/// \param BB The basic block to query.
-///
-/// \returns The computed weight of BB.
-unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
- // If we've computed BB's weight before, return it.
- std::pair<BlockWeightMap::iterator, bool> Entry =
- BlockWeights.insert(std::make_pair(BB, 0));
- if (!Entry.second)
- return Entry.first->second;
-
- // Otherwise, compute and cache BB's weight.
- unsigned Weight = 0;
- for (auto &I : BB->getInstList()) {
- unsigned InstWeight = getInstWeight(I);
- if (InstWeight > Weight)
- Weight = InstWeight;
- }
- Entry.first->second = Weight;
- return Weight;
-}
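For example (illustrative numbers): a block whose instructions have weights 0, 12 and 7 gets block weight 12, the maximum, and that value is cached in BlockWeights for later queries.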
-
-/// \brief Compute and store the weights of every basic block.
-///
-/// This populates the BlockWeights map by computing
-/// the weights of every basic block in the CFG.
-///
-/// \param F The function to query.
-bool SampleProfileLoader::computeBlockWeights(Function &F) {
- bool Changed = false;
- DEBUG(dbgs() << "Block weights\n");
- for (auto &BB : F) {
- unsigned Weight = getBlockWeight(&BB);
- Changed |= (Weight > 0);
- DEBUG(printBlockWeight(dbgs(), &BB));
- }
-
- return Changed;
-}
-
-/// \brief Find equivalence classes for the given block.
-///
-/// This finds all the blocks that are guaranteed to execute the same
-/// number of times as \p BB1. To do this, it traverses all the
-/// descendants of \p BB1 in the dominator or post-dominator tree.
-///
-/// A block BB2 will be in the same equivalence class as \p BB1 if
-/// the following holds:
-///
-/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
-/// is a descendant of \p BB1 in the dominator tree, then BB2 should
-/// dominate BB1 in the post-dominator tree.
-///
-/// 2- Both BB2 and \p BB1 must be in the same loop.
-///
-/// For every block BB2 that meets those two requirements, we set BB2's
-/// equivalence class to \p BB1.
-///
-/// \param BB1 Block to check.
-/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
-/// \param DomTree Opposite dominator tree. If \p Descendants is filled
-/// with blocks from \p BB1's dominator tree, then
-/// this is the post-dominator tree, and vice versa.
-void SampleProfileLoader::findEquivalencesFor(
- BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
- DominatorTreeBase<BasicBlock> *DomTree) {
- for (auto *BB2 : Descendants) {
- bool IsDomParent = DomTree->dominates(BB2, BB1);
- bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
- if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
- IsInSameLoop) {
- EquivalenceClass[BB2] = BB1;
-
- // If BB2 is heavier than BB1, make BB2 have the same weight
- // as BB1.
- //
- // Note that we don't worry about the opposite situation here
- // (when BB2 is lighter than BB1). We will deal with this
- // during the propagation phase. Right now, we just want to
- // make sure that BB1 has the largest weight of all the
- // members of its equivalence set.
- unsigned &BB1Weight = BlockWeights[BB1];
- unsigned &BB2Weight = BlockWeights[BB2];
- BB1Weight = std::max(BB1Weight, BB2Weight);
- }
- }
-}
-
-/// \brief Find equivalence classes.
-///
-/// Since samples may be missing from blocks, we can fill in the gaps by setting
-/// the weights of all the blocks in the same equivalence class to the same
-/// weight. To compute the concept of equivalence, we use dominance and loop
-/// information. Two blocks B1 and B2 are in the same equivalence class if B1
-/// dominates B2, B2 post-dominates B1 and both are in the same loop.
-///
-/// \param F The function to query.
-void SampleProfileLoader::findEquivalenceClasses(Function &F) {
- SmallVector<BasicBlock *, 8> DominatedBBs;
- DEBUG(dbgs() << "\nBlock equivalence classes\n");
- // Find equivalence sets based on dominance and post-dominance information.
- for (auto &BB : F) {
- BasicBlock *BB1 = &BB;
-
- // Compute BB1's equivalence class once.
- if (EquivalenceClass.count(BB1)) {
- DEBUG(printBlockEquivalence(dbgs(), BB1));
- continue;
- }
-
- // By default, blocks are in their own equivalence class.
- EquivalenceClass[BB1] = BB1;
-
- // Traverse all the blocks dominated by BB1. We are looking for
- // every basic block BB2 such that:
- //
- // 1- BB1 dominates BB2.
- // 2- BB2 post-dominates BB1.
- // 3- BB1 and BB2 are in the same loop nest.
- //
- // If all those conditions hold, it means that BB2 is executed
- // as many times as BB1, so they are placed in the same equivalence
- // class by making BB2's equivalence class be BB1.
- DominatedBBs.clear();
- DT->getDescendants(BB1, DominatedBBs);
- findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
-
- // Repeat the same logic for all the blocks post-dominated by BB1.
- // We are looking for every basic block BB2 such that:
- //
- // 1- BB1 post-dominates BB2.
- // 2- BB2 dominates BB1.
- // 3- BB1 and BB2 are in the same loop nest.
- //
- // If all those conditions hold, BB2's equivalence class is BB1.
- DominatedBBs.clear();
- PDT->getDescendants(BB1, DominatedBBs);
- findEquivalencesFor(BB1, DominatedBBs, DT);
-
- DEBUG(printBlockEquivalence(dbgs(), BB1));
- }
-
- // Assign weights to equivalence classes.
- //
- // All the basic blocks in the same equivalence class will execute
- // the same number of times. Since we know that the head block in
- // each equivalence class has the largest weight, assign that weight
- // to all the blocks in that equivalence class.
- DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
- for (auto &BI : F) {
- BasicBlock *BB = &BI;
- BasicBlock *EquivBB = EquivalenceClass[BB];
- if (BB != EquivBB)
- BlockWeights[BB] = BlockWeights[EquivBB];
- DEBUG(printBlockWeight(dbgs(), BB));
- }
-}
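As a concrete example (illustrative, not from this file): in an if/then/else diamond outside any loop, the entry block dominates the join block and the join block post-dominates the entry, so the join is folded into the entry's equivalence class and both end up with max(weight(entry), weight(join)); the then and else blocks satisfy neither direction and remain in their own classes.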
-
-/// \brief Visit the given edge to decide if it has a valid weight.
-///
-/// If \p E has not been visited before, we copy to \p UnknownEdge
-/// and increment the count of unknown edges.
-///
-/// \param E Edge to visit.
-/// \param NumUnknownEdges Current number of unknown edges.
-/// \param UnknownEdge Set if E has not been visited before.
-///
-/// \returns E's weight, if known. Otherwise, return 0.
-unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
- Edge *UnknownEdge) {
- if (!VisitedEdges.count(E)) {
- (*NumUnknownEdges)++;
- *UnknownEdge = E;
- return 0;
- }
-
- return EdgeWeights[E];
-}
-
-/// \brief Propagate weights through incoming/outgoing edges.
-///
-/// If the weight of a basic block is known, and there is only one edge
-/// with an unknown weight, we can calculate the weight of that edge.
-///
-/// Similarly, if all the edges have a known count, we can calculate the
-/// count of the basic block, if needed.
-///
-/// \param F Function to process.
-///
-/// \returns True if new weights were assigned to edges or blocks.
-bool SampleProfileLoader::propagateThroughEdges(Function &F) {
- bool Changed = false;
- DEBUG(dbgs() << "\nPropagation through edges\n");
- for (auto &BI : F) {
- BasicBlock *BB = &BI;
-
- // Visit all the predecessor and successor edges to determine
- // which ones have a weight assigned already. Note that it doesn't
- // matter that we only keep track of a single unknown edge. The
- // only case we are interested in handling is when only a single
- // edge is unknown (see setEdgeOrBlockWeight).
- for (unsigned i = 0; i < 2; i++) {
- unsigned TotalWeight = 0;
- unsigned NumUnknownEdges = 0;
- Edge UnknownEdge, SelfReferentialEdge;
-
- if (i == 0) {
- // First, visit all predecessor edges.
- for (auto *Pred : Predecessors[BB]) {
- Edge E = std::make_pair(Pred, BB);
- TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
- if (E.first == E.second)
- SelfReferentialEdge = E;
- }
- } else {
- // On the second round, visit all successor edges.
- for (auto *Succ : Successors[BB]) {
- Edge E = std::make_pair(BB, Succ);
- TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
- }
- }
-
- // After visiting all the edges, there are three cases that we
- // can handle immediately:
- //
- // - All the edge weights are known (i.e., NumUnknownEdges == 0).
- // In this case, we simply check that the sum of all the edges
- // is the same as BB's weight. If not, we change BB's weight
- // to match. Additionally, if BB had not been visited before,
- // we mark it visited.
- //
- // - Only one edge is unknown and BB has already been visited.
-    //    In this case, we can compute the weight of the edge by
-    //    subtracting the sum of the known edge weights from the block's
-    //    weight. If the known edges weigh more than BB, then the weight
-    //    of the last remaining edge is set to zero.
- //
- // - There exists a self-referential edge and the weight of BB is
- // known. In this case, this edge can be based on BB's weight.
- // We add up all the other known edges and set the weight on
- // the self-referential edge as we did in the previous case.
- //
- // In any other case, we must continue iterating. Eventually,
- // all edges will get a weight, or iteration will stop when
- // it reaches SampleProfileMaxPropagateIterations.
- if (NumUnknownEdges <= 1) {
- unsigned &BBWeight = BlockWeights[BB];
- if (NumUnknownEdges == 0) {
- // If we already know the weight of all edges, the weight of the
- // basic block can be computed. It should be no larger than the sum
- // of all edge weights.
- if (TotalWeight > BBWeight) {
- BBWeight = TotalWeight;
- Changed = true;
- DEBUG(dbgs() << "All edge weights for " << BB->getName()
- << " known. Set weight for block: ";
- printBlockWeight(dbgs(), BB););
- }
- if (VisitedBlocks.insert(BB).second)
- Changed = true;
- } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
- // If there is a single unknown edge and the block has been
- // visited, then we can compute E's weight.
- if (BBWeight >= TotalWeight)
- EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
- else
- EdgeWeights[UnknownEdge] = 0;
- VisitedEdges.insert(UnknownEdge);
- Changed = true;
- DEBUG(dbgs() << "Set weight for edge: ";
- printEdgeWeight(dbgs(), UnknownEdge));
- }
- } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
- unsigned &BBWeight = BlockWeights[BB];
- // We have a self-referential edge and the weight of BB is known.
- if (BBWeight >= TotalWeight)
- EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
- else
- EdgeWeights[SelfReferentialEdge] = 0;
- VisitedEdges.insert(SelfReferentialEdge);
- Changed = true;
- DEBUG(dbgs() << "Set self-referential edge weight to: ";
- printEdgeWeight(dbgs(), SelfReferentialEdge));
- }
- }
- }
-
- return Changed;
-}
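A worked instance of the single-unknown-edge case, with illustrative numbers: suppose BlockWeights[BB] == 100, BB has already been visited, and its three incoming edges have weights 60, 25 and unknown. Then TotalWeight == 85 and the unknown edge is assigned 100 - 85 = 15; had the known edges instead summed to 120 (more than BB's weight), the unknown edge would be assigned 0.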
-
-/// \brief Build in/out edge lists for each basic block in the CFG.
-///
-/// We are interested in unique edges. If a block B1 has multiple
-/// edges to another block B2, we only add a single B1->B2 edge.
-void SampleProfileLoader::buildEdges(Function &F) {
- for (auto &BI : F) {
- BasicBlock *B1 = &BI;
-
- // Add predecessors for B1.
- SmallPtrSet<BasicBlock *, 16> Visited;
- if (!Predecessors[B1].empty())
- llvm_unreachable("Found a stale predecessors list in a basic block.");
- for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
- BasicBlock *B2 = *PI;
- if (Visited.insert(B2).second)
- Predecessors[B1].push_back(B2);
- }
-
- // Add successors for B1.
- Visited.clear();
- if (!Successors[B1].empty())
- llvm_unreachable("Found a stale successors list in a basic block.");
- for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
- BasicBlock *B2 = *SI;
- if (Visited.insert(B2).second)
- Successors[B1].push_back(B2);
- }
- }
-}
-
-/// \brief Propagate weights into edges
-///
-/// The following rules are applied to every block BB in the CFG:
-///
-/// - If BB has a single predecessor/successor, then the weight
-/// of that edge is the weight of the block.
-///
-/// - If all incoming or outgoing edges are known except one, and the
-/// weight of the block is already known, the weight of the unknown
-/// edge will be the weight of the block minus the sum of all the known
-/// edges. If the sum of all the known edges is larger than BB's weight,
-/// we set the unknown edge weight to zero.
-///
-/// - If there is a self-referential edge, and the weight of the block is
-/// known, the weight for that edge is set to the weight of the block
-/// minus the weight of the other incoming edges to that block (if
-/// known).
-void SampleProfileLoader::propagateWeights(Function &F) {
- bool Changed = true;
- unsigned i = 0;
-
- // Add an entry count to the function using the samples gathered
- // at the function entry.
- F.setEntryCount(Samples->getHeadSamples());
-
- // Before propagation starts, build, for each block, a list of
- // unique predecessors and successors. This is necessary to handle
- // identical edges in multiway branches. Since we visit all blocks and all
- // edges of the CFG, it is cleaner to build these lists once at the start
- // of the pass.
- buildEdges(F);
-
- // Propagate until we converge or we go past the iteration limit.
- while (Changed && i++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F);
- }
-
- // Generate MD_prof metadata for every branch instruction using the
- // edge weights computed during propagation.
- DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
- MDBuilder MDB(F.getContext());
- for (auto &BI : F) {
- BasicBlock *BB = &BI;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 1)
- continue;
- if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
- continue;
-
- DEBUG(dbgs() << "\nGetting weights for branch at line "
- << TI->getDebugLoc().getLine() << ".\n");
- SmallVector<unsigned, 4> Weights;
- bool AllWeightsZero = true;
- for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
- BasicBlock *Succ = TI->getSuccessor(I);
- Edge E = std::make_pair(BB, Succ);
- unsigned Weight = EdgeWeights[E];
- DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
- Weights.push_back(Weight);
- if (Weight != 0)
- AllWeightsZero = false;
- }
-
- // Only set weights if there is at least one non-zero weight.
- // In any other case, let the analyzer set weights.
- if (!AllWeightsZero) {
- DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
- TI->setMetadata(llvm::LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
- } else {
- DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
- }
- }
-}
-
-/// \brief Get the line number for the function header.
-///
-/// This looks up function \p F in the current compilation unit and
-/// retrieves the line number where the function is defined. This is
-/// line 0 for all the samples read from the profile file. Every line
-/// number is relative to this line.
-///
-/// \param F Function object to query.
-///
-/// \returns the line number where \p F is defined. If it returns 0,
-/// it means that there is no debug information available for \p F.
-unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
- if (DISubprogram *S = getDISubprogram(&F))
- return S->getLine();
-
-  // If we could not find the start of \p F, emit a diagnostic to inform the user
- // about the missed opportunity.
- F.getContext().diagnose(DiagnosticInfoSampleProfile(
- "No debug information found in function " + F.getName() +
- ": Function profile not used",
- DS_Warning));
- return 0;
-}
-
-/// \brief Generate branch weight metadata for all branches in \p F.
-///
-/// Branch weights are computed out of instruction samples using a
-/// propagation heuristic. Propagation proceeds in 3 phases:
-///
-/// 1- Assignment of block weights. All the basic blocks in the function
-///    are initially assigned the same weight as their most frequently
-/// executed instruction.
-///
-/// 2- Creation of equivalence classes. Since samples may be missing from
-/// blocks, we can fill in the gaps by setting the weights of all the
-/// blocks in the same equivalence class to the same weight. To compute
-/// the concept of equivalence, we use dominance and loop information.
-/// Two blocks B1 and B2 are in the same equivalence class if B1
-/// dominates B2, B2 post-dominates B1 and both are in the same loop.
-///
-/// 3- Propagation of block weights into edges. This uses a simple
-/// propagation heuristic. The following rules are applied to every
-/// block BB in the CFG:
-///
-/// - If BB has a single predecessor/successor, then the weight
-/// of that edge is the weight of the block.
-///
-/// - If all the edges are known except one, and the weight of the
-/// block is already known, the weight of the unknown edge will
-/// be the weight of the block minus the sum of all the known
-/// edges. If the sum of all the known edges is larger than BB's weight,
-/// we set the unknown edge weight to zero.
-///
-/// - If there is a self-referential edge, and the weight of the block is
-/// known, the weight for that edge is set to the weight of the block
-/// minus the weight of the other incoming edges to that block (if
-/// known).
-///
-/// Since this propagation is not guaranteed to finalize for every CFG, we
-/// only allow it to proceed for a limited number of iterations (controlled
-/// by -sample-profile-max-propagate-iterations).
-///
-/// FIXME: Try to replace this propagation heuristic with a scheme
-/// that is guaranteed to finalize. A work-list approach similar to
-/// the standard value propagation algorithm used by SSA-CCP might
-/// work here.
-///
-/// Once all the branch weights are computed, we emit the MD_prof
-/// metadata on BB using the computed values for each of its branches.
-///
-/// \param F The function to query.
-///
-/// \returns true if \p F was modified. Returns false, otherwise.
-bool SampleProfileLoader::emitAnnotations(Function &F) {
- bool Changed = false;
-
- // Initialize invariants used during computation and propagation.
- HeaderLineno = getFunctionLoc(F);
- if (HeaderLineno == 0)
- return false;
-
- DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
- << ": " << HeaderLineno << "\n");
-
- // Compute basic block weights.
- Changed |= computeBlockWeights(F);
-
- if (Changed) {
- // Find equivalence classes.
- findEquivalenceClasses(F);
-
- // Propagate weights to all edges.
- propagateWeights(F);
- }
-
- return Changed;
-}
-
-char SampleProfileLoader::ID = 0;
-INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
- "Sample Profile loader", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
-INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
- "Sample Profile loader", false, false)
-
-bool SampleProfileLoader::doInitialization(Module &M) {
- auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
- if (std::error_code EC = ReaderOrErr.getError()) {
- std::string Msg = "Could not open profile: " + EC.message();
- M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
- return false;
- }
- Reader = std::move(ReaderOrErr.get());
- ProfileIsValid = (Reader->read() == sampleprof_error::success);
- return true;
-}
-
-FunctionPass *llvm::createSampleProfileLoaderPass() {
- return new SampleProfileLoader(SampleProfileFile);
-}
-
-FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
- return new SampleProfileLoader(Name);
-}
-
-bool SampleProfileLoader::runOnFunction(Function &F) {
- if (!ProfileIsValid)
- return false;
-
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- PDT = &getAnalysis<PostDominatorTree>();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- Ctx = &F.getParent()->getContext();
- Samples = Reader->getSamplesFor(F);
- if (!Samples->empty())
- return emitAnnotations(F);
- return false;
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index d5d3605..52d477c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -16,7 +16,10 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -27,10 +30,9 @@ using namespace llvm;
/// initializeScalarOptsPasses - Initialize all passes linked into the
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
- initializeADCEPass(Registry);
+ initializeADCELegacyPassPass(Registry);
initializeBDCEPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
- initializeSampleProfileLoaderPass(Registry);
initializeConstantHoistingPass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
@@ -66,7 +68,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeRewriteStatepointsForGCPass(Registry);
initializeSCCPPass(Registry);
initializeIPSCCPPass(Registry);
- initializeSROAPass(Registry);
+ initializeSROALegacyPassPass(Registry);
initializeSROA_DTPass(Registry);
initializeSROA_SSAUpPass(Registry);
initializeCFGSimplifyPassPass(Registry);
@@ -81,6 +83,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializePlaceSafepointsPass(Registry);
initializeFloat2IntPass(Registry);
initializeLoopDistributePass(Registry);
+ initializeLoopLoadEliminationPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -225,15 +228,15 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
}
void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
+ unwrap(PM)->add(createTypeBasedAAWrapperPass());
}
void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createScopedNoAliasAAPass());
+ unwrap(PM)->add(createScopedNoAliasAAWrapperPass());
}
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createBasicAliasAnalysisPass());
+ unwrap(PM)->add(createBasicAAWrapperPass());
}
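A short usage sketch of the renamed creators through the C API (illustrative, not part of the patch; the function name is an assumption):

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Scalar.h"

// The C entry points keep their old names but now construct the new
// *WrapperPass alias-analysis passes shown above.
static void addAAPasses(LLVMPassManagerRef PM) {
  LLVMAddBasicAliasAnalysisPass(PM);
  LLVMAddScopedNoAliasAAPass(PM);
  LLVMAddTypeBasedAliasAnalysisPass(PM);
}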
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index d955da7..114d22d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -60,6 +60,7 @@ STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
namespace {
+#define SROA SROA_
struct SROA : public FunctionPass {
SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
: FunctionPass(ID), HasDomTree(hasDT) {
@@ -382,8 +383,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
- AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "",
- AI->getParent()->begin());
+ AllocaInst *NewAI =
+ new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front());
ConvertUsesToScalar(AI, NewAI, 0, nullptr);
return NewAI;
}
@@ -1195,7 +1196,7 @@ static bool isSafePHIToSpeculate(PHINode *PN) {
// Ensure that there are no instructions between the PHI and the load that
// could store.
- for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 0493003..054bacd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -253,10 +253,10 @@ bool Scalarizer::doInitialization(Module &M) {
}
bool Scalarizer::runOnFunction(Function &F) {
- for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
- BasicBlock *BB = BBI;
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
- Instruction *I = II;
+ assert(Gathered.empty() && Scattered.empty());
+ for (BasicBlock &BB : F) {
+ for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
+ Instruction *I = &*II;
bool Done = visit(I);
++II;
if (Done && I->getType()->isVoidTy())
@@ -285,7 +285,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
- return Scatterer(Point->getParent(), Point, V);
+ return Scatterer(Point->getParent(), Point->getIterator(), V);
}
// Replace Op with the gathered form of the components in CV. Defer the
@@ -377,7 +377,7 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(I.getParent(), &I);
+ IRBuilder<> Builder(&I);
Scatterer Op0 = scatter(&I, I.getOperand(0));
Scatterer Op1 = scatter(&I, I.getOperand(1));
assert(Op0.size() == NumElems && "Mismatched binary operation");
@@ -397,7 +397,7 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(SI.getParent(), &SI);
+ IRBuilder<> Builder(&SI);
Scatterer Op1 = scatter(&SI, SI.getOperand(1));
Scatterer Op2 = scatter(&SI, SI.getOperand(2));
assert(Op1.size() == NumElems && "Mismatched select");
@@ -438,7 +438,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (!VT)
return false;
- IRBuilder<> Builder(GEPI.getParent(), &GEPI);
+ IRBuilder<> Builder(&GEPI);
unsigned NumElems = VT->getNumElements();
unsigned NumIndices = GEPI.getNumIndices();
@@ -472,7 +472,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(CI.getParent(), &CI);
+ IRBuilder<> Builder(&CI);
Scatterer Op0 = scatter(&CI, CI.getOperand(0));
assert(Op0.size() == NumElems && "Mismatched cast");
ValueVector Res;
@@ -492,7 +492,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
unsigned DstNumElems = DstVT->getNumElements();
unsigned SrcNumElems = SrcVT->getNumElements();
- IRBuilder<> Builder(BCI.getParent(), &BCI);
+ IRBuilder<> Builder(&BCI);
Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
ValueVector Res;
Res.resize(DstNumElems);
@@ -569,7 +569,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) {
return false;
unsigned NumElems = VT->getNumElements();
- IRBuilder<> Builder(PHI.getParent(), &PHI);
+ IRBuilder<> Builder(&PHI);
ValueVector Res;
Res.resize(NumElems);
@@ -600,7 +600,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
- IRBuilder<> Builder(LI.getParent(), &LI);
+ IRBuilder<> Builder(&LI);
Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
ValueVector Res;
Res.resize(NumElems);
@@ -625,7 +625,7 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
- IRBuilder<> Builder(SI.getParent(), &SI);
+ IRBuilder<> Builder(&SI);
Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
Scatterer Val = scatter(&SI, FullValue);
@@ -642,7 +642,9 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool Scalarizer::finish() {
- if (Gathered.empty())
+ // The presence of data in Gathered or Scattered indicates changes
+ // made to the Function.
+ if (Gathered.empty() && Scattered.empty())
return false;
for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
GMI != GME; ++GMI) {
@@ -655,7 +657,7 @@ bool Scalarizer::finish() {
Value *Res = UndefValue::get(Ty);
BasicBlock *BB = Op->getParent();
unsigned Count = Ty->getVectorNumElements();
- IRBuilder<> Builder(BB, Op);
+ IRBuilder<> Builder(Op);
if (isa<PHINode>(Op))
Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
for (unsigned I = 0; I < Count; ++I)
diff --git a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 4a87531..86a10d2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -156,6 +156,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@@ -164,6 +168,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -174,6 +179,7 @@
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false),
@@ -319,8 +325,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool doInitialization(Module &M) override {
@@ -373,15 +382,42 @@ private:
///
/// Verified in @i32_add in split-gep.ll
bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+ /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
+ /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
+  /// the constant offset. After extraction, it becomes desirable to reunite the
+  /// distributed sexts. For example,
+  ///
+  ///   &a[sext(i +nsw (j +nsw 5))]
+  ///     => distribute        &a[sext(i) +nsw (sext(j) +nsw 5)]
+  ///     => constant extraction   &a[sext(i) + sext(j)] + 5
+  ///     => reunite            &a[sext(i +nsw j)] + 5
+ bool reuniteExts(Function &F);
+ /// A helper that reunites sexts in an instruction.
+ bool reuniteExts(Instruction *I);
+ /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
+ Instruction *findClosestMatchingDominator(const SCEV *Key,
+ Instruction *Dominatee);
/// Verify F is free of dead code.
void verifyNoDeadCode(Function &F);
+ bool hasMoreThanOneUseInLoop(Value *v, Loop *L);
+  // Swap the index operands of two GEPs.
+ void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
+  // Check if it is safe to swap the index operands of two GEPs.
+ bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
+ Loop *CurLoop);
+
const DataLayout *DL;
- const DominatorTree *DT;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
const TargetMachine *TM;
+
+ LoopInfo *LI;
+ TargetLibraryInfo *TLI;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
bool LowerGEP;
+ DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
};
} // anonymous namespace
@@ -391,7 +427,10 @@ INITIALIZE_PASS_BEGIN(
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
@@ -734,6 +773,13 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
Type *I8PtrTy =
Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
Value *ResultPtr = Variadic->getOperand(0);
+ Loop *L = LI->getLoopFor(Variadic->getParent());
+  // The base is a candidate for the swap below only if it is loop invariant
+  // and used no more than once inside the loop.
+ bool isSwapCandidate =
+ L && L->isLoopInvariant(ResultPtr) &&
+ !hasMoreThanOneUseInLoop(ResultPtr, L);
+ Value *FirstResult = nullptr;
+
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
@@ -762,6 +808,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
// Create an ugly GEP with a single index for each index.
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
+ if (FirstResult == nullptr)
+ FirstResult = ResultPtr;
}
}
@@ -770,7 +818,17 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
- }
+ } else
+ isSwapCandidate = false;
+
+ // If we created a GEP with constant index, and the base is loop invariant,
+ // then we swap the first one with it, so LICM can move constant GEP out
+ // later.
+ GetElementPtrInst *FirstGEP = dyn_cast<GetElementPtrInst>(FirstResult);
+ GetElementPtrInst *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
+ if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
+ swapGEPOperand(FirstGEP, SecondGEP);
+
if (ResultPtr->getType() != Variadic->getType())
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
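The intent of the swap, with an illustrative pointer chain (assuming the base p is loop invariant): instead of leaving uglygep = gep i8* p, i64 %i followed by gep i8* uglygep, i64 16, the constant index is applied to p first, so the gep i8* p, i64 16 step is loop invariant and LICM can hoist it out of the loop, while the variadic-index GEP stays inside.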
@@ -891,13 +949,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// Clear the inbounds attribute because the new index may be off-bound.
// e.g.,
//
- // b = add i64 a, 5
- // addr = gep inbounds float* p, i64 b
+ // b = add i64 a, 5
+ // addr = gep inbounds float, float* p, i64 b
//
// is transformed to:
//
- // addr2 = gep float* p, i64 a
- // addr = gep float* addr2, i64 5
+ // addr2 = gep float, float* p, i64 a ; inbounds removed
+ // addr = gep inbounds float, float* addr2, i64 5
//
// If a is -4, although the old index b is in bounds, the new index a is
// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
@@ -907,6 +965,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
//
// TODO(jingyue): do some range analysis to keep as many inbounds as
// possible. GEPs with inbounds are more friendly to alias analysis.
+ bool GEPWasInBounds = GEP->isInBounds();
GEP->setIsInBounds(false);
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
@@ -968,6 +1027,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
ConstantInt::get(IntPtrTy, Index, true),
GEP->getName(), GEP);
+ // Inherit the inbounds attribute of the original GEP.
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
} else {
// Unlikely but possible. For example,
// #pragma pack(1)
@@ -990,6 +1051,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Type::getInt8Ty(GEP->getContext()), NewGEP,
ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
GEP);
+ // Inherit the inbounds attribute of the original GEP.
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
if (GEP->getType() != I8PtrTy)
NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
}
@@ -1008,24 +1071,96 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
return false;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;)
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
Changed |= splitGEP(GEP);
- }
- // No need to split GEP ConstantExprs because all its indices are constant
- // already.
- }
+  // No need to split GEP ConstantExprs because all their indices are already
+  // constant.
}
+ Changed |= reuniteExts(F);
+
if (VerifyNoDeadCode)
verifyNoDeadCode(F);
return Changed;
}
+Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
+ const SCEV *Key, Instruction *Dominatee) {
+ auto Pos = DominatingExprs.find(Key);
+ if (Pos == DominatingExprs.end())
+ return nullptr;
+
+ auto &Candidates = Pos->second;
+ // Because we process the basic blocks in pre-order of the dominator tree, a
+ // candidate that doesn't dominate the current instruction won't dominate any
+ // future instruction either. Therefore, we pop it out of the stack. This
+ // optimization makes the algorithm O(n).
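+  // (In pre-order, the blocks a candidate dominates form a contiguous
+  // subtree visited immediately after it; once the walk leaves that subtree,
+  // no later-visited instruction can be dominated by the candidate.)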
+ while (!Candidates.empty()) {
+ Instruction *Candidate = Candidates.back();
+ if (DT->dominates(Candidate, Dominatee))
+ return Candidate;
+ Candidates.pop_back();
+ }
+ return nullptr;
+}
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Dom: LHS+RHS
+ // I: sext(LHS)+sext(RHS)
+ // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
+ // TODO: handle zext
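+  //
+  // Illustrative sketch (hypothetical IR values):
+  //   %dom = add nsw i32 %a, %b        ; cannot sign-overflow, dominates %i
+  //   ...
+  //   %sa  = sext i32 %a to i64
+  //   %sb  = sext i32 %b to i64
+  //   %i   = add i64 %sa, %sb          ; rewritten to: sext i32 %dom to i64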
+ Value *LHS = nullptr, *RHS = nullptr;
+ if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
+ match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+ if (LHS->getType() == RHS->getType()) {
+ const SCEV *Key =
+ SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+ if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+ Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+ NewSExt->takeName(I);
+ I->replaceAllUsesWith(NewSExt);
+ RecursivelyDeleteTriviallyDeadInstructions(I);
+ return true;
+ }
+ }
+ }
+
+ // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
+ if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
+ match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+ if (isKnownNotFullPoison(I)) {
+ const SCEV *Key =
+ SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+ DominatingExprs[Key].push_back(I);
+ }
+ }
+ return false;
+}
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
+ bool Changed = false;
+ DominatingExprs.clear();
+ for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ BasicBlock *BB = Node->getBlock();
+ for (auto I = BB->begin(); I != BB->end(); ) {
+ Instruction *Cur = &*I++;
+ Changed |= reuniteExts(Cur);
+ }
+ }
+ return Changed;
+}
+
void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
for (auto &B : F) {
for (auto &I : B) {
@@ -1038,3 +1173,93 @@ void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
}
}
}
+
+bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
+ GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
+ if (!FirstGEP || !FirstGEP->hasOneUse())
+ return false;
+
+ if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
+ return false;
+
+ if (FirstGEP == SecondGEP)
+ return false;
+
+ unsigned FirstNum = FirstGEP->getNumOperands();
+ unsigned SecondNum = SecondGEP->getNumOperands();
+  // Give up unless both GEPs have exactly two operands.
+ if (FirstNum != SecondNum || FirstNum != 2)
+ return false;
+
+ Value *FirstBase = FirstGEP->getOperand(0);
+ Value *SecondBase = SecondGEP->getOperand(0);
+ Value *FirstOffset = FirstGEP->getOperand(1);
+ // Give up if the index of the first GEP is loop invariant.
+ if (CurLoop->isLoopInvariant(FirstOffset))
+ return false;
+
+  // Give up if the bases do not have the same type.
+ if (FirstBase->getType() != SecondBase->getType())
+ return false;
+
+ Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
+
+  // Check whether the second operand of the first GEP has a constant
+  // coefficient. For example, in the following code we gain nothing by
+  // hoisting the second GEP out because it can be folded away:
+ // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
+ // %67 = shl i64 %scevgep.sum.ur159, 2
+ // %uglygep160 = getelementptr i8* %65, i64 %67
+ // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
+
+  // Skip constant shift instructions that may have been generated when
+  // splitting the GEPs.
+ if (FirstOffsetDef && FirstOffsetDef->isShift() &&
+ isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
+ FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
+
+  // Give up if FirstOffsetDef is an Add or Sub with a constant operand, as
+  // the swap may not be profitable at all due to constant folding.
+ if (FirstOffsetDef)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
+ unsigned opc = BO->getOpcode();
+ if ((opc == Instruction::Add || opc == Instruction::Sub) &&
+ (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1))))
+ return false;
+ }
+ return true;
+}
+
+bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
+ int UsesInLoop = 0;
+ for (User *U : V->users()) {
+ if (Instruction *User = dyn_cast<Instruction>(U))
+ if (L->contains(User))
+ if (++UsesInLoop > 1)
+ return true;
+ }
+ return false;
+}
+
+void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
+ GetElementPtrInst *Second) {
+ Value *Offset1 = First->getOperand(1);
+ Value *Offset2 = Second->getOperand(1);
+ First->setOperand(1, Offset2);
+ Second->setOperand(1, Offset1);
+
+  // We changed p+o+c to p+c+o; the intermediate value p+c may no longer be
+  // inbounds.
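+  // For instance (hypothetical sizes): if the underlying object is 64 bytes,
+  // c = 128 and o = -100, then p+o+c = p+28 is inbounds, but the new
+  // intermediate value p+c = p+128 points past the end of the object.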
+ const DataLayout &DAL = First->getModule()->getDataLayout();
+ APInt Offset(DAL.getPointerSizeInBits(
+ cast<PointerType>(First->getType())->getAddressSpace()),
+ 0);
+ Value *NewBase =
+ First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
+ uint64_t ObjectSize;
+ if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
+ Offset.ugt(ObjectSize)) {
+ First->setIsInBounds(false);
+ Second->setIsInBounds(false);
+ } else
+ First->setIsInBounds(true);
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 231411a..63c8836 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
@@ -67,15 +68,14 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// single PHI node that is the operand to the return.
if (Ret != &BB.front()) {
// Check for something else in the block.
- BasicBlock::iterator I = Ret;
+ BasicBlock::iterator I(Ret);
--I;
// Skip over debug info.
while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
--I;
if (!isa<DbgInfoIntrinsic>(I) &&
- (!isa<PHINode>(I) || I != BB.begin() ||
- Ret->getNumOperands() == 0 ||
- Ret->getOperand(0) != I))
+ (!isa<PHINode>(I) || I != BB.begin() || Ret->getNumOperands() == 0 ||
+ Ret->getOperand(0) != &*I))
continue;
}
@@ -136,7 +136,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
+ if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) {
LocalChange = true;
++NumSimpl;
}
@@ -217,6 +217,7 @@ struct CFGSimplifyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index f49f4ea..64109b2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -48,7 +48,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -66,7 +66,7 @@ char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
@@ -99,7 +99,7 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
bool Sinking::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool MadeChange, EverMadeChange = false;
@@ -119,7 +119,7 @@ bool Sinking::runOnFunction(Function &F) {
bool Sinking::ProcessBlock(BasicBlock &BB) {
// Can't sink anything out of a block that has less than two successors.
- if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+ if (BB.getTerminator()->getNumSuccessors() <= 1) return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an
@@ -134,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
bool ProcessedBegin = false;
SmallPtrSet<Instruction *, 8> Stores;
do {
- Instruction *Inst = I; // The instruction to sink.
+ Instruction *Inst = &*I; // The instruction to sink.
// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
@@ -165,14 +165,16 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
MemoryLocation Loc = MemoryLocation::get(L);
for (Instruction *S : Stores)
- if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod)
+ if (AA->getModRefInfo(S, Loc) & MRI_Mod)
return false;
}
- if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+ if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst) || Inst->isEHPad() ||
+ Inst->mayThrow())
return false;
- // Convergent operations can only be moved to control equivalent blocks.
+ // Convergent operations cannot be made control-dependent on additional
+ // values.
if (auto CS = CallSite(Inst)) {
if (CS.hasFnAttr(Attribute::Convergent))
return false;
@@ -193,6 +195,11 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (Inst->getParent() == SuccToSinkTo)
return false;
+ // It's never legal to sink an instruction into a block which terminates in an
+ // EH-pad.
+ if (SuccToSinkTo->getTerminator()->isExceptional())
+ return false;
+
// If the block has multiple predecessors, this would introduce computation
// on different code paths. We could split the critical edge, but for now we
// just punt.
@@ -278,6 +285,6 @@ bool Sinking::SinkInstruction(Instruction *Inst,
dbgs() << ")\n");
// Move the instruction.
- Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt());
+ Inst->moveBefore(&*SuccToSinkTo->getFirstInsertionPt());
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index ff3f00a..147d615 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -227,7 +227,7 @@ bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
// changes the list that I is iterating through.
auto Current = I;
++I;
- if (!NotHoisted.count(Current)) {
+ if (!NotHoisted.count(&*Current)) {
Current->moveBefore(ToBlock.getTerminator());
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 6d9d417..1faa65e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -131,7 +131,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
// We do not modify the shape of the CFG.
AU.setPreservesCFG();
@@ -212,7 +212,7 @@ char StraightLineStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
"Straight line strength reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
"Straight line strength reduction", false, false)
@@ -234,6 +234,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
Basis.CandidateKind == C.CandidateKind);
}
+// TODO: use TTI->getGEPCost.
static bool isGEPFoldable(GetElementPtrInst *GEP,
const TargetTransformInfo *TTI,
const DataLayout *DL) {
@@ -523,7 +524,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
continue;
const SCEV *OrigIndexExpr = IndexExprs[I - 1];
- IndexExprs[I - 1] = SE->getConstant(OrigIndexExpr->getType(), 0);
+ IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType());
// The base of this candidate is GEP's base plus the offsets of all
// indices except this current one.
@@ -689,7 +690,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Traverse the dominator tree in the depth-first order. This order makes sure
// all bases of a candidate are in Candidates when we process it.
for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 4f23e20..662513c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -358,13 +358,9 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
BasicBlock *BB = N->getNodeAs<BasicBlock>();
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = Term->getSuccessor(i);
-
- if (Visited.count(Succ)) {
+ for (BasicBlock *Succ : Term->successors())
+ if (Visited.count(Succ))
Loops[Succ] = BB;
- }
- }
}
}
@@ -903,14 +899,14 @@ void StructurizeCFG::rebuildSSA() {
continue;
}
- if (DT->dominates(II, User))
+ if (DT->dominates(&*II, User))
continue;
if (!Initialized) {
Value *Undef = UndefValue::get(II->getType());
Updater.Initialize(II->getType(), "");
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
- Updater.AddAvailableValue(BB, II);
+ Updater.AddAvailableValue(BB, &*II);
Initialized = true;
}
Updater.RewriteUseAfterInsertions(U);
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index c7de2e2..0e0b00d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -54,6 +54,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
@@ -136,6 +137,7 @@ FunctionPass *llvm::createTailCallEliminationPass() {
void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
/// \brief Scan the specified function for alloca instructions.
@@ -195,8 +197,8 @@ struct AllocaDerivedValueTracker {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(I);
- bool IsNocapture = !CS.isCallee(U) &&
- CS.doesNotCapture(CS.getArgumentNo(U));
+ bool IsNocapture =
+ CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U));
callUsesLocalStack(CS, IsNocapture);
if (IsNocapture) {
// If the alloca-derived argument is passed in as nocapture, then it
@@ -302,7 +304,9 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
if (!CI || CI->isTailCall())
continue;
- if (CI->doesNotAccessMemory()) {
+ bool IsNoTail = CI->isNoTailCall();
+
+ if (!IsNoTail && CI->doesNotAccessMemory()) {
// A call to a readnone function whose arguments are all things computed
// outside this function can be marked tail. Even if you stored the
// alloca address into a global, a readnone function can't load the
@@ -330,7 +334,7 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
}
}
- if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+ if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
DeferredTails.push_back(CI);
} else {
AllCallsAreTailCalls = false;
@@ -404,7 +408,7 @@ bool TailCallElim::runTRE(Function &F) {
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
- BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB.
+ BasicBlock *BB = &*BBI++; // FoldReturnAndProcessPred may delete BB.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall);
@@ -574,7 +578,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
CallInst *CI = nullptr;
- BasicBlock::iterator BBI = TI;
+ BasicBlock::iterator BBI(TI);
while (true) {
CI = dyn_cast<CallInst>(BBI);
if (CI && CI->getCalledFunction() == F)
@@ -595,9 +599,8 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
- FirstNonDbg(BB->front()) == CI &&
- FirstNonDbg(std::next(BB->begin())) == TI &&
- CI->getCalledFunction() &&
+ FirstNonDbg(BB->front().getIterator()) == CI &&
+ FirstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() &&
!TTI->isLoweredToCall(CI->getCalledFunction())) {
// A single-block function with just a call and a return. Check that
// the arguments match.
@@ -636,19 +639,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- BasicBlock::iterator BBI = CI;
+ BasicBlock::iterator BBI(CI);
for (++BBI; &*BBI != Ret; ++BBI) {
- if (CanMoveAboveCall(BBI, CI)) continue;
+ if (CanMoveAboveCall(&*BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
// using accumulator recursion elimination. Check to see if this is the
// case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
- CanTransformAccumulatorRecursion(BBI, CI))) {
+ CanTransformAccumulatorRecursion(&*BBI, CI))) {
// Yes, this is accumulator recursion. Remember which instruction
// accumulates.
- AccumulatorRecursionInstr = BBI;
+ AccumulatorRecursionInstr = &*BBI;
} else {
return false; // Otherwise, we cannot eliminate the tail recursion!
}
@@ -698,19 +701,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
NEBI = NewEntry->begin(); OEBI != E; )
if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
if (isa<ConstantInt>(AI->getArraySize()))
- AI->moveBefore(NEBI);
+ AI->moveBefore(&*NEBI);
// Now that we have created a new block, which jumps to the entry
// block, insert a PHI node for each argument of the function.
// For now, we initialize each PHI to only have the real arguments
// which are passed in.
- Instruction *InsertPos = OldEntry->begin();
+ Instruction *InsertPos = &OldEntry->front();
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I) {
PHINode *PN = PHINode::Create(I->getType(), 2,
I->getName() + ".tr", InsertPos);
I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
- PN->addIncoming(I, NewEntry);
+ PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN);
}
}
@@ -739,10 +742,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
Instruction *AccRecInstr = AccumulatorRecursionInstr;
// Start by inserting a new PHI node for the accumulator.
pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
- PHINode *AccPN =
- PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
- std::distance(PB, PE) + 1,
- "accumulator.tr", OldEntry->begin());
+ PHINode *AccPN = PHINode::Create(
+ AccumulatorRecursionEliminationInitVal->getType(),
+ std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front());
// Loop over all of the predecessors of the tail recursion block. For the
// real entry into the function we seed the PHI with the initial value,