Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 166
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp | 24
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPO.cpp | 5
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 6
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 17
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 34
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 218
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 46
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 488
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 35
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 3
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 102
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 61
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 9
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 16
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp | 72
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Local.cpp | 31
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 101
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 165
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 13
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 44
30 files changed, 928 insertions, 762 deletions
diff --git a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 816291d..6df0447 100644
--- a/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -22,7 +22,7 @@ static cl::list<std::string>
ForceAttributes("force-attribute", cl::Hidden,
cl::desc("Add an attribute to a function. This should be a "
"pair of 'function-name:attribute-name', for "
- "example -force-add-attribute=foo:noinline. This "
+ "example -force-attribute=foo:noinline. This "
"option can be specified multiple times."));
static Attribute::AttrKind parseAttrKind(StringRef Kind) {
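With the help text corrected, the example now matches the flag this pass actually registers. A hedged usage sketch (the legacy pass name 'forceattrs', the attribute choices, and the file names are illustrative assumptions; the option can be repeated, as the description says):

  opt -forceattrs -force-attribute=foo:noinline -force-attribute=bar:readnone -S input.ll -o output.ll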
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 6dcfb3f..527fdd1 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -6,16 +6,11 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a simple interprocedural pass which walks the
-// call-graph, looking for functions which do not access or only read
-// non-local memory, and marking them readnone/readonly. It does the
-// same with function arguments independently, marking them readonly/
-// readnone/nocapture. Finally, well-known library call declarations
-// are marked with all attributes that are consistent with the
-// function's standard definition. This pass is implemented as a
-// bottom-up traversal of the call-graph.
-//
+///
+/// \file
+/// This file implements interprocedural passes which walk the
+/// call-graph deducing and/or propagating function attributes.
+///
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
@@ -57,19 +52,14 @@ typedef SmallSetVector<Function *, 8> SCCNodeSet;
}
namespace {
-struct FunctionAttrs : public CallGraphSCCPass {
+struct PostOrderFunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID) {
- initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ PostOrderFunctionAttrs() : CallGraphSCCPass(ID) {
+ initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry());
}
bool runOnSCC(CallGraphSCC &SCC) override;
- bool doInitialization(CallGraph &CG) override {
- Revisit.clear();
- return false;
- }
- bool doFinalization(CallGraph &CG) override;
-
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
@@ -79,20 +69,19 @@ struct FunctionAttrs : public CallGraphSCCPass {
private:
TargetLibraryInfo *TLI;
- SmallVector<WeakVH,16> Revisit;
};
}
-char FunctionAttrs::ID = 0;
-INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+char PostOrderFunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
-Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); }
namespace {
/// The three kinds of memory access relevant to 'readonly' and
@@ -949,8 +938,7 @@ static bool setDoesNotRecurse(Function &F) {
return true;
}
-static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
- SmallVectorImpl<WeakVH> &Revisit) {
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC) {
// Try and identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
@@ -973,32 +961,11 @@ static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
// Function calls a potentially recursive function.
return setDoesNotRecurse(*F);
- // We know that F is not obviously recursive, but we haven't been able to
- // prove that it doesn't actually recurse. Add it to the Revisit list to try
- // again top-down later.
- Revisit.push_back(F);
+ // Nothing else we can deduce usefully during the postorder traversal.
return false;
}
-static bool addNoRecurseAttrsTopDownOnly(Function *F) {
- // If F is internal and all uses are in norecurse functions, then F is also
- // norecurse.
- if (F->doesNotRecurse())
- return false;
- if (F->hasInternalLinkage()) {
- for (auto *U : F->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- if (!I->getParent()->getParent()->doesNotRecurse())
- return false;
- } else {
- return false;
- }
- return setDoesNotRecurse(*F);
- }
- return false;
-}
-
-bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
@@ -1040,19 +1007,100 @@ bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
Changed |= addNoAliasAttrs(SCCNodes);
Changed |= addNonNullAttrs(SCCNodes, *TLI);
}
-
- Changed |= addNoRecurseAttrs(SCC, Revisit);
+
+ Changed |= addNoRecurseAttrs(SCC);
return Changed;
}
-bool FunctionAttrs::doFinalization(CallGraph &CG) {
+namespace {
+/// A pass to do RPO deduction and propagation of function attributes.
+///
+/// This pass provides a general RPO or "top down" propagation of
+/// function attributes. For a few (rare) cases, we can deduce significantly
+/// more about function attributes by working in RPO, so this pass
+/// provides the complement to the post-order pass above where the majority of
+/// deduction is performed.
+// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so
+// this is a boring module pass, but eventually it should be an RPO CGSCC pass
+// when such infrastructure is available.
+struct ReversePostOrderFunctionAttrs : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ReversePostOrderFunctionAttrs() : ModulePass(ID) {
+ initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<CallGraphWrapperPass>();
+ }
+};
+}
+
+char ReversePostOrderFunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs",
+ "Deduce function attributes in RPO", false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs",
+ "Deduce function attributes in RPO", false, false)
+
+Pass *llvm::createReversePostOrderFunctionAttrsPass() {
+ return new ReversePostOrderFunctionAttrs();
+}
+
+static bool addNoRecurseAttrsTopDown(Function &F) {
+ // We check the preconditions for the function prior to calling this to avoid
+ // the cost of building up a reversible post-order list. We assert them here
+ // to make sure none of the invariants this relies on were violated.
+ assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
+ assert(!F.doesNotRecurse() &&
+ "This function has already been deduced as norecurs!");
+ assert(F.hasInternalLinkage() &&
+ "Can only do top-down deduction for internal linkage functions!");
+
+ // If F is internal and all of its uses are calls from non-recursive
+ // functions, then none of its calls could in fact recurse without going
+ // through a function marked norecurse, and so we can mark this function too
+ // as norecurse. Note that the uses must actually be calls -- otherwise
+ // a pointer to this function could be returned from a norecurse function but
+ // this function could be recursively (indirectly) called. Note that this
+ // also detects if F is directly recursive as F is not yet marked as
+ // a norecurse function.
+ for (auto *U : F.users()) {
+ auto *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return false;
+ CallSite CS(I);
+ if (!CS || !CS.getParent()->getParent()->doesNotRecurse())
+ return false;
+ }
+ return setDoesNotRecurse(F);
+}
+
+bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) {
+ // We only have a post-order SCC traversal (because SCCs are inherently
+ // discovered in post-order), so we accumulate them in a vector and then walk
+ // it in reverse. This is simpler than using the RPO iterator infrastructure
+ // because we need to combine SCC detection and the PO walk of the call
+ // graph. We can also cheat egregiously because we're primarily interested in
+ // synthesizing norecurse and so we can only save the singular SCCs as SCCs
+ // with multiple functions in them will clearly be recursive.
+ auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ SmallVector<Function *, 16> Worklist;
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ if (I->size() != 1)
+ continue;
+
+ Function *F = I->front()->getFunction();
+ if (F && !F->isDeclaration() && !F->doesNotRecurse() &&
+ F->hasInternalLinkage())
+ Worklist.push_back(F);
+ }
+
bool Changed = false;
- // When iterating over SCCs we visit functions in a bottom-up fashion. Some of
- // the rules we have for identifying norecurse functions work best with a
- // top-down walk, so look again at all the functions we previously marked as
- // worth revisiting, in top-down order.
- for (auto &F : reverse(Revisit))
- if (F)
- Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F));
+ for (auto *F : reverse(Worklist))
+ Changed |= addNoRecurseAttrsTopDown(*F);
+
return Changed;
}
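The new addNoRecurseAttrsTopDown above only fires for an internal function whose every user is a direct call from a caller already known to be norecurse. A minimal C++ sketch of the source-level shape this targets (the names, and the assumption that the caller is already marked norecurse, are illustrative and not taken from the patch):

// 'external_log' is only a declaration, so the bottom-up (post-order) pass
// cannot prove that 'helper' never re-enters itself through it, and leaves
// 'helper' unmarked.
extern double external_log(double x);

static double helper(double x) {   // internal linkage, no direct recursion
  return external_log(x) + 1.0;
}

// Assume 'entry' is already known norecurse (for instance deduced earlier or
// annotated by the front end). Every use of 'helper' is a direct call from
// 'entry', so no call chain can reach 'helper' again without passing through
// a norecurse function, and the RPO pass may mark 'helper' norecurse too.
double entry(double x) {
  return helper(x) + helper(x * 2.0);
}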
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index d8b677b..5e0df95 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -41,15 +41,16 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
LLVMContext &Context) {
SMDiagnostic Err;
DEBUG(dbgs() << "Loading '" << FileName << "'\n");
- std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context);
+ // Metadata isn't loaded or linked until after all functions are
+ // imported, after which it will be materialized and linked.
+ std::unique_ptr<Module> Result =
+ getLazyIRFileModule(FileName, Err, Context,
+ /* ShouldLazyLoadMetadata = */ true);
if (!Result) {
Err.print("function-import", errs());
return nullptr;
}
- Result->materializeMetadata();
- UpgradeDebugInfo(*Result);
-
return Result;
}
@@ -132,6 +133,8 @@ static void findExternalCalls(const Module &DestModule, Function &F,
// Ignore functions already present in the destination module
auto *SrcGV = DestModule.getNamedValue(ImportedName);
if (SrcGV) {
+ if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV))
+ SrcGV = SGA->getBaseObject();
assert(isa<Function>(SrcGV) && "Name collision during import");
if (!cast<Function>(SrcGV)->isDeclaration()) {
DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
@@ -324,6 +327,10 @@ bool FunctionImporter::importFunctions(Module &DestModule) {
ModuleToTempMDValsMap) {
// Load the specified source module.
auto &SrcModule = ModuleLoaderCache(SME.getKey());
+ // The modules were created with lazy metadata loading. Materialize it
+ // now, before linking it.
+ SrcModule.materializeMetadata();
+ UpgradeDebugInfo(SrcModule);
// Link in all necessary metadata from this module.
if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
@@ -408,14 +415,19 @@ public:
Index = IndexPtr.get();
}
+ // First we need to promote to global scope and rename any local values that
+ // are potentially exported to other modules.
+ if (renameModuleForThinLTO(M, Index)) {
+ errs() << "Error renaming module\n";
+ return false;
+ }
+
// Perform the import now.
auto ModuleLoader = [&M](StringRef Identifier) {
return loadFile(Identifier, M.getContext());
};
FunctionImporter Importer(*Index, ModuleLoader);
return Importer.importFunctions(M);
-
- return false;
}
};
} // anonymous namespace
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 7ea6c08..89629cf0 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -28,7 +28,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeDAEPass(Registry);
initializeDAHPass(Registry);
initializeForceFunctionAttrsLegacyPassPass(Registry);
- initializeFunctionAttrsPass(Registry);
initializeGlobalDCEPass(Registry);
initializeGlobalOptPass(Registry);
initializeIPCPPass(Registry);
@@ -42,6 +41,8 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLowerBitSetsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
+ initializePostOrderFunctionAttrsPass(Registry);
+ initializeReversePostOrderFunctionAttrsPass(Registry);
initializePruneEHPass(Registry);
initializeStripDeadPrototypesLegacyPassPass(Registry);
initializeStripSymbolsPass(Registry);
@@ -71,7 +72,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
}
void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createFunctionAttrsPass());
+ unwrap(PM)->add(createPostOrderFunctionAttrsPass());
}
void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 8e4ad64..3c6a7bb 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -38,7 +38,7 @@ namespace {
static char ID; // Pass identification, replacement for typeid
unsigned NumLoops;
- explicit LoopExtractor(unsigned numLoops = ~0)
+ explicit LoopExtractor(unsigned numLoops = ~0)
: LoopPass(ID), NumLoops(numLoops) {
initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
}
@@ -143,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
- LI.updateUnloop(L);
+ LI.markAsRemoved(L);
}
++NumExtracted;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9876efa..faada9c 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -251,7 +251,7 @@ void PassManagerBuilder::populateModulePassManager(
Inliner = nullptr;
}
if (!DisableUnitAtATime)
- MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+ MPM.add(createPostOrderFunctionAttrsPass());
if (OptLevel > 2)
MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -346,6 +346,9 @@ void PassManagerBuilder::populateModulePassManager(
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (!DisableUnitAtATime)
+ MPM.add(createReversePostOrderFunctionAttrsPass());
+
if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
// Remove avail extern fns and globals definitions if we aren't
// compiling an object file for later LTO. For LTO we want to preserve
@@ -502,7 +505,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createIPSCCPPass());
// Now that we internalized some globals, see if we can hack on them!
- PM.add(createFunctionAttrsPass()); // Add norecurse if possible.
+ PM.add(createPostOrderFunctionAttrsPass());
+ PM.add(createReversePostOrderFunctionAttrsPass());
PM.add(createGlobalOptimizerPass());
// Promote any localized global vars.
PM.add(createPromoteMemoryToRegisterPass());
@@ -551,7 +555,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createScalarReplAggregatesPass());
// Run a few AA driven optimizations here and now, to cleanup the code.
- PM.add(createFunctionAttrsPass()); // Add nocapture.
+ PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
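The builder changes above pair the renamed post-order pass with the new RPO pass at both the normal and LTO optimization points. A condensed sketch (hypothetical driver code, assuming the usual LLVM headers and an existing Module) of wiring the same pair into a standalone legacy pipeline:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"

void runFunctionAttrsPair(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  // Bottom-up deduction over the call graph (readnone/readonly/nocapture/...).
  PM.add(llvm::createPostOrderFunctionAttrsPass());
  // Top-down pass that can additionally synthesize norecurse.
  PM.add(llvm::createReversePostOrderFunctionAttrsPass());
  PM.run(M);
}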
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 7ad0efc..160792b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -636,7 +636,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
// if pattern detected emit alternate sequence
if (OpX && OpY) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(Log2->getFastMathFlags());
+ Builder->setFastMathFlags(Log2->getFastMathFlags());
Log2->setArgOperand(0, OpY);
Value *FMulVal = Builder->CreateFMul(OpX, Log2);
Value *FSub = Builder->CreateFSub(FMulVal, OpX);
@@ -652,7 +652,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool IgnoreZeroSign = I.hasNoSignedZeros();
if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(I.getFastMathFlags());
+ Builder->setFastMathFlags(I.getFastMathFlags());
Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
@@ -693,7 +693,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Y) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(I.getFastMathFlags());
+ Builder->setFastMathFlags(I.getFastMathFlags());
Value *T = Builder->CreateFMul(Opnd1, Opnd1);
Value *R = Builder->CreateFMul(T, Y);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 776704d..51219bc 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -930,7 +930,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
- Builder->SetFastMathFlags(FCI->getFastMathFlags());
+ Builder->setFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal,
FCI->getName() + ".inv");
@@ -973,7 +973,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
- Builder->SetFastMathFlags(FCI->getFastMathFlags());
+ Builder->setFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal,
FCI->getName() + ".inv");
@@ -1082,7 +1082,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
} else {
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
- Builder->SetFastMathFlags(FMF);
+ Builder->setFastMathFlags(FMF);
Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 54a9fbd..5cde31a 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -384,23 +384,20 @@ static void replaceExtractElements(InsertElementInst *InsElt,
ConstantVector::get(ExtendMask));
// Insert the new shuffle after the vector operand of the extract is defined
- // or at the start of the basic block, so any subsequent extracts can use it.
- bool ReplaceAllExtUsers;
- if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) {
+ // (as long as it's not a PHI) or at the start of the basic block of the
+ // extract, so any subsequent extracts in the same basic block can use it.
+ // TODO: Insert before the earliest ExtractElementInst that is replaced.
+ auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
+ if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
WideVec->insertAfter(ExtVecOpInst);
- ReplaceAllExtUsers = true;
- } else {
- // TODO: Insert at start of function, so it's always safe to replace all?
+ else
IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
- ReplaceAllExtUsers = false;
- }
// Replace extracts from the original narrow vector with extracts from the new
// wide vector.
for (User *U : ExtVecOp->users()) {
ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
- if (!OldExt ||
- (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent()))
+ if (!OldExt || OldExt->getParent() != WideVec->getParent())
continue;
auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
NewExt->insertAfter(WideVec);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 51ff95d..28483e7 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -93,8 +93,8 @@ private:
/// Replace instrprof_increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
- /// Set up the section and uses for coverage data and its references.
- void lowerCoverageData(GlobalVariable *CoverageData);
+ /// Force emitting of name vars for unused functions.
+ void lowerCoverageData(GlobalVariable *CoverageNamesVar);
/// Get the region counters for an increment, creating them if necessary.
///
@@ -156,9 +156,9 @@ bool InstrProfiling::runOnModule(Module &M) {
}
}
- if (GlobalVariable *Coverage =
- M.getNamedGlobal(getCoverageMappingVarName())) {
- lowerCoverageData(Coverage);
+ if (GlobalVariable *CoverageNamesVar =
+ M.getNamedGlobal(getCoverageNamesVarName())) {
+ lowerCoverageData(CoverageNamesVar);
MadeChange = true;
}
@@ -233,28 +233,16 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
Inc->eraseFromParent();
}
-void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
-
- Constant *Init = CoverageData->getInitializer();
- // We're expecting { [4 x 32], [n x { i8*, i32, i32 }], [m x i8] }
- // for some C. If not, the frontend's given us something broken.
- assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map");
- assert(isa<ConstantArray>(Init->getAggregateElement(1)) &&
- "invalid function list in coverage map");
- ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1));
- for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) {
- Constant *Record = Records->getOperand(I);
- Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts();
+void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
+ ConstantArray *Names =
+ cast<ConstantArray>(CoverageNamesVar->getInitializer());
+ for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
+ Constant *NC = Names->getOperand(I);
+ Value *V = NC->stripPointerCasts();
assert(isa<GlobalVariable>(V) && "Missing reference to function name");
GlobalVariable *Name = cast<GlobalVariable>(V);
- // If we have region counters for this name, we've already handled it.
- auto It = ProfileDataMap.find(Name);
- if (It != ProfileDataMap.end())
- if (It->second.RegionCounters)
- continue;
-
// Move the name variable to the right section.
Name->setSection(getNameSection());
Name->setAlignment(1);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 5a7bce5..34aaa7f 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -692,7 +692,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getParent()->getDataLayout();
unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
- if (isa<StructType>(Shadow->getType())) {
+ if (Shadow->getType()->isAggregateType()) {
paintOrigin(IRB, updateOrigin(Origin, IRB),
getOriginPtr(Addr, IRB, Alignment), StoreSize,
OriginAlignment);
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 087ce8a..dcdcfed 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -100,9 +100,9 @@ namespace {
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData;
#ifdef NDEBUG
- SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+ SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
#else
- SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+ SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders;
#endif
DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
@@ -163,6 +163,7 @@ namespace {
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+ bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
private:
BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
@@ -210,11 +211,12 @@ bool JumpThreading::runOnFunction(Function &F) {
// we will loop forever. We take care of this issue by not jump threading for
// back edges. This works for normal cases but not for unreachable blocks as
// they may have cycle with no back edge.
- removeUnreachableBlocks(F);
+ bool EverChanged = false;
+ EverChanged |= removeUnreachableBlocks(F, LVI);
FindLoopHeaders(F);
- bool Changed, EverChanged = false;
+ bool Changed;
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
@@ -363,8 +365,8 @@ void JumpThreading::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
- for (unsigned i = 0, e = Edges.size(); i != e; ++i)
- LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+ for (const auto &Edge : Edges)
+ LoopHeaders.insert(Edge.second);
}
/// getKnownConstant - Helper method to determine if we can thread over a
@@ -410,8 +412,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If V is a constant, then it is known in all predecessors.
if (Constant *KC = getKnownConstant(V, Preference)) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(KC, *PI));
+ for (BasicBlock *Pred : predecessors(BB))
+ Result.push_back(std::make_pair(KC, Pred));
return true;
}
@@ -434,8 +436,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
@@ -491,22 +492,17 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal ||
- isa<UndefValue>(LHSVals[i].first)) {
- Result.push_back(LHSVals[i]);
- Result.back().first = InterestingVal;
- LHSKnownBBs.insert(LHSVals[i].second);
+ for (const auto &LHSVal : LHSVals)
+ if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
+ Result.emplace_back(InterestingVal, LHSVal.second);
+ LHSKnownBBs.insert(LHSVal.second);
}
- for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal ||
- isa<UndefValue>(RHSVals[i].first)) {
+ for (const auto &RHSVal : RHSVals)
+ if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
- if (!LHSKnownBBs.count(RHSVals[i].second)) {
- Result.push_back(RHSVals[i]);
- Result.back().first = InterestingVal;
- }
+ if (!LHSKnownBBs.count(RHSVal.second))
+ Result.emplace_back(InterestingVal, RHSVal.second);
}
return !Result.empty();
@@ -522,8 +518,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
return false;
// Invert the known values.
- for (unsigned i = 0, e = Result.size(); i != e; ++i)
- Result[i].first = ConstantExpr::getNot(Result[i].first);
+ for (auto &R : Result)
+ R.first = ConstantExpr::getNot(R.first);
return true;
}
@@ -538,12 +534,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
WantInteger, CxtI);
// Try to use constant folding to simplify the binary operator.
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first;
+ for (const auto &LHSVal : LHSVals) {
+ Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ Result.push_back(std::make_pair(KC, LHSVal.second));
}
}
@@ -591,8 +587,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate Res =
@@ -615,12 +610,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
WantInteger, CxtI);
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first;
+ for (const auto &LHSVal : LHSVals) {
+ Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
V, CmpConst);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ Result.push_back(std::make_pair(KC, LHSVal.second));
}
return !Result.empty();
@@ -637,8 +632,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
if ((TrueVal || FalseVal) &&
ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
WantInteger, CxtI)) {
- for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
- Constant *Cond = Conds[i].first;
+ for (auto &C : Conds) {
+ Constant *Cond = C.first;
// Figure out what value to use for the condition.
bool KnownCond;
@@ -655,7 +650,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// See if the select has a known constant value for this predecessor.
if (Constant *Val = KnownCond ? TrueVal : FalseVal)
- Result.push_back(std::make_pair(Val, Conds[i].second));
+ Result.push_back(std::make_pair(Val, C.second));
}
return !Result.empty();
@@ -665,8 +660,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If all else fails, see if LVI can figure out a constant value for us.
Constant *CI = LVI->getConstant(V, BB, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference)) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(KC, *PI));
+ for (BasicBlock *Pred : predecessors(BB))
+ Result.push_back(std::make_pair(KC, Pred));
}
return !Result.empty();
@@ -736,6 +731,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
+ if (TryToUnfoldSelectInCurrBB(BB))
+ return true;
+
// What kind of constant we're looking for.
ConstantPreference Preference = WantInteger;
@@ -988,10 +986,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
- for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI) {
- BasicBlock *PredBB = *PI;
-
+ for (BasicBlock *PredBB : predecessors(LoadBB)) {
// If we already scanned this predecessor, skip it.
if (!PredsScanned.insert(PredBB).second)
continue;
@@ -1038,13 +1033,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
SmallVector<BasicBlock*, 8> PredsToSplit;
SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
- for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
- AvailablePredSet.insert(AvailablePreds[i].first);
+ for (const auto &AvailablePred : AvailablePreds)
+ AvailablePredSet.insert(AvailablePred.first);
// Add all the unavailable predecessors to the PredsToSplit list.
- for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(LoadBB)) {
// If the predecessor is an indirect goto, we can't split the edge.
if (isa<IndirectBrInst>(P->getTerminator()))
return false;
@@ -1129,9 +1122,9 @@ FindMostPopularDest(BasicBlock *BB,
// blocks with known and real destinations to threading undef. We'll handle
// them later if interesting.
DenseMap<BasicBlock*, unsigned> DestPopularity;
- for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
- if (PredToDestList[i].second)
- DestPopularity[PredToDestList[i].second]++;
+ for (const auto &PredToDest : PredToDestList)
+ if (PredToDest.second)
+ DestPopularity[PredToDest.second]++;
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
@@ -1194,10 +1187,10 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
"ComputeValueKnownInPredecessors returned true with no values");
DEBUG(dbgs() << "IN BB: " << *BB;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ for (const auto &PredValue : PredValues) {
dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
- << *PredValues[i].first
- << " for pred '" << PredValues[i].second->getName() << "'.\n";
+ << *PredValue.first
+ << " for pred '" << PredValue.second->getName() << "'.\n";
});
// Decide what we want to thread through. Convert our list of known values to
@@ -1210,8 +1203,8 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- BasicBlock *Pred = PredValues[i].second;
+ for (const auto &PredValue : PredValues) {
+ BasicBlock *Pred = PredValue.second;
if (!SeenPreds.insert(Pred).second)
continue; // Duplicate predecessor entry.
@@ -1220,7 +1213,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
- Constant *Val = PredValues[i].first;
+ Constant *Val = PredValue.first;
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
@@ -1260,16 +1253,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
SmallVector<BasicBlock*, 16> PredsToFactor;
- for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
- if (PredToDestList[i].second == MostPopularDest) {
- BasicBlock *Pred = PredToDestList[i].first;
+ for (const auto &PredToDest : PredToDestList)
+ if (PredToDest.second == MostPopularDest) {
+ BasicBlock *Pred = PredToDest.first;
// This predecessor may be a switch or something else that has multiple
// edges to the block. Factor each of these edges by listing them
// according to # occurrences in PredsToFactor.
- TerminatorInst *PredTI = Pred->getTerminator();
- for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
- if (PredTI->getSuccessor(i) == BB)
+ for (BasicBlock *Succ : successors(Pred))
+ if (Succ == BB)
PredsToFactor.push_back(Pred);
}
@@ -1366,11 +1358,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Scan the information to see which is most popular: true or false. The
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
- for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (isa<UndefValue>(XorOpValues[i].first))
+ for (const auto &XorOpValue : XorOpValues) {
+ if (isa<UndefValue>(XorOpValue.first))
// Ignore undefs for the count.
continue;
- if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
+ if (cast<ConstantInt>(XorOpValue.first)->isZero())
++NumFalse;
else
++NumTrue;
@@ -1386,12 +1378,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
- for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (XorOpValues[i].first != SplitVal &&
- !isa<UndefValue>(XorOpValues[i].first))
+ for (const auto &XorOpValue : XorOpValues) {
+ if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
continue;
- BlocksToFoldInto.push_back(XorOpValues[i].second);
+ BlocksToFoldInto.push_back(XorOpValue.second);
}
// If we inferred a value for all of the predecessors, then duplication won't
@@ -1543,10 +1534,10 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// PHI insertion, of which we are prepared to do, clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Use &U : I->uses()) {
+ for (Use &U : I.uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
@@ -1561,14 +1552,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, &*I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]);
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1644,10 +1635,10 @@ void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
// Collect updated outgoing edges' frequencies from BB and use them to update
// edge probabilities.
SmallVector<uint64_t, 4> BBSuccFreq;
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- auto SuccFreq = (*I == SuccBB)
+ for (BasicBlock *Succ : successors(BB)) {
+ auto SuccFreq = (Succ == SuccBB)
? BB2SuccBBFreq - NewBBFreq
- : BBOrigFreq * BPI->getEdgeProbability(BB, *I);
+ : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
BBSuccFreq.push_back(SuccFreq.getFrequency());
}
@@ -1783,10 +1774,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// PHI insertion, of which we are prepared to do, clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Use &U : I->uses()) {
+ for (Use &U : I.uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
@@ -1801,14 +1792,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, &*I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]);
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1903,3 +1894,62 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
}
return false;
}
+
+/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form
+/// bb:
+/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
+/// %s = select p, trueval, falseval
+///
+/// And expand the select into a branch structure. This later enables
+/// jump-threading over bb in this pass.
+///
+/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold
+/// select if the associated PHI has at least one constant. If the unfolded
+/// select is not jump-threaded, it will be folded again in the later
+/// optimizations.
+bool JumpThreading::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ // Look for a Phi/Select pair in the same basic block. The Phi feeds the
+ // condition of the Select and at least one of the incoming values is a
+ // constant.
+ for (BasicBlock::iterator BI = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0 || !PN->hasOneUse())
+ continue;
+
+ SelectInst *SI = dyn_cast<SelectInst>(PN->user_back());
+ if (!SI || SI->getParent() != BB)
+ continue;
+
+ Value *Cond = SI->getCondition();
+ if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1))
+ continue;
+
+ bool HasConst = false;
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ if (PN->getIncomingBlock(i) == BB)
+ return false;
+ if (isa<ConstantInt>(PN->getIncomingValue(i)))
+ HasConst = true;
+ }
+
+ if (HasConst) {
+ // Expand the select.
+ TerminatorInst *Term =
+ SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+ PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
+ NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
+ NewPN->addIncoming(SI->getFalseValue(), BB);
+ SI->replaceAllUsesWith(NewPN);
+ SI->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
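The pattern unfolded above typically comes from a boolean that is constant along some incoming edges. A rough C++-level sketch of source that tends to lower (after mem2reg) into the phi-of-i1-constants feeding a select described in the comment (names are illustrative, not from the patch):

int choose(int a, int t, int f) {
  bool p;
  if (a < 0)
    p = false;          // this predecessor contributes a constant 'false'
  else if (a > 100)
    p = true;           // this predecessor contributes a constant 'true'
  else
    p = (a % 2) == 0;   // a non-constant incoming value is still allowed
  return p ? t : f;     // the select fed by the phi of 'p'
}

Once the select is expanded into an explicit branch on 'p', the predecessors where 'p' is a known constant can be jump-threaded directly to the matching successor.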
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index e01e23f..8923ff7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -203,9 +203,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
- for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
- LoopItr != LoopItrE; ++LoopItr) {
- Loop *InnerL = *LoopItr;
+ for (Loop *InnerL : L->getSubLoops()) {
AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
assert(InnerAST && "Where is my AST?");
@@ -227,9 +225,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Because subloops have already been incorporated into AST, we skip blocks in
// subloops.
//
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
+ for (BasicBlock *BB : L->blocks()) {
if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
}
@@ -263,9 +259,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
PredIteratorCache PIC;
// Loop over all of the alias sets in the tracker object.
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I)
- Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts,
+ for (AliasSet &AS : *CurAST)
+ Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts,
PIC, LI, DT, CurLoop,
CurAST, &SafetyInfo);
@@ -324,9 +319,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |=
- sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ for (DomTreeNode *Child : Children)
+ Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
if (inSubLoop(BB,CurLoop,LI)) return Changed;
@@ -407,9 +402,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
}
const std::vector<DomTreeNode*> &Children = N->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |=
- hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ for (DomTreeNode *Child : Children)
+ Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
return Changed;
}
@@ -499,9 +493,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I) {
- AliasSet &AS = *I;
+ for (AliasSet &AS : *CurAST) {
if (!AS.isForwardingAliasSet() && AS.isMod()) {
FoundMod = true;
break;
@@ -783,8 +775,8 @@ static bool isGuaranteedToExecute(const Instruction &Inst,
CurLoop->getExitBlocks(ExitBlocks);
// Verify that the block dominates each of the exit blocks of the loop.
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(Inst.getParent(), ExitBlock))
return false;
// As a degenerate case, if the loop is statically infinite then we haven't
@@ -951,17 +943,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// If there is an non-load/store instruction in the loop, we can't promote
// it.
- if (const LoadInst *load = dyn_cast<LoadInst>(UI)) {
- assert(!load->isVolatile() && "AST broken");
- if (!load->isSimple())
+ if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) {
+ assert(!Load->isVolatile() && "AST broken");
+ if (!Load->isSimple())
return Changed;
- } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) {
+ } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (UI->getOperand(1) != ASIV)
continue;
- assert(!store->isVolatile() && "AST broken");
- if (!store->isSimple())
+ assert(!Store->isVolatile() && "AST broken");
+ if (!Store->isSimple())
return Changed;
// Don't sink stores from loops without dedicated block exits. Exits
// containing indirect branches are not transformed by loop simplify,
@@ -979,7 +971,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// restrictive (and performant) alignment and if we are sure this
// instruction will be executed, update the alignment.
// Larger is better, with the exception of 0 being the best alignment.
- unsigned InstAlignment = store->getAlignment();
+ unsigned InstAlignment = Store->getAlignment();
if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
GuaranteedToExecute = true;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index bc00ff3..7b1940b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -245,7 +245,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
loopInfo.removeBlock(BB);
// The last step is to update LoopInfo now that we've eliminated this loop.
- loopInfo.updateUnloop(L);
+ loopInfo.markAsRemoved(L);
Changed = true;
++NumDeleted;
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 56ae5c0..ecef6db 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -39,16 +39,16 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
static cl::opt<unsigned>
- UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+ UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The baseline cost threshold for loop unrolling"));
static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
- "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden,
+ "unroll-percent-dynamic-cost-saved-threshold", cl::Hidden,
cl::desc("The percentage of estimated dynamic cost which must be saved by "
"unrolling to allow unrolling up to the max threshold."));
static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
- "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden,
+ "unroll-dynamic-cost-savings-discount", cl::Hidden,
cl::desc("This is the amount discounted from the total unroll cost when "
"the unrolled form has a high dynamic cost savings (triggered by "
"the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
@@ -59,17 +59,17 @@ static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
"iterations when checking full unroll profitability"));
static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+UnrollCount("unroll-count", cl::Hidden,
cl::desc("Use this unroll count for all loops including those with "
"unroll_count pragma values, for testing purposes"));
static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
static cl::opt<unsigned>
@@ -77,182 +77,95 @@ PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden
cl::desc("Unrolled size limit for loops with an unroll(full) or "
"unroll_count pragma."));
-namespace {
- class LoopUnroll : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
- CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
- CurrentPercentDynamicCostSavedThreshold =
- UnrollPercentDynamicCostSavedThreshold;
- CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
- CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
- CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
- CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
-
- UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
- UserPercentDynamicCostSavedThreshold =
- (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0);
- UserDynamicCostSavingsDiscount =
- (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0);
- UserAllowPartial = (P != -1) ||
- (UnrollAllowPartial.getNumOccurrences() > 0);
- UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
- UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
-
- initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
- }
- /// A magic value for use with the Threshold parameter to indicate
- /// that the loop unroll should be performed regardless of how much
- /// code expansion would result.
- static const unsigned NoThreshold = UINT_MAX;
-
- // Threshold to use when optsize is specified (and there is no
- // explicit -unroll-threshold).
- static const unsigned OptSizeUnrollThreshold = 50;
-
- // Default unroll count for loops with run-time trip count if
- // -unroll-count is not set
- static const unsigned UnrollRuntimeCount = 8;
-
- unsigned CurrentCount;
- unsigned CurrentThreshold;
- unsigned CurrentPercentDynamicCostSavedThreshold;
- unsigned CurrentDynamicCostSavingsDiscount;
- bool CurrentAllowPartial;
- bool CurrentRuntime;
-
- // Flags for whether the 'current' settings are user-specified.
- bool UserCount;
- bool UserThreshold;
- bool UserPercentDynamicCostSavedThreshold;
- bool UserDynamicCostSavingsDiscount;
- bool UserAllowPartial;
- bool UserRuntime;
-
- bool runOnLoop(Loop *L, LPPassManager &) override;
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
+/// A magic value for use with the Threshold parameter to indicate
+/// that the loop unroll should be performed regardless of how much
+/// code expansion would result.
+static const unsigned NoThreshold = UINT_MAX;
- // Fill in the UnrollingPreferences parameter with values from the
- // TargetTransformationInfo.
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
- TargetTransformInfo::UnrollingPreferences &UP) {
- UP.Threshold = CurrentThreshold;
- UP.PercentDynamicCostSavedThreshold =
- CurrentPercentDynamicCostSavedThreshold;
- UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount;
- UP.OptSizeThreshold = OptSizeUnrollThreshold;
- UP.PartialThreshold = CurrentThreshold;
- UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
- UP.Count = CurrentCount;
- UP.MaxCount = UINT_MAX;
- UP.Partial = CurrentAllowPartial;
- UP.Runtime = CurrentRuntime;
- UP.AllowExpensiveTripCount = false;
- TTI.getUnrollingPreferences(L, UP);
- }
+/// Default unroll count for loops with run-time trip count if
+/// -unroll-count is not set
+static const unsigned DefaultUnrollRuntimeCount = 8;
- // Select and return an unroll count based on parameters from
- // user, unroll preferences, unroll pragmas, or a heuristic.
- // SetExplicitly is set to true if the unroll count is is set by
- // the user or a pragma rather than selected heuristically.
- unsigned
- selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
- unsigned PragmaCount,
- const TargetTransformInfo::UnrollingPreferences &UP,
- bool &SetExplicitly);
-
- // Select threshold values used to limit unrolling based on a
- // total unrolled size. Parameters Threshold and PartialThreshold
- // are set to the maximum unrolled size for fully and partially
- // unrolled loops respectively.
- void selectThresholds(const Loop *L, bool UsePragmaThreshold,
- const TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &Threshold, unsigned &PartialThreshold,
- unsigned &PercentDynamicCostSavedThreshold,
- unsigned &DynamicCostSavingsDiscount) {
- // Determine the current unrolling threshold. While this is
- // normally set from UnrollThreshold, it is overridden to a
- // smaller value if the current function is marked as
- // optimize-for-size, and the unroll threshold was not user
- // specified.
- Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
- PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
- PercentDynamicCostSavedThreshold =
- UserPercentDynamicCostSavedThreshold
- ? CurrentPercentDynamicCostSavedThreshold
- : UP.PercentDynamicCostSavedThreshold;
- DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount
- ? CurrentDynamicCostSavingsDiscount
- : UP.DynamicCostSavingsDiscount;
-
- if (!UserThreshold &&
- // FIXME: Use Function::optForSize().
- L->getHeader()->getParent()->hasFnAttribute(
- Attribute::OptimizeForSize)) {
- Threshold = UP.OptSizeThreshold;
- PartialThreshold = UP.PartialOptSizeThreshold;
- }
- if (UsePragmaThreshold) {
- // If the loop has an unrolling pragma, we want to be more
- // aggressive with unrolling limits. Set thresholds to at
- // least the PragmaTheshold value which is larger than the
- // default limits.
- if (Threshold != NoThreshold)
- Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
- if (PartialThreshold != NoThreshold)
- PartialThreshold =
- std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
- }
- }
- bool canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost, uint64_t RolledDynamicCost);
- };
-}
+/// Gather the various unrolling parameters based on the defaults, compiler
+/// flags, TTI overrides, pragmas, and user-specified parameters.
+static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
+ Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
+ Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
+ Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
+ bool PragmaEnableUnroll, unsigned TripCount) {
+ TargetTransformInfo::UnrollingPreferences UP;
-char LoopUnroll::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+ // Set up the defaults
+ UP.Threshold = 150;
+ UP.PercentDynamicCostSavedThreshold = 20;
+ UP.DynamicCostSavingsDiscount = 2000;
+ UP.OptSizeThreshold = 50;
+ UP.PartialThreshold = UP.Threshold;
+ UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
+ UP.Count = 0;
+ UP.MaxCount = UINT_MAX;
+ UP.Partial = false;
+ UP.Runtime = false;
+ UP.AllowExpensiveTripCount = false;
+
+ // Override with any target specific settings
+ TTI.getUnrollingPreferences(L, UP);
+
+ // Apply size attributes
+ if (L->getHeader()->getParent()->optForSize()) {
+ UP.Threshold = UP.OptSizeThreshold;
+ UP.PartialThreshold = UP.PartialOptSizeThreshold;
+ }
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime) {
- return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
-}
+ // Apply unroll count pragmas
+ if (PragmaCount)
+ UP.Count = PragmaCount;
+ else if (PragmaFullUnroll)
+ UP.Count = TripCount;
-Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+ // Apply any user values specified by cl::opt
+ if (UnrollThreshold.getNumOccurrences() > 0) {
+ UP.Threshold = UnrollThreshold;
+ UP.PartialThreshold = UnrollThreshold;
+ }
+ if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
+ UP.PercentDynamicCostSavedThreshold =
+ UnrollPercentDynamicCostSavedThreshold;
+ if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
+ UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
+ if (UnrollCount.getNumOccurrences() > 0)
+ UP.Count = UnrollCount;
+ if (UnrollAllowPartial.getNumOccurrences() > 0)
+ UP.Partial = UnrollAllowPartial;
+ if (UnrollRuntime.getNumOccurrences() > 0)
+ UP.Runtime = UnrollRuntime;
+
+ // Apply user values provided by argument
+ if (UserThreshold.hasValue()) {
+ UP.Threshold = *UserThreshold;
+ UP.PartialThreshold = *UserThreshold;
+ }
+ if (UserCount.hasValue())
+ UP.Count = *UserCount;
+ if (UserAllowPartial.hasValue())
+ UP.Partial = *UserAllowPartial;
+ if (UserRuntime.hasValue())
+ UP.Runtime = *UserRuntime;
+
+ if (PragmaCount > 0 ||
+ ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold value
+ // which is larger than the default limits.
+ if (UP.Threshold != NoThreshold)
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ if (UP.PartialThreshold != NoThreshold)
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ }
+
+ return UP;
}
namespace {
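The gatherUnrollingPreferences helper above layers its sources in a fixed precedence: built-in defaults first, then target (TTI) overrides, size attributes, pragmas, cl::opt flags, and finally the values handed to the pass explicitly, with each later source overwriting the earlier ones. A minimal standalone sketch of that layering idiom, using std::optional and illustrative names that are not part of the patch:

#include <optional>

struct Prefs {
  unsigned Threshold = 150; // built-in default
  unsigned Count = 0;       // 0 means "pick heuristically"
  bool Runtime = false;
};

// Later sources win: each optional, when present, overwrites the field.
// (Hypothetical helper; the real pass reads pragmas and cl::opt flags.)
Prefs layerPreferences(std::optional<unsigned> TargetThreshold,
                       std::optional<unsigned> PragmaCount,
                       std::optional<unsigned> UserCount,
                       std::optional<bool> UserRuntime) {
  Prefs P;                                              // 1. defaults
  if (TargetThreshold) P.Threshold = *TargetThreshold;  // 2. target override
  if (PragmaCount)     P.Count = *PragmaCount;          // 3. pragma
  if (UserCount)       P.Count = *UserCount;            // 4. explicit user value wins
  if (UserRuntime)     P.Runtime = *UserRuntime;
  return P;
}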
@@ -793,12 +706,11 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
L->setLoopID(NewLoopID);
}
-bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost,
- uint64_t RolledDynamicCost) {
-
+static bool canUnrollCompletely(Loop *L, unsigned Threshold,
+ unsigned PercentDynamicCostSavedThreshold,
+ unsigned DynamicCostSavingsDiscount,
+ uint64_t UnrolledCost,
+ uint64_t RolledDynamicCost) {
if (Threshold == NoThreshold) {
DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
return true;
@@ -846,60 +758,13 @@ bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
return false;
}
-unsigned LoopUnroll::selectUnrollCount(
- const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
- unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
- bool &SetExplicitly) {
- SetExplicitly = true;
-
- // User-specified count (either as a command-line option or
- // constructor parameter) has highest precedence.
- unsigned Count = UserCount ? CurrentCount : 0;
-
- // If there is no user-specified count, unroll pragmas have the next
- // highest precedence.
- if (Count == 0) {
- if (PragmaCount) {
- Count = PragmaCount;
- } else if (PragmaFullUnroll) {
- Count = TripCount;
- }
- }
-
- if (Count == 0)
- Count = UP.Count;
-
- if (Count == 0) {
- SetExplicitly = false;
- if (TripCount == 0)
- // Runtime trip count.
- Count = UnrollRuntimeCount;
- else
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- Count = TripCount;
- }
- if (TripCount && Count > TripCount)
- return TripCount;
- return Count;
-}
-
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
- if (skipOptnoneFunction(L))
- return false;
-
- Function &F = *L->getHeader()->getParent();
-
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
+static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, const TargetTransformInfo &TTI,
+ AssumptionCache &AC, bool PreserveLCSSA,
+ Optional<unsigned> ProvidedCount,
+ Optional<unsigned> ProvidedThreshold,
+ Optional<bool> ProvidedAllowPartial,
+ Optional<bool> ProvidedRuntime) {
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
@@ -912,9 +777,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
unsigned PragmaCount = UnrollCountPragmaValue(L);
bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
- TargetTransformInfo::UnrollingPreferences UP;
- getUnrollingPreferences(L, TTI, UP);
-
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
@@ -929,11 +791,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
- // Select an initial unroll count. This may be reduced later based
- // on size thresholds.
- bool CountSetExplicitly;
- unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,
- PragmaCount, UP, CountSetExplicitly);
+ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+ L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll,
+ TripCount);
+
+ unsigned Count = UP.Count;
+ bool CountSetExplicitly = Count != 0;
+ // Use a heuristic count if we didn't set anything explicitly.
+ if (!CountSetExplicitly)
+ Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount;
+ if (TripCount && Count > TripCount)
+ Count = TripCount;
unsigned NumInlineCandidates;
bool notDuplicatable;
@@ -955,21 +824,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
return false;
}
- unsigned Threshold, PartialThreshold;
- unsigned PercentDynamicCostSavedThreshold;
- unsigned DynamicCostSavingsDiscount;
- // Only use the high pragma threshold when we have a target unroll factor such
- // as with "#pragma unroll N" or a pragma indicating full unrolling and the
- // trip count is known. Otherwise we rely on the standard threshold to
- // heuristically select a reasonable unroll count.
- bool UsePragmaThreshold =
- PragmaCount > 0 ||
- ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
-
- selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold,
- PercentDynamicCostSavedThreshold,
- DynamicCostSavingsDiscount);
-
// Given Count, TripCount and thresholds determine the type of
// unrolling which is to be performed.
enum { Full = 0, Partial = 1, Runtime = 2 };
@@ -977,19 +831,20 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
if (TripCount && Count == TripCount) {
Unrolling = Partial;
// If the loop is really small, we don't need to run an expensive analysis.
- if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount,
+ if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
UnrolledSize, UnrolledSize)) {
Unrolling = Full;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost =
- analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI,
- Threshold + DynamicCostSavingsDiscount))
- if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
- DynamicCostSavingsDiscount, Cost->UnrolledCost,
- Cost->RolledDynamicCost)) {
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, TripCount, DT, *SE, TTI,
+ UP.Threshold + UP.DynamicCostSavingsDiscount))
+ if (canUnrollCompletely(L, UP.Threshold,
+ UP.PercentDynamicCostSavedThreshold,
+ UP.DynamicCostSavingsDiscount,
+ Cost->UnrolledCost, Cost->RolledDynamicCost)) {
Unrolling = Full;
}
}
@@ -1001,23 +856,22 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
- bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) ||
- (UserRuntime ? CurrentRuntime : UP.Runtime);
+ bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime;
// Don't unroll a runtime trip count loop with unroll full pragma.
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
if (Unrolling == Partial) {
- bool AllowPartial = PragmaEnableUnroll ||
- (UserAllowPartial ? CurrentAllowPartial : UP.Partial);
+ bool AllowPartial = PragmaEnableUnroll || UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
+ if (UP.PartialThreshold != NoThreshold &&
+ UnrolledSize > UP.PartialThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
- Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2);
+ Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
while (Count != 0 && TripCount % Count != 0)
Count--;
}
@@ -1029,7 +883,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
}
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
- while (Count != 0 && UnrolledSize > PartialThreshold) {
+ while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
Count >>= 1;
UnrolledSize = (LoopSize-2) * Count + 2;
}
@@ -1086,3 +940,91 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
return true;
}
+
+namespace {
+class LoopUnroll : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll(Optional<unsigned> Threshold = None,
+ Optional<unsigned> Count = None,
+ Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
+ : LoopPass(ID), ProvidedCount(Count), ProvidedThreshold(Threshold),
+ ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime) {
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
+
+ Optional<unsigned> ProvidedCount;
+ Optional<unsigned> ProvidedThreshold;
+ Optional<bool> ProvidedAllowPartial;
+ Optional<bool> ProvidedRuntime;
+
+ bool runOnLoop(Loop *L, LPPassManager &) override {
+ if (skipOptnoneFunction(L))
+ return false;
+
+ Function &F = *L->getHeader()->getParent();
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount,
+ ProvidedThreshold, ProvidedAllowPartial,
+ ProvidedRuntime);
+ }
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+}
+
+char LoopUnroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
+ int Runtime) {
+ // TODO: It would make more sense for this function to take the optionals
+ // directly, but that's dangerous since it would silently break out of tree
+ // callers.
+ return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ Count == -1 ? None : Optional<unsigned>(Count),
+ AllowPartial == -1 ? None
+ : Optional<bool>(AllowPartial),
+ Runtime == -1 ? None : Optional<bool>(Runtime));
+}
+
+Pass *llvm::createSimpleLoopUnrollPass() {
+ return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+}
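For partial unrolling the pass estimates the unrolled body as (LoopSize - 2) * Count + 2 and, when that estimate exceeds UP.PartialThreshold, halves Count until it fits. A standalone sketch of that reduction (a simplified model of the loop in the hunk above, not the pass itself):

#include <cstdint>

// Mirrors the size model used above: UnrolledSize = (LoopSize - 2) * Count + 2.
// Assumes LoopSize >= 2, as the pass's size metric does. Returns the largest
// power-of-two count whose estimated size still fits the threshold.
unsigned largestFittingPowerOfTwoCount(unsigned Count, unsigned LoopSize,
                                       unsigned PartialThreshold) {
  uint64_t UnrolledSize = uint64_t(LoopSize - 2) * Count + 2;
  while (Count != 0 && UnrolledSize > PartialThreshold) {
    Count >>= 1;
    UnrolledSize = uint64_t(LoopSize - 2) * Count + 2;
  }
  return Count;
}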
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 7354016..6b43b0f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -529,12 +529,13 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// We found an instruction that may write to the loaded memory.
// We can try to promote at this position instead of the store
- // position if nothing alias the store memory after this.
+ // position if nothing aliases the store memory after this and the store
+ // destination is not in the range.
P = &*I;
for (; I != E; ++I) {
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
- DEBUG(dbgs() << "Alias " << *I << "\n");
+ if (&*I == SI->getOperand(1) ||
+ AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
P = nullptr;
break;
}
@@ -628,13 +629,39 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Ensure that the value being stored is something that can be memset'able a
// byte at a time like "0" or "-1" or any width, as well as things like
// 0xA0A0A0A0 and 0.0.
- if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ auto *V = SI->getOperand(0);
+ if (Value *ByteVal = isBytewiseValue(V)) {
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
+ // If we have an aggregate, we try to promote it to memset regardless
+ // of opportunity for merging as it can expose optimization opportunities
+ // in subsequent passes.
+ auto *T = V->getType();
+ if (T->isAggregateType()) {
+ uint64_t Size = DL.getTypeStoreSize(T);
+ unsigned Align = SI->getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(T);
+ IRBuilder<> Builder(SI);
+ auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
+ Size, Align, SI->isVolatile());
+
+ DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
+
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ NumMemSetInfer++;
+
+ // Make sure we do not invalidate the iterator.
+ BBI = M->getIterator();
+ return true;
+ }
+ }
+
return false;
}
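The new hunk promotes a store of a bytewise-uniform aggregate value straight to a memset covering the aggregate's store size, even when there is nothing adjacent to merge with. At the source level the rewrite corresponds to something like this hand-written sketch (types and names are illustrative):

#include <cstring>

struct Pair { long a; long b; };

// Before: a store of an all-zero aggregate value.
void storeAggregate(Pair *P) {
  *P = Pair{0, 0};
}

// After (conceptually): the same effect expressed as a memset over the
// aggregate's store size, which later passes can merge or widen further.
void storeAggregateAsMemset(Pair *P) {
  std::memset(P, 0, sizeof(Pair));
}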
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 28c610c..b56b355 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -499,7 +499,7 @@ static bool isGCSafepointPoll(Function &F) {
static bool shouldRewriteFunction(Function &F) {
// TODO: This should check the GCStrategy
if (F.hasGC()) {
- const char *FunctionGCName = F.getGC();
+ const auto &FunctionGCName = F.getGC();
const StringRef StatepointExampleName("statepoint-example");
const StringRef CoreCLRName("coreclr");
return (StatepointExampleName == FunctionGCName) ||
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 401a740..bcadd4e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2155,7 +2155,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
// During the initial run we will get to the root of the tree.
// But if we get here while we are redoing instructions, there is no
// guarantee that the root will be visited. So Redo later
- if (BO->user_back() != BO)
+ if (BO->user_back() != BO &&
+ BO->getParent() == BO->user_back()->getParent())
RedoInsts.insert(BO->user_back());
return;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 5d253be..d77d574 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -78,6 +78,13 @@ static cl::opt<bool>
AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
cl::Hidden, cl::init(true));
+/// Should we split vectors of pointers into their individual elements? This
+/// is known to be buggy, but the alternate implementation isn't yet ready.
+/// This is purely to provide a debugging and diagnostic hook until the vector
+/// split is replaced with vector relocations.
+static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden,
+ cl::init(true));
+
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -357,10 +364,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// particular element in 'I'.
static BaseDefiningValueResult
findBaseDefiningValueOfVector(Value *I) {
- assert(I->getType()->isVectorTy() &&
- cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
- "Illegal to ask for the base pointer of a non-pointer type");
-
// Each case parallels findBaseDefiningValue below, see that code for
// detailed motivation.
@@ -368,26 +371,10 @@ findBaseDefiningValueOfVector(Value *I) {
// An incoming argument to the function is a base pointer
return BaseDefiningValueResult(I, true);
- // We shouldn't see the address of a global as a vector value?
- assert(!isa<GlobalVariable>(I) &&
- "unexpected global variable found in base of vector");
-
- // inlining could possibly introduce phi node that contains
- // undef if callee has multiple returns
- if (isa<UndefValue>(I))
- // utterly meaningless, but useful for dealing with partially optimized
- // code.
+ if (isa<Constant>(I))
+ // Constant vectors consist only of constant pointers.
return BaseDefiningValueResult(I, true);
- // Due to inheritance, this must be _after_ the global variable and undef
- // checks
- if (Constant *Con = dyn_cast<Constant>(I)) {
- assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
- "order of checks wrong!");
- assert(Con->isNullValue() && "null is the only case which makes sense");
- return BaseDefiningValueResult(Con, true);
- }
-
if (isa<LoadInst>(I))
return BaseDefiningValueResult(I, true);
@@ -417,11 +404,11 @@ findBaseDefiningValueOfVector(Value *I) {
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
+ assert(I->getType()->isPtrOrPtrVectorTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
if (I->getType()->isVectorTy())
return findBaseDefiningValueOfVector(I);
-
- assert(I->getType()->isPointerTy() &&
- "Illegal to ask for the base pointer of a non-pointer type");
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
@@ -1310,18 +1297,29 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
assert(Index < LiveVec.size() && "Bug in std::find?");
return Index;
};
-
- // All gc_relocate are set to i8 addrspace(1)* type. We originally generated
- // unique declarations for each pointer type, but this proved problematic
- // because the intrinsic mangling code is incomplete and fragile. Since
- // we're moving towards a single unified pointer type anyways, we can just
- // cast everything to an i8* of the right address space. A bitcast is added
- // later to convert gc_relocate to the actual value's type.
Module *M = StatepointToken->getModule();
- auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
- Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
- Value *GCRelocateDecl =
- Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+
+ // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
+ // element type is i8 addrspace(1)*). We originally generated unique
+ // declarations for each pointer type, but this proved problematic because
+ // the intrinsic mangling code is incomplete and fragile. Since we're moving
+ // towards a single unified pointer type anyways, we can just cast everything
+ // to an i8* of the right address space. A bitcast is added later to convert
+ // gc_relocate to the actual value's type.
+ auto getGCRelocateDecl = [&] (Type *Ty) {
+ assert(isHandledGCPointerType(Ty));
+ auto AS = Ty->getScalarType()->getPointerAddressSpace();
+ Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
+ if (auto *VT = dyn_cast<VectorType>(Ty))
+ NewTy = VectorType::get(NewTy, VT->getNumElements());
+ return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
+ {NewTy});
+ };
+
+ // Lazily populated map from input types to the canonicalized form mentioned
+ // in the comment above. This should probably be cached somewhere more
+ // broadly.
+ DenseMap<Type*, Value*> TypeToDeclMap;
for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
@@ -1329,6 +1327,11 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
Value *LiveIdx = Builder.getInt32(LiveStart + i);
+ Type *Ty = LiveVariables[i]->getType();
+ if (!TypeToDeclMap.count(Ty))
+ TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
+ Value *GCRelocateDecl = TypeToDeclMap[Ty];
+
// only specify a debug name if we can give a useful one
CallInst *Reloc = Builder.CreateCall(
GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
@@ -2380,15 +2383,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// Do a limited scalarization of any live at safepoint vector values which
// contain pointers. This enables this pass to run after vectorization at
- // the cost of some possible performance loss. TODO: it would be nice to
- // natively support vectors all the way through the backend so we don't need
- // to scalarize here.
- for (size_t i = 0; i < Records.size(); i++) {
- PartiallyConstructedSafepointRecord &Info = Records[i];
- Instruction *Statepoint = ToUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
- Info.PointerToBase, DT);
- }
+ // the cost of some possible performance loss. Note: This is known to not
+ // handle updating of the side tables correctly which can lead to relocation
+ // bugs when the same vector is live at multiple statepoints. We're in the
+ // process of implementing the alternate lowering - relocating the
+ // vector-of-pointers as first class item and updating the backend to
+ // understand that - but that's not yet complete.
+ if (UseVectorSplit)
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+ Instruction *Statepoint = ToUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+ Info.PointerToBase, DT);
+ }
// In order to reduce live set of statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
@@ -2467,7 +2474,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
#ifndef NDEBUG
// sanity check
for (auto *Ptr : Live)
- assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
+ assert(isHandledGCPointerType(Ptr->getType()) &&
+ "must be a gc pointer type");
#endif
relocationViaAlloca(F, DT, Live, Records);
@@ -2547,7 +2555,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
static bool shouldRewriteStatepointsIn(Function &F) {
// TODO: This should check the GCStrategy
if (F.hasGC()) {
- const char *FunctionGCName = F.getGC();
+ const auto &FunctionGCName = F.getGC();
const StringRef StatepointExampleName("statepoint-example");
const StringRef CoreCLRName("coreclr");
return (StatepointExampleName == FunctionGCName) ||
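CreateGCRelocates now fetches the gc.relocate declaration through a lazily populated per-type map (TypeToDeclMap), so the declaration is materialized once per distinct pointer or vector-of-pointer type rather than once per live value. The caching idiom in a standalone form, with stand-in types that are only illustrative:

#include <string>
#include <unordered_map>

// Stand-ins for the real key and declaration types; the point is the cache.
using Key = std::string;
using Decl = int;

Decl makeDeclaration(const Key &K) {
  // Imagine a comparatively expensive lookup/creation here.
  return static_cast<Decl>(K.size());
}

// Fill the cache on first use, reuse it afterwards.
Decl getCachedDeclaration(std::unordered_map<Key, Decl> &Cache, const Key &K) {
  auto It = Cache.find(K);
  if (It == Cache.end())
    It = Cache.emplace(K, makeDeclaration(K)).first;
  return It->second;
}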
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 2fca803..8569e08 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -757,9 +757,14 @@ void SCCPSolver::visitCastInst(CastInst &I) {
LatticeVal OpSt = getValueState(I.getOperand(0));
if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
- else if (OpSt.isConstant()) // Propagate constant value
- markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
- OpSt.getConstant(), I.getType()));
+ else if (OpSt.isConstant()) {
+ Constant *C =
+ ConstantExpr::getCast(I.getOpcode(), OpSt.getConstant(), I.getType());
+ if (isa<UndefValue>(C))
+ return;
+ // Propagate constant value
+ markConstant(&I, C);
+ }
}
@@ -859,10 +864,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I,
- ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
- V2State.getConstant()));
+ if (V1State.isConstant() && V2State.isConstant()) {
+ Constant *C = ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant());
+ // X op Y -> undef.
+ if (isa<UndefValue>(C))
+ return;
+ return markConstant(IV, &I, C);
+ }
// If something is undef, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
@@ -917,10 +926,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
- V1State.getConstant(),
- V2State.getConstant()));
+ if (V1State.isConstant() && V2State.isConstant()) {
+ Constant *C = ConstantExpr::getCompare(
+ I.getPredicate(), V1State.getConstant(), V2State.getConstant());
+ if (isa<UndefValue>(C))
+ return;
+ return markConstant(IV, &I, C);
+ }
// If operands are still undefined, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
@@ -1020,8 +1032,11 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
- markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
- Indices));
+ Constant *C =
+ ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
+ if (isa<UndefValue>(C))
+ return;
+ markConstant(&I, C);
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1061,9 +1076,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
Constant *Ptr = PtrVal.getConstant();
- // load null -> null
+ // load null is undefined.
if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
- return markConstant(IV, &I, UndefValue::get(I.getType()));
+ return;
// Transform load (constant global) into the value loaded.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
@@ -1079,8 +1094,11 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL))
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) {
+ if (isa<UndefValue>(C))
+ return;
return markConstant(IV, &I, C);
+ }
// Otherwise we cannot say for certain what value this load will produce.
// Bail out.
@@ -1122,8 +1140,12 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands, TLI))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI)) {
+ // call -> undef.
+ if (isa<UndefValue>(C))
+ return;
return markConstant(I, C);
+ }
}
// Otherwise, we don't know anything about this call, mark it overdefined.
@@ -1379,6 +1401,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X % undef -> undef. No change.
if (Op1LV.isUndefined()) break;
+ // X / 0 -> undef. No change.
+ // X % 0 -> undef. No change.
+ if (Op1LV.isConstant() && Op1LV.getConstant()->isZeroValue())
+ break;
+
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
markForcedConstant(&I, Constant::getNullValue(ITy));
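All of the SCCP hunks above apply the same guard: when constant folding a cast, binary operator, compare, GEP, load, or call produces an undef value (or the divisor is a known zero), the solver simply returns instead of marking the result constant, leaving it to the undef-resolution logic. A toy version of that guard, with std::optional standing in for the lattice value:

#include <optional>

// Toy folder: dividing by zero has no defined result, so return nothing
// rather than inventing a value -- the analogue of the isa<UndefValue>(C)
// checks in the patch.
std::optional<int> foldDiv(int Lhs, int Rhs) {
  if (Rhs == 0)
    return std::nullopt; // would be undef; do not propagate a constant
  return Lhs / Rhs;
}

// Caller mirrors markConstant(): only record a value when folding succeeded.
void visitDiv(std::optional<int> &LatticeValue, int Lhs, int Rhs) {
  if (auto C = foldDiv(Lhs, Rhs))
    LatticeValue = *C; // propagate the constant
  // otherwise: wait, exactly like the early return in the patch
}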
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 0e0b00d..4e84d72 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -425,9 +425,7 @@ bool TailCallElim::runTRE(Function &F) {
// with themselves. Check to see if we did and clean up our mess if so. This
// occurs when a function passes an argument straight through to its tail
// call.
- for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
- PHINode *PN = ArgumentPHIs[i];
-
+ for (PHINode *PN : ArgumentPHIs) {
// If the PHI Node is a dynamic constant, replace it with the value it is.
if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
@@ -468,10 +466,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// return value of the call, it must only use things that are defined before
// the call, or movable instructions between the call and the instruction
// itself.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (I->getOperand(i) == CI)
- return false;
- return true;
+ return std::find(I->op_begin(), I->op_end(), CI) == I->op_end();
}
/// Return true if the specified value is the same when the return would exit
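The rewritten ending of CanMoveAboveCall is a plain membership test over the instruction's operands, so the index loop collapses to a std::find over the operand range. The same equivalence in ordinary C++, with an illustrative container standing in for the operand list:

#include <algorithm>
#include <vector>

// Returns true when Needle is *not* among the operands -- the condition the
// hand-written index loop computed.
bool noOperandIs(const std::vector<int> &Operands, int Needle) {
  return std::find(Operands.begin(), Operands.end(), Needle) == Operands.end();
}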
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index a5137e9..72db980 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -626,11 +626,17 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
Clone2->setName(Twine("lpad") + Suffix2);
NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
- // Create a PHI node for the two cloned landingpad instructions.
- PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
- PN->addIncoming(Clone1, NewBB1);
- PN->addIncoming(Clone2, NewBB2);
- LPad->replaceAllUsesWith(PN);
+ // Create a PHI node for the two cloned landingpad instructions only
+ // if the original landingpad instruction has some uses.
+ if (!LPad->use_empty()) {
+ assert(!LPad->getType()->isTokenTy() &&
+ "Split cannot be applied if LPad is token type. Otherwise an "
+ "invalid PHINode of token type would be created.");
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ }
LPad->eraseFromParent();
} else {
// There is no second clone. Just replace the landing pad with the first
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 854a3b8..6454afb 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,27 +266,14 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- CloningDirector *Director;
- ValueMapTypeRemapper *TypeMapper;
- ValueMaterializer *Materializer;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap, bool moduleLevelChanges,
- const char *nameSuffix, ClonedCodeInfo *codeInfo,
- CloningDirector *Director)
+ const char *nameSuffix, ClonedCodeInfo *codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
- CodeInfo(codeInfo), Director(Director) {
- // These are optional components. The Director may return null.
- if (Director) {
- TypeMapper = Director->getTypeRemapper();
- Materializer = Director->getValueMaterializer();
- } else {
- TypeMapper = nullptr;
- Materializer = nullptr;
- }
- }
+ CodeInfo(codeInfo) {}
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
@@ -332,23 +319,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
II != IE; ++II) {
- // If the "Director" remaps the instruction, don't clone it.
- if (Director) {
- CloningDirector::CloningAction Action =
- Director->handleInstruction(VMap, &*II, NewBB);
- // If the cloning director says stop, we want to stop everything, not
- // just break out of the loop (which would cause the terminator to be
- // cloned). The cloning director is responsible for inserting a proper
- // terminator into the new basic block in this case.
- if (Action == CloningDirector::StopCloningBB)
- return;
- // If the cloning director says skip, continue to the next instruction.
- // In this case, the cloning director is responsible for mapping the
- // skipped instruction to some value that is defined in the new
- // basic block.
- if (Action == CloningDirector::SkipInstruction)
- continue;
- }
Instruction *NewInst = II->clone();
@@ -356,8 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// nodes for which we defer processing until we update the CFG.
if (!isa<PHINode>(NewInst)) {
RemapInstruction(NewInst, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
@@ -397,26 +366,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
- if (Director) {
- CloningDirector::CloningAction Action
- = Director->handleInstruction(VMap, OldTI, NewBB);
- // If the cloning director says stop, we want to stop everything, not
- // just break out of the loop (which would cause the terminator to be
- // cloned). The cloning director is responsible for inserting a proper
- // terminator into the new basic block in this case.
- if (Action == CloningDirector::StopCloningBB)
- return;
- if (Action == CloningDirector::CloneSuccessors) {
- // If the director says to skip with a terminate instruction, we still
- // need to clone this block's successors.
- const TerminatorInst *TI = NewBB->getTerminator();
- for (const BasicBlock *Succ : TI->successors())
- ToClone.push_back(Succ);
- return;
- }
- assert(Action != CloningDirector::SkipInstruction &&
- "SkipInstruction is not valid for terminators.");
- }
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
// If the condition was a known constant in the callee...
@@ -485,19 +434,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
- const char *NameSuffix,
- ClonedCodeInfo *CodeInfo,
- CloningDirector *Director) {
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
ValueMapTypeRemapper *TypeMapper = nullptr;
ValueMaterializer *Materializer = nullptr;
- if (Director) {
- TypeMapper = Director->getTypeRemapper();
- Materializer = Director->getValueMaterializer();
- }
-
#ifndef NDEBUG
// If the cloning starts at the beginning of the function, verify that
// the function arguments are mapped.
@@ -507,7 +450,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, Director);
+ NameSuffix, CodeInfo);
const BasicBlock *StartingBB;
if (StartingInst)
StartingBB = StartingInst->getParent();
@@ -731,8 +674,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
- ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
- nullptr);
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
}
/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 0e386ac..d2793e5 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -1051,9 +1052,31 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
- if (ExtendedArg)
- Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+ if (ExtendedArg) {
+ // We're now only describing a subset of the variable. The piece we're
+ // describing will always be smaller than the variable size, because
+ // VariableSize == Size of Alloca described by DDI. Since SI stores
+ // to the alloca described by DDI, if its first operand is an extend,
+ // we're guaranteed that before extension, the value was narrower than
+ // the size of the alloca, hence the size of the described variable.
+ SmallVector<uint64_t, 3> NewDIExpr;
+ unsigned PieceOffset = 0;
+ // If this already is a bit piece, we drop the bit piece from the expression
+ // and record the offset.
+ if (DIExpr->isBitPiece()) {
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
+ PieceOffset = DIExpr->getBitPieceOffset();
+ } else {
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ }
+ NewDIExpr.push_back(dwarf::DW_OP_bit_piece);
+ NewDIExpr.push_back(PieceOffset); // Offset
+ const DataLayout &DL = DDI->getModule()->getDataLayout();
+ NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
+ Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar,
+ Builder.createExpression(NewDIExpr),
DDI->getDebugLoc(), SI);
+ }
else
Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
DDI->getDebugLoc(), SI);
@@ -1407,7 +1430,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {
/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
-bool llvm::removeUnreachableBlocks(Function &F) {
+bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
SmallPtrSet<BasicBlock*, 128> Reachable;
bool Changed = markAliveBlocks(F, Reachable);
@@ -1428,6 +1451,8 @@ bool llvm::removeUnreachableBlocks(Function &F) {
++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(&*BB);
+ if (LVI)
+ LVI->eraseBlock(&*BB);
BB->dropAllReferences();
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 2499b88..eea9237 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -528,7 +528,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
Loop *OuterL = L->getParentLoop();
// Update LoopInfo if the loop is completely removed.
if (CompletelyUnroll)
- LI->updateUnloop(L);;
+ LI->markAsRemoved(L);
// If we have a pass and a DominatorTree we should re-simplify impacted loops
// to ensure subsequent analyses can rely on this form. We want to simplify
@@ -542,7 +542,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
- // LoopInfo's been updated by updateUnloop.
+ // LoopInfo's been updated by markAsRemoved.
Loop *LatchLoop = LI->getLoopFor(Latches.back());
if (!OuterL->contains(LatchLoop))
while (OuterL->getParentLoop() != LatchLoop)
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index e038805..fa958e9 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -599,7 +599,7 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
FastMathFlags FMF;
FMF.setUnsafeAlgebra();
- Builder.SetFastMathFlags(FMF);
+ Builder.setFastMathFlags(FMF);
Value *Cmp;
if (RK == MRK_FloatMin || RK == MRK_FloatMax)
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3bb3fa5..3125a2c 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -141,6 +141,8 @@ class SimplifyCFGOpt {
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool SimplifySingleResume(ResumeInst *RI);
+ bool SimplifyCommonResume(ResumeInst *RI);
bool SimplifyCleanupReturn(CleanupReturnInst *RI);
bool SimplifyUnreachable(UnreachableInst *UI);
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
@@ -3239,14 +3241,101 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
}
bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
- // If this is a trivial landing pad that just continues unwinding the caught
- // exception then zap the landing pad, turning its invokes into calls.
+ if (isa<PHINode>(RI->getValue()))
+ return SimplifyCommonResume(RI);
+ else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
+ RI->getValue() == RI->getParent()->getFirstNonPHI())
+ // The resume must unwind the exception that caused control to branch here.
+ return SimplifySingleResume(RI);
+
+ return false;
+}
+
+// Simplify resume that is shared by several landing pads (phi of landing pad).
+bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
+ BasicBlock *BB = RI->getParent();
+
+ // Check that there are no other instructions except for debug intrinsics
+ // between the phi of landing pads (RI->getValue()) and resume instruction.
+ BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
+ E = RI->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ SmallSet<BasicBlock *, 4> TrivialUnwindBlocks;
+ auto *PhiLPInst = cast<PHINode>(RI->getValue());
+
+ // Check incoming blocks to see if any of them are trivial.
+ for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues();
+ Idx != End; Idx++) {
+ auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
+ auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
+
+ // If the block has other successors, we cannot delete it because
+ // it has other dependents.
+ if (IncomingBB->getUniqueSuccessor() != BB)
+ continue;
+
+ auto *LandingPad =
+ dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
+ // Not the landing pad that caused the control to branch here.
+ if (IncomingValue != LandingPad)
+ continue;
+
+ bool isTrivial = true;
+
+ I = IncomingBB->getFirstNonPHI()->getIterator();
+ E = IncomingBB->getTerminator()->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ isTrivial = false;
+ break;
+ }
+
+ if (isTrivial)
+ TrivialUnwindBlocks.insert(IncomingBB);
+ }
+
+ // If no trivial unwind blocks, don't do any simplifications.
+ if (TrivialUnwindBlocks.empty()) return false;
+
+ // Turn all invokes that unwind here into calls.
+ for (auto *TrivialBB : TrivialUnwindBlocks) {
+ // Blocks that will be simplified should be removed from the phi node.
+ // Note there could be multiple edges to the resume block, and we need
+ // to remove them all.
+ while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
+ BB->removePredecessor(TrivialBB, true);
+
+ for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
+ PI != PE;) {
+ BasicBlock *Pred = *PI++;
+ removeUnwindEdge(Pred);
+ }
+
+ // In each SimplifyCFG run, only the currently processed block can be erased.
+ // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
+ // of erasing TrivialBB, we only remove the branch to the common resume
+ // block so that we can later erase the resume block since it has no
+ // predecessors.
+ TrivialBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(RI->getContext(), TrivialBB);
+ }
+
+ // Delete the resume block if all its predecessors have been removed.
+ if (pred_empty(BB))
+ BB->eraseFromParent();
+
+ return !TrivialUnwindBlocks.empty();
+}
+
+// Simplify resume that is only used by a single (non-phi) landing pad.
+bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
- if (RI->getValue() != LPInst)
- // Not a landing pad, or the resume is not unwinding the exception that
- // caused control to branch here.
- return false;
+ assert(RI->getValue() == LPInst &&
+ "Resume must unwind the exception that caused control to branch here");
// Check that there are no other instructions except for debug intrinsics.
BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
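SimplifyCommonResume only treats an incoming block as trivial when everything between its landing pad and its terminator is a debug intrinsic, mirroring the inner while loop above. A toy model of that test, with a block reduced to a list of instruction kinds (purely illustrative, not the pass's data structures):

#include <cstddef>
#include <vector>

enum class Kind { LandingPad, DebugIntrinsic, Branch, Other };

// A block is a trivial unwind block when, apart from its leading landing pad
// and its terminating branch, it contains only debug intrinsics.
bool isTrivialUnwindBlock(const std::vector<Kind> &Block) {
  if (Block.size() < 2 || Block.front() != Kind::LandingPad ||
      Block.back() != Kind::Branch)
    return false;
  for (std::size_t I = 1; I + 1 < Block.size(); ++I)
    if (Block[I] != Kind::DebugIntrinsic)
      return false;
  return true;
}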
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index dc5fee5..dc07440 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -997,7 +997,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
// Propagate fast-math flags from the existing call to the new call.
IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(CI->getFastMathFlags());
+ B.setFastMathFlags(CI->getFastMathFlags());
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
@@ -1035,7 +1035,7 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
// Propagate fast-math flags from the existing call to the new call.
IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(CI->getFastMathFlags());
+ B.setFastMathFlags(CI->getFastMathFlags());
// fmin((double)floatval1, (double)floatval2)
// -> (double)fminf(floatval1, floatval2)
@@ -1127,29 +1127,26 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Callee->getAttributes());
}
+ // FIXME: Use instruction-level FMF.
bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
- // pow(exp(x), y) -> exp(x*y)
+ // pow(exp(x), y) -> exp(x * y)
// pow(exp2(x), y) -> exp2(x * y)
- // We enable these only under fast-math. Besides rounding
- // differences the transformation changes overflow and
- // underflow behavior quite dramatically.
+ // We enable these only with fast-math. Besides rounding differences, the
+ // transformation changes overflow and underflow behavior quite dramatically.
// Example: x = 1000, y = 0.001.
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- if (UnsafeFPMath) {
- if (auto *OpC = dyn_cast<CallInst>(Op1)) {
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) {
+ LibFunc::Func Func;
+ Function *OpCCallee = OpC->getCalledFunction();
+ if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
+ TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- FMF.setUnsafeAlgebra();
- B.SetFastMathFlags(FMF);
-
- LibFunc::Func Func;
- Function *OpCCallee = OpC->getCalledFunction();
- if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
- TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2))
- return EmitUnaryFloatFnCall(
- B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"),
- OpCCallee->getName(), B, OpCCallee->getAttributes());
+ B.setFastMathFlags(CI->getFastMathFlags());
+ Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
+ return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
+ OpCCallee->getAttributes());
}
}
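The comment's example is worth working through: with x = 1000 and y = 0.001, exp(x) overflows to infinity, so pow(exp(x), y) is infinite, while the rewritten exp(x * y) is just exp(1). That overflow difference is why the fold now requires unsafe-algebra on both the pow call and the inner exp call. A small self-contained demonstration:

#include <cmath>
#include <cstdio>

int main() {
  double x = 1000.0, y = 0.001;
  // exp(1000) overflows double, so pow(exp(x), y) is pow(inf, 0.001) == inf.
  double before = std::pow(std::exp(x), y);
  // The rewritten form stays finite: exp(1000 * 0.001) == exp(1).
  double after = std::exp(x * y);
  std::printf("pow(exp(x), y) = %g\n", before); // inf
  std::printf("exp(x * y)     = %g\n", after);  // ~2.71828
  return 0;
}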
@@ -1167,9 +1164,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
LibFunc::fabsl)) {
// In -ffast-math, pow(x, 0.5) -> sqrt(x).
- if (UnsafeFPMath)
+ if (CI->hasUnsafeAlgebra()) {
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
Callee->getAttributes());
+ }
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
@@ -1328,7 +1328,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
FMF.setNoSignedZeros();
FMF.setNoNaNs();
}
- B.SetFastMathFlags(FMF);
+ B.setFastMathFlags(FMF);
// We have a relaxed floating-point environment. We can ignore NaN-handling
// and transform to a compare and select. We do not have to consider errno or
@@ -1354,11 +1354,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
!FT->getParamType(0)->isFloatingPointTy())
return Ret;
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ if (!CI->hasUnsafeAlgebra())
return Ret;
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
- if (!OpC)
+
+ // The earlier call must also be unsafe in order to do these transforms.
+ if (!OpC || !OpC->hasUnsafeAlgebra())
return Ret;
// log(pow(x,y)) -> y*log(x)
@@ -1369,7 +1371,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
FMF.setUnsafeAlgebra();
- B.SetFastMathFlags(FMF);
+ B.setFastMathFlags(FMF);
LibFunc::Func Func;
Function *F = OpC->getCalledFunction();
@@ -1397,66 +1399,67 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+
+ if (!CI->hasUnsafeAlgebra())
return Ret;
- Value *Op = CI->getArgOperand(0);
- if (Instruction *I = dyn_cast<Instruction>(Op)) {
- if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) {
- // We're looking for a repeated factor in a multiplication tree,
- // so we can do this fold: sqrt(x * x) -> fabs(x);
- // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y).
- Value *Op0 = I->getOperand(0);
- Value *Op1 = I->getOperand(1);
- Value *RepeatOp = nullptr;
- Value *OtherOp = nullptr;
- if (Op0 == Op1) {
- // Simple match: the operands of the multiply are identical.
- RepeatOp = Op0;
- } else {
- // Look for a more complicated pattern: one of the operands is itself
- // a multiply, so search for a common factor in that multiply.
- // Note: We don't bother looking any deeper than this first level or for
- // variations of this pattern because instcombine's visitFMUL and/or the
- // reassociation pass should give us this form.
- Value *OtherMul0, *OtherMul1;
- if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
- // Pattern: sqrt((x * y) * z)
- if (OtherMul0 == OtherMul1) {
- // Matched: sqrt((x * x) * z)
- RepeatOp = OtherMul0;
- OtherOp = Op1;
- }
- }
- }
- if (RepeatOp) {
- // Fast math flags for any created instructions should match the sqrt
- // and multiply.
- // FIXME: We're not checking the sqrt because it doesn't have
- // fast-math-flags (see earlier comment).
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(I->getFastMathFlags());
- // If we found a repeated factor, hoist it out of the square root and
- // replace it with the fabs of that factor.
- Module *M = Callee->getParent();
- Type *ArgType = Op->getType();
- Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
- Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
- if (OtherOp) {
- // If we found a non-repeated factor, we still need to get its square
- // root. We then multiply that by the value that was simplified out
- // of the square root calculation.
- Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
- Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
- return B.CreateFMul(FabsCall, SqrtCall);
- }
- return FabsCall;
+ Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
+ if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ return Ret;
+
+ // We're looking for a repeated factor in a multiplication tree,
+ // so we can do this fold: sqrt(x * x) -> fabs(x);
+ // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+ Value *RepeatOp = nullptr;
+ Value *OtherOp = nullptr;
+ if (Op0 == Op1) {
+ // Simple match: the operands of the multiply are identical.
+ RepeatOp = Op0;
+ } else {
+ // Look for a more complicated pattern: one of the operands is itself
+ // a multiply, so search for a common factor in that multiply.
+ // Note: We don't bother looking any deeper than this first level or for
+ // variations of this pattern because instcombine's visitFMUL and/or the
+ // reassociation pass should give us this form.
+ Value *OtherMul0, *OtherMul1;
+ if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
+ // Pattern: sqrt((x * y) * z)
+ if (OtherMul0 == OtherMul1 &&
+ cast<Instruction>(Op0)->hasUnsafeAlgebra()) {
+ // Matched: sqrt((x * x) * z)
+ RepeatOp = OtherMul0;
+ OtherOp = Op1;
}
}
}
- return Ret;
-}
+ if (!RepeatOp)
+ return Ret;
+ // Fast math flags for any created instructions should match the sqrt
+ // and multiply.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(I->getFastMathFlags());
+
+ // If we found a repeated factor, hoist it out of the square root and
+ // replace it with the fabs of that factor.
+ Module *M = Callee->getParent();
+ Type *ArgType = I->getType();
+ Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+ Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
+ if (OtherOp) {
+ // If we found a non-repeated factor, we still need to get its square
+ // root. We then multiply that by the value that was simplified out
+ // of the square root calculation.
+ Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+ Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
+ return B.CreateFMul(FabsCall, SqrtCall);
+ }
+ return FabsCall;
+}
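A standalone sketch (not part of the patch) of the two folds the rewritten function performs, with the fabs spelled out; sqrt(x * x) must become fabs(x), not x, because x may be negative:

#include <cmath>
#include <cassert>

double foldSqrtXX(double x)            { return std::fabs(x); }                // sqrt(x * x)
double foldSqrtXXY(double x, double y) { return std::fabs(x) * std::sqrt(y); } // sqrt((x * x) * y)

int main() {
  assert(foldSqrtXX(-3.0) == std::sqrt(-3.0 * -3.0));               // 3, not -3
  assert(foldSqrtXXY(-2.0, 9.0) == std::sqrt((-2.0 * -2.0) * 9.0)); // 6
  return 0;
}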
+
+// TODO: Generalize to handle any trig function and its inverse.
Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
@@ -1471,13 +1474,15 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
!FT->getParamType(0)->isFloatingPointTy())
return Ret;
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
- return Ret;
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
if (!OpC)
return Ret;
+ // Both calls must allow unsafe optimizations in order to remove them.
+ if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra())
+ return Ret;
+
// tan(atan(x)) -> x
// tanf(atanf(x)) -> x
// tanl(atanl(x)) -> x
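The identity this hunk relies on only holds exactly in real arithmetic; a standalone sketch (not part of the patch) of the round trip that strict IEEE semantics would have to preserve bit-for-bit, which is why both calls must carry the unsafe-algebra flag before they are removed:

#include <cmath>
#include <cstdio>

int main() {
  double x = 0.1;
  double roundTripped = std::tan(std::atan(x)); // may differ from x in the last few ULPs
  std::printf("x            = %.17g\n", x);
  std::printf("tan(atan(x)) = %.17g\n", roundTripped);
  return 0;
}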
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 2e361d3..f47ddb9 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -222,8 +222,17 @@ static void resolveCycles(Metadata *MD, bool AllowTemps) {
if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
if (AllowTemps && N->isTemporary())
return;
- if (!N->isResolved())
- N->resolveCycles(AllowTemps);
+ if (!N->isResolved()) {
+ if (AllowTemps)
+ // Note that this will drop RAUW support on any temporaries, which
+ // blocks uniquing. If this ends up being an issue, in the future
+ // we can experiment with delaying resolving these nodes until
+ // after metadata is fully materialized (i.e. when linking metadata
+ // as a postpass after function importing).
+ N->resolveNonTemporaries();
+ else
+ N->resolveCycles();
+ }
}
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9ed44d1..27d3337 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -222,7 +222,7 @@ static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
}
}
}
-
+
/// \returns \p I after propagating metadata from \p VL.
static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
@@ -506,7 +506,7 @@ private:
}
return Last;
}
-
+
/// -- Vectorization State --
/// Holds all of the tree entries.
std::vector<TreeEntry> VectorizableTree;
@@ -884,7 +884,7 @@ private:
/// The current size of the scheduling region.
int ScheduleRegionSize;
-
+
/// The maximum size allowed for the scheduling region.
int ScheduleRegionSizeLimit;
@@ -1089,7 +1089,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
newTreeEntry(VL, false);
return;
}
-
+
// Check that every instructions appears once in this bundle.
for (unsigned i = 0, e = VL.size(); i < e; ++i)
for (unsigned j = i+1; j < e; ++j)
@@ -1711,7 +1711,7 @@ int BoUpSLP::getSpillCost() {
int Cost = 0;
SmallPtrSet<Instruction*, 4> LiveValues;
- Instruction *PrevInst = nullptr;
+ Instruction *PrevInst = nullptr;
for (unsigned N = 0; N < VectorizableTree.size(); ++N) {
Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]);
@@ -1736,7 +1736,7 @@ int BoUpSLP::getSpillCost() {
for (auto &J : PrevInst->operands()) {
if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
LiveValues.insert(cast<Instruction>(&*J));
- }
+ }
// Now find the sequence of instructions between PrevInst and Inst.
BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
@@ -1780,30 +1780,29 @@ int BoUpSLP::getTreeCost() {
unsigned BundleWidth = VectorizableTree[0].Scalars.size();
- for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) {
- int C = getEntryCost(&VectorizableTree[i]);
+ for (TreeEntry &TE : VectorizableTree) {
+ int C = getEntryCost(&TE);
DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with "
- << *VectorizableTree[i].Scalars[0] << " .\n");
+ << TE.Scalars[0] << " .\n");
Cost += C;
}
SmallSet<Value *, 16> ExtractCostCalculated;
int ExtractCost = 0;
- for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end();
- I != E; ++I) {
+ for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
- if (!ExtractCostCalculated.insert(I->Scalar).second)
+ if (!ExtractCostCalculated.insert(EU.Scalar).second)
continue;
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
// removed as well).
- if (EphValues.count(I->User))
+ if (EphValues.count(EU.User))
continue;
- VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth);
+ VectorType *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
- I->Lane);
+ EU.Lane);
}
Cost += getSpillCost();
@@ -2551,7 +2550,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *BoUpSLP::vectorizeTree() {
-
+
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
@@ -3072,10 +3071,10 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
}
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
-
+
if (!BS->ScheduleStart)
return;
-
+
DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
BS->resetSchedule();
@@ -3590,7 +3589,7 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
/// \param NumEltsToRdx The number of elements that should be reduced in the
/// vector.
/// \param IsPairwise Whether the reduction is a pairwise or splitting
-/// reduction. A pairwise reduction will generate a mask of
+/// reduction. A pairwise reduction will generate a mask of
/// <0,2,...> or <1,3,..> while a splitting reduction will generate
/// <2,3, undef,undef> for a vector of 4 and NumElts = 2.
/// \param IsLeft True will generate a mask of even elements, odd otherwise.
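A standalone scalar sketch (not part of the patch) of the two reduction shapes the mask comment describes, written out for a 4-wide vector {a, b, c, d}; the lane pairings mirror the <0,2,...>/<1,3,...> masks of the pairwise form and the <2,3,undef,undef> high-half mask of the splitting form:

#include <array>

// Pairwise: combine even lanes with odd lanes, then repeat on the half-width result.
double pairwiseReduce(const std::array<double, 4> &v) {
  double t0 = v[0] + v[1]; // lanes <0,2> + <1,3>, first pair
  double t1 = v[2] + v[3]; //                      second pair
  return t0 + t1;
}

// Splitting: add the high half <2,3,undef,undef> onto the low half, then halve again.
double splittingReduce(const std::array<double, 4> &v) {
  double t0 = v[0] + v[2]; // low half + high half, lane 0
  double t1 = v[1] + v[3]; //                       lane 1
  return t0 + t1;
}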
@@ -3773,7 +3772,7 @@ public:
IRBuilder<> Builder(ReductionRoot);
FastMathFlags Unsafe;
Unsafe.setUnsafeAlgebra();
- Builder.SetFastMathFlags(Unsafe);
+ Builder.setFastMathFlags(Unsafe);
unsigned i = 0;
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
@@ -4018,9 +4017,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Collect the incoming values from the PHIs.
Incoming.clear();
- for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
- ++instr) {
- PHINode *P = dyn_cast<PHINode>(instr);
+ for (Instruction &I : *BB) {
+ PHINode *P = dyn_cast<PHINode>(&I);
if (!P)
break;