path: root/contrib/llvm/lib/Transforms
author     dim <dim@FreeBSD.org>    2015-05-27 20:26:41 +0000
committer  dim <dim@FreeBSD.org>    2015-05-27 20:26:41 +0000
commit     5ef8fd3549d38e883a31881636be3dc2a275de20 (patch)
tree       bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/Transforms
parent     77794ebe2d5718eb502c93ec32f8ccae4d8a0b7b (diff)
parent     782067d0278612ee75d024b9b135c221c327e9e8 (diff)
Merge llvm trunk r238337 from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/Transforms')
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 108
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp | 25
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp | 198
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 18
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp | 26
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 125
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/IPO.cpp | 3
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 143
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp | 732
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp | 66
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 110
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/PruneEH.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp | 24
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 119
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 53
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 392
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 264
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 231
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h (renamed from contrib/llvm/lib/Transforms/InstCombine/InstCombine.h) | 305
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 561
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 46
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 17
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 345
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 31
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 274
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 129
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h | 107
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 824
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 954
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp | 32
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 156
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp | 142
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 33
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 413
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 334
-rw-r--r--  contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 201
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp | 673
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h | 123
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 50
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h | 108
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 101
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h | 21
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h | 266
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 37
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 4
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp | 499
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 14
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 1530
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 254
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 31
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 9
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp | 404
-rw-r--r--  contrib/llvm/lib/Transforms/ObjCARC/PtrState.h | 210
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ADCE.cpp | 70
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 19
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/BDCE.cpp | 410
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 10
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp | 13
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 47
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DCE.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 68
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 654
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp | 540
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 366
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 176
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 1495
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 40
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 439
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp | 39
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 976
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 146
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp | 18
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 1300
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 1287
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 93
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 381
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 597
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 55
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp (renamed from contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp) | 118
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 226
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 11
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp | 481
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 14
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp | 993
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 114
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 2506
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 29
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SROA.cpp | 195
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp | 22
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 21
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 211
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp | 28
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 199
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 112
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/Sink.cpp | 14
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp | 243
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp | 710
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 77
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 76
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp | 8
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp | 45
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 215
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 63
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 161
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp | 224
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CloneModule.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 30
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp | 3
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp | 34
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp | 2
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp | 234
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp | 4
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LCSSA.cpp | 32
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/Local.cpp | 149
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 80
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 51
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 46
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp | 499
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp | 240
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp | 32
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 164
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 51
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 661
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 136
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp | 12
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 348
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp | 15
-rw-r--r--  contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 225
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 72
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2702
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 521
-rw-r--r--  contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp | 2
137 files changed, 23593 insertions(+), 11030 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 3282022..7b7672d 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -36,6 +36,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -69,16 +70,15 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override;
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) {
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
- const DataLayout *DL;
private:
- bool isDenselyPacked(Type *type);
+ bool isDenselyPacked(Type *type, const DataLayout &DL);
bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
@@ -90,7 +90,7 @@ namespace {
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
};
}
@@ -109,9 +109,6 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false, LocalChange;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
@@ -128,7 +125,7 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
}
/// \brief Checks if a type could have padding bytes.
-bool ArgPromotion::isDenselyPacked(Type *type) {
+bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
// There is no size information, so be conservative.
if (!type->isSized())
@@ -136,7 +133,7 @@ bool ArgPromotion::isDenselyPacked(Type *type) {
// If the alloc size is not equal to the storage size, then there are padding
// bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type))
+ if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
return false;
if (!isa<CompositeType>(type))
@@ -144,19 +141,20 @@ bool ArgPromotion::isDenselyPacked(Type *type) {
// For homogenous sequential types, check for padding within members.
if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
- return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType());
+ return isa<PointerType>(seqTy) ||
+ isDenselyPacked(seqTy->getElementType(), DL);
// Check for padding within and between elements of a struct.
StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL->getStructLayout(StructTy);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
uint64_t StartPos = 0;
for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy))
+ if (!isDenselyPacked(ElTy, DL))
return false;
if (StartPos != Layout->getElementOffsetInBits(i))
return false;
- StartPos += DL->getTypeAllocSizeInBits(ElTy);
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
}
return true;
@@ -210,6 +208,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// Make sure that it is local to this module.
if (!F || !F->hasLocalLinkage()) return nullptr;
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg()) return nullptr;
+
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
@@ -230,12 +235,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
isSelfRecursive = true;
}
- // Don't promote arguments for variadic functions. Adding, removing, or
- // changing non-pack parameters can change the classification of pack
- // parameters. Frontends encode that classification at the call site in the
- // IR, while in the callee the classification is determined dynamically based
- // on the number of registers consumed so far.
- if (F->isVarArg()) return nullptr;
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
@@ -250,8 +250,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// packed or if we can prove the padding bytes are never accessed. This does
// not apply to inalloca.
bool isSafeToPromote =
- PtrArg->hasByValAttr() &&
- (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
@@ -310,9 +310,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
- const DataLayout *DL) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
+ const DataLayout &DL = Callee->getParent()->getDataLayout();
unsigned ArgNo = Arg->getArgNo();
@@ -322,7 +322,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
CallSite CS(U);
assert(CS && "Should only have direct calls!");
- if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL))
+ if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
return false;
}
return true;
@@ -430,7 +430,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL))
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -561,8 +561,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// Now check every path from the entry block to the load for transparency.
// To do this, we perform a depth first search on the inverse CFG from the
// loading block.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
if (AA.canBasicBlockModify(*TranspBB, Loc))
return false;
@@ -587,7 +586,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
FunctionType *FTy = F->getFunctionType();
std::vector<Type*> Params;
- typedef std::set<IndicesVector> ScalarizeTable;
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
// ScalarizedElements - If we are promoting a pointer that has elements
// accessed out of it, keep track of which elements are accessed so that we
@@ -624,8 +623,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Params.push_back(STy->getElementType(i));
+ Params.insert(Params.end(), STy->element_begin(), STy->element_end());
++NumByValArgsPromoted;
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
@@ -648,7 +646,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ScalarizeTable &ArgIndices = ScalarizedElements[I];
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
- assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI));
+ Type *SrcTy;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ SrcTy = L->getType();
+ else
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
IndicesVector Indices;
Indices.reserve(UI->getNumOperands() - 1);
// Since loads will only have a single operand, and GEPs only a single
@@ -660,7 +662,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// GEPs with a single 0 index can be merged with direct loads
if (Indices.size() == 1 && Indices.front() == 0)
Indices.clear();
- ArgIndices.insert(Indices);
+ ArgIndices.insert(std::make_pair(SrcTy, Indices));
LoadInst *OrigLoad;
if (LoadInst *L = dyn_cast<LoadInst>(UI))
OrigLoad = L;
@@ -674,11 +676,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
// not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
+ Params.push_back(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ SI->second));
assert(Params.back());
}
- if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
++NumArgumentsPromoted;
else
++NumAggregatesPromoted;
@@ -702,8 +706,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end()) {
- DISubprogram SP = DI->second;
- SP.replaceFunction(NF);
+ DISubprogram *SP = DI->second;
+ SP->replaceFunction(NF);
// Ensure the map is updated so it can be reused on subsequent argument
// promotions of the same function.
FunctionDIs.erase(DI);
@@ -769,9 +773,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
- (*AI)->getName()+"."+utostr(i),
- Call);
+ Value *Idx = GetElementPtrInst::Create(
+ STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call);
// TODO: Tell AA about the new values?
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
}
@@ -784,12 +787,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
- if (!SI->empty()) {
- Ops.reserve(SI->size());
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)];
+ if (!SI->second.empty()) {
+ Ops.reserve(SI->second.size());
Type *ElTy = V->getType();
- for (IndicesVector::const_iterator II = SI->begin(),
- IE = SI->end(); II != IE; ++II) {
+ for (IndicesVector::const_iterator II = SI->second.begin(),
+ IE = SI->second.end();
+ II != IE; ++II) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
Type *IdxTy = (ElTy->isStructTy() ?
@@ -800,7 +804,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
+ V = GetElementPtrInst::Create(SI->first, V, Ops,
+ V->getName() + ".idx", Call);
Ops.clear();
AA.copyValue(OrigLoad->getOperand(0), V);
}
@@ -858,7 +863,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
- CalleeNode->replaceCallEdge(Call, New, NF_CGN);
+ CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
if (!Call->use_empty()) {
Call->replaceAllUsesWith(New);
@@ -904,10 +909,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx =
- GetElementPtrInst::Create(TheAlloca, Idxs,
- TheAlloca->getName()+"."+Twine(i),
- InsertPt);
+ Value *Idx = GetElementPtrInst::Create(
+ AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+ InsertPt);
I2->setName(I->getName()+"."+Twine(i));
new StoreInst(I2++, Idx, InsertPt);
}
@@ -940,7 +944,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
while (!I->use_empty()) {
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
- assert(ArgIndices.begin()->empty() &&
+ assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
I2->setName(I->getName()+".val");
LI->replaceAllUsesWith(I2);
@@ -962,7 +966,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Function::arg_iterator TheArg = I2;
for (ScalarizeTable::iterator It = ArgIndices.begin();
- *It != Operands; ++It, ++TheArg) {
+ It->second != Operands; ++It, ++TheArg) {
assert(It != ArgIndices.end() && "GEP not handled??");
}
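
The ArgumentPromotion hunks above mostly track two LLVM 3.7-era API migrations that are visible in the patch itself: DataLayout is now obtained by reference from the Module rather than through an optional DataLayoutPass, and GetElementPtrInst::Create names the source element type explicitly. A minimal sketch of the new idiom follows; the helper and its parameter names (emitFieldGEP, Base, Idxs) are illustrative only and not part of the patch.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Illustrative helper: fetch DataLayout by reference from the Module (no null
// check needed any more) and create a GEP with an explicit source element type.
static Value *emitFieldGEP(Function &F, StructType *STy, Value *Base,
                           ArrayRef<Value *> Idxs, Instruction *InsertPt) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  (void)DL.getTypeAllocSizeInBits(STy);   // members reached with '.', not '->'
  return GetElementPtrInst::Create(STy, Base, Idxs,
                                   Base->getName() + ".field", InsertPt);
}
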
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 0b6ade9..8ce7646 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -52,7 +52,6 @@ namespace {
// alignment to a concrete value.
unsigned getAlignment(GlobalVariable *GV) const;
- const DataLayout *DL;
};
}
@@ -89,32 +88,22 @@ static bool IsBetterCanonical(const GlobalVariable &A,
return A.hasUnnamedAddr();
}
-bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
- return DL || GV->getAlignment() != 0;
-}
-
unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
unsigned Align = GV->getAlignment();
if (Align)
return Align;
- if (DL)
- return DL->getPreferredAlignment(GV);
- return 0;
+ return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
bool ConstantMerge::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
-
- // Map unique <constants, has-unknown-alignment> pairs to globals. We don't
- // want to merge globals of unknown alignment with those of explicit
- // alignment. If we have DataLayout, we always know the alignment.
- DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
+
+ // Map unique constants to globals.
+ DenseMap<Constant *, GlobalVariable *> CMap;
// Replacements - This vector contains a list of replacements to perform.
SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
@@ -156,8 +145,7 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
- GlobalVariable *&Slot = CMap[Pair];
+ GlobalVariable *&Slot = CMap[Init];
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
@@ -188,8 +176,7 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
- GlobalVariable *Slot = CMap[Pair];
+ GlobalVariable *Slot = CMap[Init];
if (!Slot || Slot == GV)
continue;
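
With DataLayout unconditionally available from the Module, ConstantMerge no longer needs the "has known alignment" bit in its map key, and the alignment query collapses to something like the sketch below (preferredAlign is an illustrative name; the calls mirror those in the hunk).

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Explicit alignment wins; otherwise ask the module's DataLayout for the
// preferred alignment of the global.
static unsigned preferredAlign(const GlobalVariable *GV) {
  if (unsigned Align = GV->getAlignment())
    return Align;
  return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
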
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4045c09..76898f2 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -73,8 +73,8 @@ namespace {
}
std::string getDescription() const {
- return std::string((IsArg ? "Argument #" : "Return value #"))
- + utostr(Idx) + " of function " + F->getName().str();
+ return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) +
+ " of function " + F->getName()).str();
}
};
@@ -127,7 +127,7 @@ namespace {
// As the code generation for module is finished (and DIBuilder is
// finalized) we assume that subprogram descriptors won't be changed, and
// they are stored in map for short duration anyway.
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
protected:
// DAH uses this to specify a different ID.
@@ -146,7 +146,7 @@ namespace {
private:
Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
- unsigned RetValNum = 0);
+ unsigned RetValNum = -1U);
Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
void SurveyFunction(const Function &F);
@@ -303,8 +303,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(&Fn);
if (DI != FunctionDIs.end()) {
- DISubprogram SP = DI->second;
- SP.replaceFunction(NF);
+ DISubprogram *SP = DI->second;
+ SP->replaceFunction(NF);
// Ensure the map is updated so it can be reused on non-varargs argument
// eliminations of the same function.
FunctionDIs.erase(DI);
@@ -387,14 +387,32 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
static unsigned NumRetVals(const Function *F) {
- if (F->getReturnType()->isVoidTy())
+ Type *RetTy = F->getReturnType();
+ if (RetTy->isVoidTy())
return 0;
- else if (StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ else if (StructType *STy = dyn_cast<StructType>(RetTy))
return STy->getNumElements();
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getNumElements();
else
return 1;
}
+/// Returns the sub-type a function will return at a given Idx. Should
+/// correspond to the result type of an ExtractValue instruction executed with
+/// just that one Idx (i.e. only top-level structure is considered).
+static Type *getRetComponentType(const Function *F, unsigned Idx) {
+ Type *RetTy = F->getReturnType();
+ assert(!RetTy->isVoidTy() && "void type has no subtype");
+
+ if (StructType *STy = dyn_cast<StructType>(RetTy))
+ return STy->getElementType(Idx);
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getElementType();
+ else
+ return RetTy;
+}
+
/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
/// liveness of Use.
@@ -425,9 +443,24 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
// function's return value is live. We use RetValNum here, for the case
// that U is really a use of an insertvalue instruction that uses the
// original Use.
- RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
- // We might be live, depending on the liveness of Use.
- return MarkIfNotLive(Use, MaybeLiveUses);
+ const Function *F = RI->getParent()->getParent();
+ if (RetValNum != -1U) {
+ RetOrArg Use = CreateRet(F, RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ } else {
+ DAE::Liveness Result = MaybeLive;
+ for (unsigned i = 0; i < NumRetVals(F); ++i) {
+ RetOrArg Use = CreateRet(F, i);
+ // We might be live, depending on the liveness of Use. If any
+ // sub-value is live, then the entire value is considered live. This
+ // is a conservative choice, and better tracking is possible.
+ DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+ if (Result != Live)
+ Result = SubResult;
+ }
+ return Result;
+ }
}
if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
@@ -449,7 +482,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
return Result;
}
- if (ImmutableCallSite CS = V) {
+ if (auto CS = ImmutableCallSite(V)) {
const Function *F = CS.getCalledFunction();
if (F) {
// Used in a direct call.
@@ -541,7 +574,6 @@ void DAE::SurveyFunction(const Function &F) {
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
- Type *STy = dyn_cast<StructType>(F.getReturnType());
// Loop all uses of the function.
for (const Use &U : F.uses()) {
// If the function is PASSED IN as an argument, its address has been
@@ -563,34 +595,35 @@ void DAE::SurveyFunction(const Function &F) {
// Now, check how our return value(s) is/are used in this caller. Don't
// bother checking return values if all of them are live already.
- if (NumLiveRetVals != RetCount) {
- if (STy) {
- // Check all uses of the return value.
- for (const User *U : TheCall->users()) {
- const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U);
- if (Ext && Ext->hasIndices()) {
- // This use uses a part of our return value, survey the uses of
- // that part and store the results for this index only.
- unsigned Idx = *Ext->idx_begin();
- if (RetValLiveness[Idx] != Live) {
- RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
- if (RetValLiveness[Idx] == Live)
- NumLiveRetVals++;
- }
- } else {
- // Used by something else than extractvalue. Mark all return
- // values as live.
- for (unsigned i = 0; i != RetCount; ++i )
- RetValLiveness[i] = Live;
- NumLiveRetVals = RetCount;
- break;
- }
+ if (NumLiveRetVals == RetCount)
+ continue;
+
+ // Check all uses of the return value.
+ for (const Use &U : TheCall->uses()) {
+ if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
}
} else {
- // Single return value
- RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
- if (RetValLiveness[0] == Live)
+ // Used by something else than extractvalue. Survey, but assume that the
+ // result applies to all sub-values.
+ UseVector MaybeLiveAggregateUses;
+ if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
NumLiveRetVals = RetCount;
+ RetValLiveness.assign(RetCount, Live);
+ break;
+ } else {
+ for (unsigned i = 0; i != RetCount; ++i) {
+ if (RetValLiveness[i] != Live)
+ MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
+ MaybeLiveAggregateUses.end());
+ }
+ }
}
}
}
@@ -775,39 +808,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (RetTy->isVoidTy() || HasLiveReturnedArg) {
NRetTy = RetTy;
} else {
- StructType *STy = dyn_cast<StructType>(RetTy);
- if (STy)
- // Look at each of the original return values individually.
- for (unsigned i = 0; i != RetCount; ++i) {
- RetOrArg Ret = CreateRet(F, i);
- if (LiveValues.erase(Ret)) {
- RetTypes.push_back(STy->getElementType(i));
- NewRetIdxs[i] = RetTypes.size() - 1;
- } else {
- ++NumRetValsEliminated;
- DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
- << F->getName() << "\n");
- }
- }
- else
- // We used to return a single value.
- if (LiveValues.erase(CreateRet(F, 0))) {
- RetTypes.push_back(RetTy);
- NewRetIdxs[0] = 0;
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(getRetComponentType(F, i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
} else {
- DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
- << "\n");
++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ if (RetTypes.size() > 1) {
+ // More than one return type? Reduce it down to size.
+ if (StructType *STy = dyn_cast<StructType>(RetTy)) {
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ } else {
+ assert(isa<ArrayType>(RetTy) && "unexpected multi-value return");
+ NRetTy = ArrayType::get(RetTypes[0], RetTypes.size());
}
- if (RetTypes.size() > 1)
- // More than one return type? Return a struct with them. Also, if we used
- // to return a struct and didn't change the number of return values,
- // return a struct again. This prevents changing {something} into
- // something and {} into void.
- // Make the new struct packed if we used to return a packed struct
- // already.
- NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
- else if (RetTypes.size() == 1)
+ } else if (RetTypes.size() == 1)
// One return type? Just a simple value then, but only if we didn't use to
// return a struct with that simple value before.
NRetTy = RetTypes.front();
@@ -826,17 +849,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs =
- AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex,
- AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- removeAttributes(AttributeFuncs::
- typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex));
+ RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NRetTy));
else
assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- hasAttributes(AttributeFuncs::
- typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex) &&
+ overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
"Return attributes no longer compatible?");
if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
@@ -880,13 +898,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
AttributeSet RAttrs = CallPAL.getRetAttributes();
// Adjust in case the function was changed to return void.
- RAttrs =
- AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex,
- AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- removeAttributes(AttributeFuncs::
- typeIncompatible(NF->getReturnType(),
- AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex));
+ RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NF->getReturnType()));
if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
@@ -959,9 +973,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (!Call->getType()->isX86_MMXTy())
Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
- assert(RetTy->isStructTy() &&
+ assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
- " must have been a struct!");
+ " must have been a struct or an array!");
Instruction *InsertPt = Call;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock::iterator IP = II->getNormalDest()->begin();
@@ -969,9 +983,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
InsertPt = IP;
}
- // We used to return a struct. Instead of doing smart stuff with all the
- // uses of this struct, we will just rebuild it using
- // extract/insertvalue chaining and let instcombine clean that up.
+ // We used to return a struct or array. Instead of doing smart stuff
+ // with all the uses, we will just rebuild it using extract/insertvalue
+ // chaining and let instcombine clean that up.
//
// Start out building up our return value from undef
Value *RetVal = UndefValue::get(RetTy);
@@ -1034,8 +1048,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (NFTy->getReturnType()->isVoidTy()) {
RetVal = nullptr;
} else {
- assert (RetTy->isStructTy());
- // The original return value was a struct, insert
+ assert(RetTy->isStructTy() || RetTy->isArrayTy());
+ // The original return value was a struct or array, insert
// extractvalue/insertvalue chains to extract only the values we need
// to return and insert them into our new result.
// This does generate messy code, but we'll let it to instcombine to
@@ -1069,7 +1083,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end())
- DI->second.replaceFunction(NF);
+ DI->second->replaceFunction(NF);
// Now that the old function is dead, delete it.
F->eraseFromParent();
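
As the rewritten comment in this file notes, callers of a function whose aggregate return type shrank are patched up by rebuilding the old aggregate with extractvalue/insertvalue chains and leaving the cleanup to instcombine. The sketch below shows that rebuild step in isolation; rebuildAggregateReturn, NewRetIdxs, NewCall, and InsertPt are stand-ins for the state the pass actually tracks, with a -1 index marking a dropped component.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Copy each surviving component of the new (narrower) call result back into an
// undef of the original aggregate type; instcombine is expected to clean up.
static Value *rebuildAggregateReturn(Type *RetTy, unsigned RetCount,
                                     ArrayRef<int> NewRetIdxs, Value *NewCall,
                                     Instruction *InsertPt) {
  Value *RetVal = UndefValue::get(RetTy);
  for (unsigned i = 0; i != RetCount; ++i)
    if (NewRetIdxs[i] != -1) {
      Value *Elt = ExtractValueInst::Create(NewCall, unsigned(NewRetIdxs[i]),
                                            "oldret", InsertPt);
      RetVal = InsertValueInst::Create(RetVal, Elt, i, "newret", InsertPt);
    }
  return RetVal;
}
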
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 823ae53..92e384a 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -31,7 +31,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
#define DEBUG_TYPE "functionattrs"
@@ -124,7 +124,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -139,7 +139,7 @@ INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -703,10 +703,14 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
}
if (ReadAttr != Attribute::None) {
- AttrBuilder B;
+ AttrBuilder B, R;
B.addAttribute(ReadAttr);
+ R.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
+ // Clear out existing readonly/readnone attributes
+ A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R));
A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
Changed = true;
@@ -755,8 +759,8 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
}
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(RVI);
- for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- FlowsToReturn.insert(PN->getIncomingValue(i));
+ for (Value *IncValue : PN->incoming_values())
+ FlowsToReturn.insert(IncValue);
continue;
}
@@ -1702,7 +1706,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
AA = &getAnalysis<AliasAnalysis>();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = annotateLibraryCalls(SCC);
Changed |= AddReadAttrs(SCC);
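
The TargetLibraryInfo changes above follow the general move of that analysis behind TargetLibraryInfoWrapperPass; for any legacy pass the migration looks roughly like the sketch below. ExamplePass is a placeholder, not part of this patch.

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
// Declare the wrapper pass as a dependency, then unwrap the analysis result.
struct ExamplePass : public FunctionPass {
  static char ID;
  ExamplePass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();   // was: TargetLibraryInfo
  }

  bool runOnFunction(Function &F) override {
    const TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    (void)TLI.has(LibFunc::memcpy);   // illustrative query only
    return false;                     // analysis only, no IR change
  }
};
char ExamplePass::ID = 0;
}
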
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 0c844fe..ba04c80 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
+#include <unordered_map>
using namespace llvm;
#define DEBUG_TYPE "globaldce"
@@ -47,6 +48,7 @@ namespace {
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
SmallPtrSet<Constant *, 8> SeenConstants;
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@@ -78,6 +80,17 @@ bool GlobalDCE::runOnModule(Module &M) {
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+ // Collect the set of members for each comdat.
+ for (Function &F : M)
+ if (Comdat *C = F.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &F));
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GV));
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GA));
+
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -177,6 +190,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
SeenConstants.clear();
+ ComdatMembers.clear();
return Changed;
}
@@ -188,17 +202,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
if (!AliveGlobals.insert(G).second)
return;
- Module *M = G->getParent();
if (Comdat *C = G->getComdat()) {
- for (Function &F : *M)
- if (F.getComdat() == C)
- GlobalIsNeeded(&F);
- for (GlobalVariable &GV : M->globals())
- if (GV.getComdat() == C)
- GlobalIsNeeded(&GV);
- for (GlobalAlias &GA : M->aliases())
- if (GA.getComdat() == C)
- GlobalIsNeeded(&GA);
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ GlobalIsNeeded(CM.second);
}
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
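
GlobalDCE now builds a comdat-to-members index once per run instead of rescanning the whole module each time a comdat member is marked live; the table it populates amounts to the sketch below (collectComdatMembers is an illustrative name). GlobalIsNeeded then walks make_range(ComdatMembers.equal_range(C)), as shown in the hunk above.

#include "llvm/IR/Module.h"
#include <unordered_map>
using namespace llvm;

// One pass over functions, globals, and aliases records every comdat member,
// so marking one member live can reach its co-members in O(members of C).
static std::unordered_multimap<Comdat *, GlobalValue *>
collectComdatMembers(Module &M) {
  std::unordered_multimap<Comdat *, GlobalValue *> Members;
  for (Function &F : M)
    if (Comdat *C = F.getComdat())
      Members.insert(std::make_pair(C, &F));
  for (GlobalVariable &GV : M.globals())
    if (Comdat *C = GV.getComdat())
      Members.insert(std::make_pair(C, &GV));
  for (GlobalAlias &GA : M.aliases())
    if (Comdat *C = GA.getComdat())
      Members.insert(std::make_pair(C, &GA));
  return Members;
}
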
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 6e0ae83..cc4a79f 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -38,7 +39,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -68,7 +68,7 @@ STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
struct GlobalOpt : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -86,7 +86,6 @@ namespace {
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
- const DataLayout *DL;
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
@@ -95,7 +94,7 @@ namespace {
char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
@@ -269,7 +268,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
// Note that we need to use a weak value handle for the worklist items. When
@@ -318,8 +317,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// and will invalidate our notion of what Init is.
Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
+ ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
+ ConstantFoldInstruction(GEP, DL, TLI));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -565,6 +564,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
Value *NewPtr = NewGlobals[Val];
+ Type *NewTy = NewGlobals[Val]->getValueType();
// Form a shorter GEP if needed.
if (GEP->getNumOperands() > 3) {
@@ -573,15 +573,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
Idxs.push_back(NullInt);
for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
Idxs.push_back(CE->getOperand(i));
- NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
+ NewPtr =
+ ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
} else {
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
SmallVector<Value*, 8> Idxs;
Idxs.push_back(NullInt);
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
- NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
- GEPI->getName()+"."+Twine(Val),GEPI);
+ NewPtr = GetElementPtrInst::Create(
+ NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI);
}
}
GEP->replaceAllUsesWith(NewPtr);
@@ -721,8 +722,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
else
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
- Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
- ConstantExpr::getGetElementPtr(NewV, Idxs));
+ Changed |= OptimizeAwayTrappingUsesOfValue(
+ GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -739,7 +740,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
@@ -802,7 +803,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
+static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
@@ -822,12 +823,10 @@ static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
/// the specified malloc. Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc. Instead, turn the
/// malloc into a global, and any loads of GV as uses of the new global.
-static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
- CallInst *CI,
- Type *AllocTy,
- ConstantInt *NElements,
- const DataLayout *DL,
- TargetLibraryInfo *TLI) {
+static GlobalVariable *
+OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
+ ConstantInt *NElements, const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
Type *GlobalType;
@@ -1167,7 +1166,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
InsertedScalarizedValues,
PHIsToRewrite),
LI->getName()+".f"+Twine(FieldNo), LI);
- } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ } else {
+ PHINode *PN = cast<PHINode>(V);
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
@@ -1181,8 +1181,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
PN->getName()+".f"+Twine(FieldNo), PN);
Result = NewPN;
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
- } else {
- llvm_unreachable("Unknown usable value");
}
return FieldVals[FieldNo] = Result;
@@ -1224,7 +1222,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
- Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
+ Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NGEPI);
GEPI->eraseFromParent();
@@ -1271,7 +1269,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value *NElems, const DataLayout *DL,
+ Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
Type *MAT = getMallocAllocatedType(CI, TLI);
@@ -1301,10 +1299,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
- unsigned TypeSize = DL->getTypeAllocSize(FieldTy);
+ unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
- TypeSize = DL->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = DL->getIntPtrType(CI->getType());
+ TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, nullptr,
@@ -1459,16 +1457,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
/// pointer global variable with a single value stored it that is a malloc or
/// cast of malloc.
-static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
- CallInst *CI,
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
Type *AllocTy,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
- if (!DL)
- return false;
-
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1504,7 +1498,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
- if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) {
+ if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
return true;
}
@@ -1534,8 +1528,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = DL->getIntPtrType(CI->getType());
- unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
+ unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
@@ -1563,7 +1557,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1733,6 +1727,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
const GlobalStatus &GS) {
+ auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function
// and this function is main (which we know is not recursive), we replace
// the global with a local alloca in this function.
@@ -1804,12 +1799,10 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
++NumMarked;
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) {
- const DataLayout &DL = DLP->getDataLayout();
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
- return true;
- }
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
}
} else if (GS.StoredType == GlobalStatus::StoredOnce) {
// If the initial value for the global was an undef value, and if only
@@ -1954,6 +1947,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
// Simplify the initializer.
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ auto &DL = M.getDataLayout();
Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
if (New && New != CE)
GV->setInitializer(New);
@@ -1971,9 +1965,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL);
-
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL);
/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
/// handled by the code generator. We don't want to generate something like:
@@ -1983,9 +1976,10 @@ isSimpleEnoughValueToCommit(Constant *C,
/// This function should be called if C was not found (but just got inserted)
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
-static bool isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL) {
+static bool
+isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
// Simple global addresses are supported, do not allow dllimport or
// thread-local globals.
if (auto *GV = dyn_cast<GlobalValue>(C))
@@ -2019,8 +2013,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
case Instruction::PtrToInt:
// int <=> ptr is fine if the int type is the same size as the
// pointer type.
- if (!DL || DL->getTypeSizeInBits(CE->getType()) !=
- DL->getTypeSizeInBits(CE->getOperand(0)->getType()))
+ if (DL.getTypeSizeInBits(CE->getType()) !=
+ DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
return false;
return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
@@ -2042,8 +2036,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL) {
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
// If we already checked this constant, we win.
if (!SimpleConstants.insert(C).second)
return true;
@@ -2174,8 +2168,8 @@ namespace {
/// Once an evaluation call fails, the evaluation object should not be reused.
class Evaluator {
public:
- Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
- : DL(DL), TLI(TLI) {
+ Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
+ : DL(DL), TLI(TLI) {
ValueStack.emplace_back();
}
@@ -2249,7 +2243,7 @@ private:
/// simple enough to live in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
};
@@ -2345,7 +2339,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
Constant * const IdxList[] = {IdxZero, IdxZero};
- Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
+ Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
@@ -2409,8 +2403,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
i != e; ++i)
GEPOps.push_back(getVal(*i));
InstResult =
- ConstantExpr::getGetElementPtr(P, GEPOps,
- cast<GEPOperator>(GEP)->isInBounds());
+ ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
+ cast<GEPOperator>(GEP)->isInBounds());
DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
<< "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
@@ -2498,9 +2492,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Value *Ptr = PtrArg->stripPointerCasts();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
- if (DL && !Size->isAllOnesValue() &&
+ if (!Size->isAllOnesValue() &&
Size->getValue().getLimitedValue() >=
- DL->getTypeStoreSize(ElemTy)) {
+ DL.getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
<< "\n");
@@ -2689,7 +2683,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
+static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Call the function.
Evaluator Eval(DL, TLI);
@@ -3040,9 +3034,8 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ auto &DL = M.getDataLayout();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool LocalChange = true;
while (LocalChange) {
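
A minimal sketch of the DataLayout idiom these GlobalOpt hunks migrate to: the layout is always reachable from the Module, so the DataLayoutPass lookup and the null checks go away. sizeOfGlobal is a hypothetical helper used only for illustration, not code from the patch.

#include <cstdint>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

// Sketch only: the reference-based DataLayout access used throughout this
// commit; sizeOfGlobal itself is a hypothetical example name.
static uint64_t sizeOfGlobal(const llvm::GlobalVariable &GV) {
  const llvm::DataLayout &DL = GV.getParent()->getDataLayout();
  return DL.getTypeAllocSize(GV.getType()->getElementType());
}
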
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index b4d31d8..fcacec328 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -16,7 +16,7 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/IPO.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -36,6 +36,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
+ initializeLowerBitSetsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 6686743..8f65a98 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -29,7 +30,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -72,8 +72,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
InlineLimit : Threshold),
InsertLifetime(InsertLifetime) {}
-/// getAnalysisUsage - For this class, we declare that we require and preserve
-/// the call graph. If the derived class implements this method, it should
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
@@ -97,40 +97,31 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
AttributeSet::FunctionIndex,
B);
- AttributeSet CallerAttr = Caller->getAttributes(),
- CalleeAttr = Callee->getAttributes();
- if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ if (Callee->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectReq);
- } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectStrong);
- } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtect) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong))
+ } else if (Callee->hasFnAttribute(Attribute::StackProtect) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectReq) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectStrong))
Caller->addFnAttr(Attribute::StackProtect);
}
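
The AdjustCallerSSPLevel rewrite above is a pure API cleanup: Function::hasFnAttribute subsumes the verbose AttributeSet::FunctionIndex queries. A small sketch of the equivalence; callerRequiresStackProtector is a hypothetical name used only for illustration.

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Sketch only: old and new spellings of the same attribute query.
static bool callerRequiresStackProtector(const llvm::Function &F) {
  // Before this commit:
  //   F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
  //                                  Attribute::StackProtectReq)
  // After:
  return F.hasFnAttribute(llvm::Attribute::StackProtectReq);
}
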
-/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
-/// available from other functions inlined into the caller. If we are able to
+/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory, bool InsertLifetime,
- const DataLayout *DL) {
+ int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -206,11 +197,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
unsigned Align1 = AI->getAlignment(),
Align2 = AvailableAlloca->getAlignment();
- // If we don't have data layout information, and only one alloca is using
- // the target default, then we can't safely merge them because we can't
- // pick the greater alignment.
- if (!DL && (!Align1 || !Align2) && Align1 != Align2)
- continue;
// The available alloca has to be in the right function, not in some other
// function in this SCC.
@@ -231,8 +217,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
if (Align1 != Align2) {
if (!Align1 || !Align2) {
- assert(DL && "DataLayout required to compare default alignments");
- unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType());
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+ unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
Align1 = Align1 ? Align1 : TypeAlign;
Align2 = Align2 ? Align2 : TypeAlign;
@@ -273,8 +259,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
- Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
+ Caller->hasFnAttribute(Attribute::OptimizeForSize);
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
OptSizeThreshold < thres)
thres = OptSizeThreshold;
@@ -283,17 +268,14 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::InlineHint);
- if (InlineHint && HintThreshold > thres
- && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize))
+ Callee->hasFnAttribute(Attribute::InlineHint);
+ if (InlineHint && HintThreshold > thres &&
+ !Caller->hasFnAttribute(Attribute::MinSize))
thres = HintThreshold;
// Listen to the cold attribute when it would decrease the threshold.
bool ColdCallee = Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Cold);
+ Callee->hasFnAttribute(Attribute::Cold);
// Command line argument for InlineLimit will override the default
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
// do not use the default cold threshold even if it is smaller.
@@ -312,8 +294,7 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) {
emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
}
-/// shouldInline - Return true if the inliner should attempt to inline
-/// at the given CallSite.
+/// Return true if the inliner should attempt to inline at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
InlineCost IC = getInlineCost(CS);
@@ -427,7 +408,7 @@ bool Inliner::shouldInline(CallSite CS) {
return true;
}
-/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
@@ -444,9 +425,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
SmallPtrSet<Function*, 8> SCCFunctions;
@@ -506,7 +486,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT);
+ InlineFunctionInfo InlineInfo(&CG, AA, ACT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -564,7 +544,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, DL)) {
+ InlineHistoryID, InsertLifetime)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
Twine(Callee->getName() +
" will not be inlined into " +
@@ -636,16 +616,30 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
-// doFinalization - Remove now-dead linkonce functions at the end of
-// processing to avoid breaking the SCC traversal.
+/// Remove now-dead linkonce functions at the end of
+/// processing to avoid breaking the SCC traversal.
bool Inliner::doFinalization(CallGraph &CG) {
return removeDeadFunctions(CG);
}
-/// removeDeadFunctions - Remove dead functions that are not included in
-/// DNR (Do Not Remove) list.
+/// Remove dead functions that are not included in DNR (Do Not Remove) list.
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
SmallVector<CallGraphNode*, 16> FunctionsToRemove;
+ SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
+ SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
+
+ auto RemoveCGN = [&](CallGraphNode *CGN) {
+ // Remove any call graph edges from the function to its callees.
+ CGN->removeAllCalledFunctions();
+
+ // Remove any edges from the external node to the function's call graph
+    // node. These edges might have been made irrelevant due to
+ // optimization of the program.
+ CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+
+    // Remove the node for the callee from the call graph and delete it.
+ FunctionsToRemove.push_back(CGN);
+ };
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
@@ -658,9 +652,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Handle the case when this function is called and we only want to care
// about always-inline functions. This is a bit of a hack to share code
// between here and the InlineAlways pass.
- if (AlwaysInlineOnly &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline))
+ if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
continue;
// If the only remaining users of the function are dead constants, remove
@@ -674,20 +666,45 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// without also dropping the other members of the COMDAT.
// The inliner doesn't visit non-function entities which are in COMDAT
// groups so it is unsafe to do so *unless* the linkage is local.
- if (!F->hasLocalLinkage() && F->hasComdat())
- continue;
-
- // Remove any call graph edges from the function to its callees.
- CGN->removeAllCalledFunctions();
+ if (!F->hasLocalLinkage()) {
+ if (const Comdat *C = F->getComdat()) {
+ --ComdatEntriesAlive[C];
+ DeadFunctionsInComdats.push_back(CGN);
+ continue;
+ }
+ }
- // Remove any edges from the external node to the function's call graph
- // node. These edges might have been made irrelegant due to
- // optimization of the program.
- CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+ RemoveCGN(CGN);
+ }
+ if (!DeadFunctionsInComdats.empty()) {
+ // Count up all the entities in COMDAT groups
+ auto ComdatGroupReferenced = [&](const Comdat *C) {
+ auto I = ComdatEntriesAlive.find(C);
+ if (I != ComdatEntriesAlive.end())
+ ++(I->getSecond());
+ };
+ for (const Function &F : CG.getModule())
+ if (const Comdat *C = F.getComdat())
+ ComdatGroupReferenced(C);
+ for (const GlobalVariable &GV : CG.getModule().globals())
+ if (const Comdat *C = GV.getComdat())
+ ComdatGroupReferenced(C);
+ for (const GlobalAlias &GA : CG.getModule().aliases())
+ if (const Comdat *C = GA.getComdat())
+ ComdatGroupReferenced(C);
+ for (CallGraphNode *CGN : DeadFunctionsInComdats) {
+ Function *F = CGN->getFunction();
+ const Comdat *C = F->getComdat();
+ int NumAlive = ComdatEntriesAlive[C];
+ // We can remove functions in a COMDAT group if the entire group is dead.
+ assert(NumAlive >= 0);
+ if (NumAlive > 0)
+ continue;
- // Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.push_back(CGN);
+ RemoveCGN(CGN);
+ }
}
+
if (FunctionsToRemove.empty())
return false;
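
The new COMDAT handling above only drops a non-local dead function when its whole group is dead: each dead candidate decrements its group's counter, then every module-level member of a counted group (function, global or alias, dead or alive) increments it, so a positive count means at least one live member remains. A standalone model of that bookkeeping; Entity and groupLiveness are illustrative stand-ins, not LLVM API.

#include <map>
#include <string>
#include <vector>

struct Entity { std::string Group; bool Dead; };

// Sketch only: mirrors the ComdatEntriesAlive counting scheme on std containers.
static std::map<std::string, int> groupLiveness(const std::vector<Entity> &Mod) {
  std::map<std::string, int> Alive;
  // Phase 1: each dead candidate charges its COMDAT group once.
  for (const Entity &E : Mod)
    if (E.Dead && !E.Group.empty())
      --Alive[E.Group];
  // Phase 2: every member of a charged group credits it back, dead or alive.
  for (const Entity &E : Mod) {
    auto I = Alive.find(E.Group);
    if (I != Alive.end())
      ++I->second;
  }
  // Net count == number of members outside the dead set; a group is safe to
  // drop only when its count is <= 0, matching the NumAlive > 0 check above.
  return Alive;
}
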
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index 20414aa..41334ca 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -242,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
if (!Split) continue;
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs);
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
}
}
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
new file mode 100644
index 0000000..bffeebb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,732 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/LowerBitSets.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerbitsets"
+
+STATISTIC(ByteArraySizeBits, "Byte array size in bits");
+STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
+STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
+STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
+STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+
+static cl::opt<bool> AvoidReuse(
+ "lowerbitsets-avoid-reuse",
+ cl::desc("Try to avoid reuse of byte array addresses using aliases"),
+ cl::Hidden, cl::init(true));
+
+bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
+ if (Offset < ByteOffset)
+ return false;
+
+ if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
+ return false;
+
+ uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
+ if (BitOffset >= BitSize)
+ return false;
+
+ return Bits.count(BitOffset);
+}
+
+bool BitSetInfo::containsValue(
+ const DataLayout &DL,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+ uint64_t COffset) const {
+ if (auto GV = dyn_cast<GlobalVariable>(V)) {
+ auto I = GlobalLayout.find(GV);
+ if (I == GlobalLayout.end())
+ return false;
+ return containsGlobalOffset(I->second + COffset);
+ }
+
+ if (auto GEP = dyn_cast<GEPOperator>(V)) {
+ APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(DL, APOffset);
+ if (!Result)
+ return false;
+ COffset += APOffset.getZExtValue();
+ return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
+ COffset);
+ }
+
+ if (auto Op = dyn_cast<Operator>(V)) {
+ if (Op->getOpcode() == Instruction::BitCast)
+ return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
+
+ if (Op->getOpcode() == Instruction::Select)
+ return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
+ containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
+ }
+
+ return false;
+}
+
+BitSetInfo BitSetBuilder::build() {
+ if (Min > Max)
+ Min = 0;
+
+ // Normalize each offset against the minimum observed offset, and compute
+ // the bitwise OR of each of the offsets. The number of trailing zeros
+ // in the mask gives us the log2 of the alignment of all offsets, which
+ // allows us to compress the bitset by only storing one bit per aligned
+ // address.
+ uint64_t Mask = 0;
+ for (uint64_t &Offset : Offsets) {
+ Offset -= Min;
+ Mask |= Offset;
+ }
+
+ BitSetInfo BSI;
+ BSI.ByteOffset = Min;
+
+ BSI.AlignLog2 = 0;
+ if (Mask != 0)
+ BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+
+ // Build the compressed bitset while normalizing the offsets against the
+ // computed alignment.
+ BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
+ for (uint64_t Offset : Offsets) {
+ Offset >>= BSI.AlignLog2;
+ BSI.Bits.insert(Offset);
+ }
+
+ return BSI;
+}
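
A worked example of the compression performed by build(), as a standalone sketch on STL containers rather than code from the patch: byte offsets {40, 56, 72} normalize to {0, 16, 32}, whose OR is 48, giving AlignLog2 = 4, BitSize = 3 and stored bits {0, 1, 2}, i.e. one bit per 16-byte-aligned slot starting at byte 40.

#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

// Sketch only: models BitSetBuilder::build() with concrete numbers.
int main() {
  std::vector<uint64_t> Offsets = {40, 56, 72};
  uint64_t Min = 40, Max = 72;

  uint64_t Mask = 0;
  for (uint64_t &O : Offsets) { O -= Min; Mask |= O; }   // {0,16,32}, Mask = 48

  unsigned AlignLog2 = 0;                                 // countTrailingZeros
  for (uint64_t M = Mask; M && !(M & 1); M >>= 1)
    ++AlignLog2;                                          // AlignLog2 = 4

  uint64_t BitSize = ((Max - Min) >> AlignLog2) + 1;      // 3
  std::set<uint64_t> Bits;
  for (uint64_t O : Offsets)
    Bits.insert(O >> AlignLog2);                          // {0, 1, 2}

  assert(AlignLog2 == 4 && BitSize == 3 && Bits.count(2));
  return 0;
}
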
+
+void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
+ // Create a new fragment to hold the layout for F.
+ Fragments.emplace_back();
+ std::vector<uint64_t> &Fragment = Fragments.back();
+ uint64_t FragmentIndex = Fragments.size() - 1;
+
+ for (auto ObjIndex : F) {
+ uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
+ if (OldFragmentIndex == 0) {
+ // We haven't seen this object index before, so just add it to the current
+ // fragment.
+ Fragment.push_back(ObjIndex);
+ } else {
+ // This index belongs to an existing fragment. Copy the elements of the
+ // old fragment into this one and clear the old fragment. We don't update
+ // the fragment map just yet, this ensures that any further references to
+ // indices from the old fragment in this fragment do not insert any more
+ // indices.
+ std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
+ Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
+ OldFragment.clear();
+ }
+ }
+
+ // Update the fragment map to point our object indices to this fragment.
+ for (uint64_t ObjIndex : Fragment)
+ FragmentMap[ObjIndex] = FragmentIndex;
+}
+
+void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
+ uint64_t BitSize, uint64_t &AllocByteOffset,
+ uint8_t &AllocMask) {
+ // Find the smallest current allocation.
+ unsigned Bit = 0;
+ for (unsigned I = 1; I != BitsPerByte; ++I)
+ if (BitAllocs[I] < BitAllocs[Bit])
+ Bit = I;
+
+ AllocByteOffset = BitAllocs[Bit];
+
+ // Add our size to it.
+ unsigned ReqSize = AllocByteOffset + BitSize;
+ BitAllocs[Bit] = ReqSize;
+ if (Bytes.size() < ReqSize)
+ Bytes.resize(ReqSize);
+
+ // Set our bits.
+ AllocMask = 1 << Bit;
+ for (uint64_t B : Bits)
+ Bytes[AllocByteOffset + B] |= AllocMask;
+}
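
allocate() treats the byte array as eight independent bit planes: each bitset gets the plane with the lowest high-water mark, so up to eight bitsets can share the same bytes, each probed with its own one-bit mask. A small model with concrete numbers (illustrative only; the Bytes vector and the actual bit writes are omitted):

#include <cstdint>
#include <cstdio>

// Sketch only: per-plane allocation as in ByteArrayBuilder::allocate().
int main() {
  uint64_t BitAllocs[8] = {0};   // high-water mark of each bit plane
  auto allocate = [&](uint64_t BitSize, uint64_t &ByteOff, uint8_t &Mask) {
    unsigned Bit = 0;
    for (unsigned I = 1; I != 8; ++I)     // pick the least-used plane
      if (BitAllocs[I] < BitAllocs[Bit])
        Bit = I;
    ByteOff = BitAllocs[Bit];
    BitAllocs[Bit] += BitSize;
    Mask = uint8_t(1) << Bit;
  };

  uint64_t Off; uint8_t Mask;
  allocate(100, Off, Mask);   // plane 0: Off == 0, Mask == 0x01
  allocate(100, Off, Mask);   // plane 1: Off == 0, Mask == 0x02, same 100 bytes
  std::printf("offset %llu mask 0x%02x\n", (unsigned long long)Off, Mask);
  return 0;
}
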
+
+namespace {
+
+struct ByteArrayInfo {
+ std::set<uint64_t> Bits;
+ uint64_t BitSize;
+ GlobalVariable *ByteArray;
+ Constant *Mask;
+};
+
+struct LowerBitSets : public ModulePass {
+ static char ID;
+ LowerBitSets() : ModulePass(ID) {
+ initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ Module *M;
+
+ bool LinkerSubsectionsViaSymbols;
+ IntegerType *Int1Ty;
+ IntegerType *Int8Ty;
+ IntegerType *Int32Ty;
+ Type *Int32PtrTy;
+ IntegerType *Int64Ty;
+ Type *IntPtrTy;
+
+ // The llvm.bitsets named metadata.
+ NamedMDNode *BitSetNM;
+
+ // Mapping from bitset mdstrings to the call sites that test them.
+ DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+
+ std::vector<ByteArrayInfo> ByteArrayInfos;
+
+ BitSetInfo
+ buildBitSet(MDString *BitSet,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ ByteArrayInfo *createByteArray(BitSetInfo &BSI);
+ void allocateByteArrays();
+ Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Value *BitOffset);
+ Value *
+ lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ GlobalVariable *CombinedGlobal,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
+ const std::vector<GlobalVariable *> &Globals);
+ bool buildBitSets();
+ bool eraseBitSetMetadata();
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &Mod) {
+ M = &Mod;
+ const DataLayout &DL = Mod.getDataLayout();
+
+ Triple TargetTriple(M->getTargetTriple());
+ LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+
+ Int1Ty = Type::getInt1Ty(M->getContext());
+ Int8Ty = Type::getInt8Ty(M->getContext());
+ Int32Ty = Type::getInt32Ty(M->getContext());
+ Int32PtrTy = PointerType::getUnqual(Int32Ty);
+ Int64Ty = Type::getInt64Ty(M->getContext());
+ IntPtrTy = DL.getIntPtrType(M->getContext(), 0);
+
+ BitSetNM = M->getNamedMetadata("llvm.bitsets");
+
+ BitSetTestCallSites.clear();
+
+ return false;
+}
+
+/// Build a bit set for BitSet using the object layouts in
+/// GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+ MDString *BitSet,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ BitSetBuilder BSB;
+
+ // Compute the byte offset of each element of this bitset.
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+ auto OpGlobal = cast<GlobalVariable>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ uint64_t Offset =
+ cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+ ->getValue())->getZExtValue();
+
+ Offset += GlobalLayout.find(OpGlobal)->second;
+
+ BSB.addOffset(Offset);
+ }
+ }
+
+ return BSB.build();
+}
+
+/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
+/// Bits. This pattern maps to the bt instruction on x86.
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
+ Value *BitOffset) {
+ auto BitsType = cast<IntegerType>(Bits->getType());
+ unsigned BitWidth = BitsType->getBitWidth();
+
+ BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
+ Value *BitIndex =
+ B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
+ Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
+ Value *MaskedBits = B.CreateAnd(Bits, BitMask);
+ return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
+}
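
In scalar terms the IR built above computes (Bits & (1 << (BitOffset & (BitWidth - 1)))) != 0, which x86 instruction selection can turn into a bt. A hedged C++ restatement, not code from the patch:

#include <cstdint>

// Sketch only: scalar equivalent of createMaskedBitTest's output.
static bool maskedBitTest(uint64_t Bits, uint64_t BitOffset, unsigned BitWidth) {
  // The AND keeps the shift amount in range, mirroring
  // B.CreateAnd(BitOffset, BitWidth - 1) above; BitWidth is 32 or 64 here.
  uint64_t BitIndex = BitOffset & (BitWidth - 1);
  return (Bits & (uint64_t(1) << BitIndex)) != 0;
}
// e.g. maskedBitTest(0b101, 2, 32) is true, maskedBitTest(0b101, 1, 32) is false.
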
+
+ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
+ // Create globals to stand in for byte arrays and masks. These never actually
+ // get initialized, we RAUW and erase them later in allocateByteArrays() once
+ // we know the offset and mask to use.
+ auto ByteArrayGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+ auto MaskGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+
+ ByteArrayInfos.emplace_back();
+ ByteArrayInfo *BAI = &ByteArrayInfos.back();
+
+ BAI->Bits = BSI.Bits;
+ BAI->BitSize = BSI.BitSize;
+ BAI->ByteArray = ByteArrayGlobal;
+ BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+ return BAI;
+}
+
+void LowerBitSets::allocateByteArrays() {
+ std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
+ [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
+ return BAI1.BitSize > BAI2.BitSize;
+ });
+
+ std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
+
+ ByteArrayBuilder BAB;
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ uint8_t Mask;
+ BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
+
+ BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
+ cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+ }
+
+ Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
+ auto ByteArray =
+ new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, ByteArrayConst);
+
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
+ ByteArrayConst->getType(), ByteArray, Idxs);
+
+ // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
+ // that the pc-relative displacement is folded into the lea instead of the
+ // test instruction getting another displacement.
+ if (LinkerSubsectionsViaSymbols) {
+ BAI->ByteArray->replaceAllUsesWith(GEP);
+ } else {
+ GlobalAlias *Alias =
+ GlobalAlias::create(PointerType::getUnqual(Int8Ty),
+ GlobalValue::PrivateLinkage, "bits", GEP, M);
+ BAI->ByteArray->replaceAllUsesWith(Alias);
+ }
+ BAI->ByteArray->eraseFromParent();
+ }
+
+ ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
+ BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
+ BAB.BitAllocs[6] + BAB.BitAllocs[7];
+ ByteArraySizeBytes = BAB.Bytes.size();
+}
+
+/// Build a test that bit BitOffset is set in BSI, where
+/// BitSetGlobal is a global containing the bits in BSI.
+Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
+ ByteArrayInfo *&BAI, Value *BitOffset) {
+ if (BSI.BitSize <= 64) {
+ // If the bit set is sufficiently small, we can avoid a load by bit testing
+ // a constant.
+ IntegerType *BitsTy;
+ if (BSI.BitSize <= 32)
+ BitsTy = Int32Ty;
+ else
+ BitsTy = Int64Ty;
+
+ uint64_t Bits = 0;
+ for (auto Bit : BSI.Bits)
+ Bits |= uint64_t(1) << Bit;
+ Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
+ return createMaskedBitTest(B, BitsConst, BitOffset);
+ } else {
+ if (!BAI) {
+ ++NumByteArraysCreated;
+ BAI = createByteArray(BSI);
+ }
+
+ Constant *ByteArray = BAI->ByteArray;
+ Type *Ty = BAI->ByteArray->getValueType();
+ if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
+ // Each use of the byte array uses a different alias. This makes the
+ // backend less likely to reuse previously computed byte array addresses,
+ // improving the security of the CFI mechanism based on this pass.
+ ByteArray = GlobalAlias::create(BAI->ByteArray->getType(),
+ GlobalValue::PrivateLinkage, "bits_use",
+ ByteArray, M);
+ }
+
+ Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
+ Value *Byte = B.CreateLoad(ByteAddr);
+
+ Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+ return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
+ }
+}
+
+/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// replace the call with.
+Value *LowerBitSets::lowerBitSetCall(
+ CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ GlobalVariable *CombinedGlobal,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Value *Ptr = CI->getArgOperand(0);
+ const DataLayout &DL = M->getDataLayout();
+
+ if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());
+
+ Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
+ Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
+ GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+
+ BasicBlock *InitialBB = CI->getParent();
+
+ IRBuilder<> B(CI);
+
+ Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+ if (BSI.isSingleOffset())
+ return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *BitOffset;
+ if (BSI.AlignLog2 == 0) {
+ BitOffset = PtrOffset;
+ } else {
+ // We need to check that the offset both falls within our range and is
+ // suitably aligned. We can check both properties at the same time by
+ // performing a right rotate by log2(alignment) followed by an integer
+ // comparison against the bitset size. The rotate will move the lower
+ // order bits that need to be zero into the higher order bits of the
+ // result, causing the comparison to fail if they are nonzero. The rotate
+ // also conveniently gives us a bit offset to use during the load from
+ // the bitset.
+ Value *OffsetSHR =
+ B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
+ Value *OffsetSHL = B.CreateShl(
+ PtrOffset,
+ ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
+ BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+ }
+
+ Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+ Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+ // If the bit set is all ones, testing against it is unnecessary.
+ if (BSI.isAllOnes())
+ return OffsetInRange;
+
+ TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+ IRBuilder<> ThenB(Term);
+
+ // Now that we know that the offset is in range and aligned, load the
+ // appropriate bit from the bitset.
+ Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
+
+ // The value we want is 0 if we came directly from the initial block
+ // (having failed the range or alignment checks), or the loaded bit if
+ // we came from the block in which we loaded it.
+ B.SetInsertPoint(CI);
+ PHINode *P = B.CreatePHI(Int1Ty, 2);
+ P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+ P->addIncoming(Bit, ThenB.GetInsertBlock());
+ return P;
+}
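
The shl/lshr/or pair above spells a rotate right by AlignLog2: the low bits that must be zero for an aligned offset end up in the high bits of the result, so a single unsigned compare against BitSize rejects both out-of-range and misaligned pointers. A standalone restatement with a worked case; offsetInRange and W are illustrative names, with W standing in for DL.getPointerSizeInBits(0).

#include <cstdint>

// Sketch only: the combined range-and-alignment check from lowerBitSetCall.
static bool offsetInRange(uint64_t PtrOffset, unsigned AlignLog2,
                          uint64_t BitSize, unsigned W = 64) {
  if (AlignLog2 == 0)
    return PtrOffset < BitSize;
  // Rotate right by AlignLog2; misaligned low bits become huge high bits.
  uint64_t BitOffset =
      (PtrOffset >> AlignLog2) | (PtrOffset << (W - AlignLog2));
  return BitOffset < BitSize;
}
// e.g. with AlignLog2 = 4 and BitSize = 3: PtrOffset 32 rotates to 2 (accepted),
// PtrOffset 33 rotates to 0x1000000000000002 (rejected as misaligned).
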
+
+/// Given a disjoint set of bitsets and globals, layout the globals, build the
+/// bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromGlobals(
+ const std::vector<MDString *> &BitSets,
+ const std::vector<GlobalVariable *> &Globals) {
+ // Build a new global with the combined contents of the referenced globals.
+ std::vector<Constant *> GlobalInits;
+ const DataLayout &DL = M->getDataLayout();
+ for (GlobalVariable *G : Globals) {
+ GlobalInits.push_back(G->getInitializer());
+ uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());
+
+ // Compute the amount of padding required to align the next element to the
+ // next power of 2.
+ uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+ // Cap at 128 was found experimentally to have a good data/instruction
+ // overhead tradeoff.
+ if (Padding > 128)
+ Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+ GlobalInits.push_back(
+ ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ }
+ if (!GlobalInits.empty())
+ GlobalInits.pop_back();
+ Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
+ auto CombinedGlobal =
+ new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, NewInit);
+
+ const StructLayout *CombinedGlobalLayout =
+ DL.getStructLayout(cast<StructType>(NewInit->getType()));
+
+ // Compute the offsets of the original globals within the new global.
+ DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ // Multiply by 2 to account for padding elements.
+ GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
+
+ // For each bitset in this disjoint set...
+ for (MDString *BS : BitSets) {
+ // Build the bitset.
+ BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+
+ ByteArrayInfo *BAI = 0;
+
+ // Lower each call to llvm.bitset.test for this bitset.
+ for (CallInst *CI : BitSetTestCallSites[BS]) {
+ ++NumBitSetCallsLowered;
+ Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
+ }
+
+ // Build aliases pointing to offsets into the combined global for each
+ // global from which we built the combined global, and replace references
+ // to the original globals with references to the aliases.
+ for (unsigned I = 0; I != Globals.size(); ++I) {
+ // Multiply by 2 to account for padding elements.
+ Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, I * 2)};
+ Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
+ NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
+ if (LinkerSubsectionsViaSymbols) {
+ Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ GlobalAlias *GAlias =
+ GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(),
+ "", CombinedGlobalElemPtr, M);
+ GAlias->takeName(Globals[I]);
+ Globals[I]->replaceAllUsesWith(GAlias);
+ }
+ Globals[I]->eraseFromParent();
+ }
+}
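
The padding rule above places each global at the start of a power-of-two sized slot, falling back to 128-byte alignment once that would waste more than 128 bytes. Restated on plain integers as a sketch (nextPow2 approximates LLVM's NextPowerOf2; a positive InitSize is assumed):

#include <cstdint>

// Sketch only: padding inserted after a global of InitSize bytes.
static uint64_t nextPow2(uint64_t X) {      // smallest power of two > X
  uint64_t P = 1;
  while (P <= X)
    P <<= 1;
  return P;
}

static uint64_t paddingAfter(uint64_t InitSize) {
  uint64_t Padding = nextPow2(InitSize - 1) - InitSize;    // pow-2 slot
  if (Padding > 128)                                       // cap the waste
    Padding = ((InitSize + 127) / 128) * 128 - InitSize;   // 128-byte slot
  return Padding;
}
// e.g. paddingAfter(12) == 4 (next global 16-aligned);
//      paddingAfter(300) == 84 (128-aligned, since padding to 512 costs 212).
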
+
+/// Lower all bit sets in this module.
+bool LowerBitSets::buildBitSets() {
+ Function *BitSetTestFunc =
+ M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+ if (!BitSetTestFunc)
+ return false;
+
+ // Equivalence class set containing bitsets and the globals they reference.
+ // This is used to partition the set of bitsets in the module into disjoint
+ // sets.
+ typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+ GlobalClassesTy;
+ GlobalClassesTy GlobalClasses;
+
+ for (const Use &U : BitSetTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+
+ auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+ report_fatal_error(
+ "Second argument of llvm.bitset.test must be metadata string");
+ auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+
+ // Add the call site to the list of call sites for this bit set. We also use
+ // BitSetTestCallSites to keep track of whether we have seen this bit set
+ // before. If we have, we don't need to re-add the referenced globals to the
+ // equivalence class.
+ std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+ bool> Ins =
+ BitSetTestCallSites.insert(
+ std::make_pair(BitSet, std::vector<CallInst *>()));
+ Ins.first->second.push_back(CI);
+ if (!Ins.second)
+ continue;
+
+ // Add the bitset to the equivalence class.
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+ GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+ if (!BitSetNM)
+ continue;
+
+ // Verify the bitset metadata and add the referenced globals to the bitset's
+ // equivalence class.
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getNumOperands() != 3)
+ report_fatal_error(
+ "All operands of llvm.bitsets metadata must have 3 elements");
+
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+
+ auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+ if (!OpConstMD)
+ report_fatal_error("Bit set element must be a constant");
+ auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+ if (!OpGlobal)
+ report_fatal_error("Bit set element must refer to global");
+
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ if (!OffsetConstMD)
+ report_fatal_error("Bit set element offset must be a constant");
+ auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+ if (!OffsetInt)
+ report_fatal_error(
+ "Bit set element offset must be an integer constant");
+
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+ }
+ }
+
+ if (GlobalClasses.empty())
+ return false;
+
+ // For each disjoint set we found...
+ for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+ E = GlobalClasses.end();
+ I != E; ++I) {
+ if (!I->isLeader()) continue;
+
+ ++NumBitSetDisjointSets;
+
+ // Build the list of bitsets and referenced globals in this disjoint set.
+ std::vector<MDString *> BitSets;
+ std::vector<GlobalVariable *> Globals;
+ llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
+ llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
+ for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<MDString *>()) {
+ BitSetIndices[MI->get<MDString *>()] = BitSets.size();
+ BitSets.push_back(MI->get<MDString *>());
+ } else {
+ GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
+ Globals.push_back(MI->get<GlobalVariable *>());
+ }
+ }
+
+ // For each bitset, build a set of indices that refer to globals referenced
+ // by the bitset.
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ // Op = { bitset name, global, offset }
+ if (!Op->getOperand(1))
+ continue;
+ auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
+ if (I == BitSetIndices.end())
+ continue;
+
+ auto OpGlobal = cast<GlobalVariable>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+ }
+ }
+
+ // Order the sets of indices by size. The GlobalLayoutBuilder works best
+ // when given small index sets first.
+ std::stable_sort(
+ BitSetMembers.begin(), BitSetMembers.end(),
+ [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
+
+ // Create a GlobalLayoutBuilder and provide it with index sets as layout
+ // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
+ // as close together as possible.
+ GlobalLayoutBuilder GLB(Globals.size());
+ for (auto &&MemSet : BitSetMembers)
+ GLB.addFragment(MemSet);
+
+ // Build a vector of globals with the computed layout.
+ std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
+ auto OGI = OrderedGlobals.begin();
+ for (auto &&F : GLB.Fragments)
+ for (auto &&Offset : F)
+ *OGI++ = Globals[Offset];
+
+ // Order bitsets by name for determinism.
+ std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
+ return S1->getString() < S2->getString();
+ });
+
+ // Build the bitsets from this disjoint set.
+ buildBitSetsFromGlobals(BitSets, OrderedGlobals);
+ }
+
+ allocateByteArrays();
+
+ return true;
+}
+
+bool LowerBitSets::eraseBitSetMetadata() {
+ if (!BitSetNM)
+ return false;
+
+ M->eraseNamedMetadata(BitSetNM);
+ return true;
+}
+
+bool LowerBitSets::runOnModule(Module &M) {
+ bool Changed = buildBitSets();
+ Changed |= eraseBitSetMetadata();
+ return Changed;
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index b91ebf2..91a5eef 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -127,9 +127,8 @@ namespace {
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const DataLayout *DL, const Function *F1,
- const Function *F2)
- : FnL(F1), FnR(F2), DL(DL) {}
+ FunctionComparator(const Function *F1, const Function *F2)
+ : FnL(F1), FnR(F2) {}
/// Test whether the two functions have equivalent behaviour.
int compare();
@@ -292,8 +291,7 @@ private:
/// Parts to be compared for each comparison stage,
/// most significant stage first:
/// 1. Address space. As numbers.
- /// 2. Constant offset, (if "DataLayout *DL" field is not NULL,
- /// using GEPOperator::accumulateConstantOffset method).
+ /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
/// 3. Pointer operand type (using cmpType method).
/// 4. Number of operands.
/// 5. Compare operands, using cmpValues method.
@@ -354,8 +352,6 @@ private:
// The two functions undergoing comparison.
const Function *FnL, *FnR;
- const DataLayout *DL;
-
/// Assign serial numbers to values from left function, and values from
/// right function.
/// Explanation:
@@ -394,14 +390,13 @@ private:
class FunctionNode {
AssertingVH<Function> F;
- const DataLayout *DL;
public:
- FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
+ FunctionNode(Function *F) : F(F) {}
Function *getFunc() const { return F; }
void release() { F = 0; }
bool operator<(const FunctionNode &RHS) const {
- return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1;
+ return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
}
};
}
@@ -620,10 +615,11 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
- if (DL) {
- if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL);
- if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR);
- }
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ if (PTyL && PTyL->getAddressSpace() == 0)
+ TyL = DL.getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0)
+ TyR = DL.getIntPtrType(TyR);
if (TyL == TyR)
return 0;
@@ -723,6 +719,15 @@ int FunctionComparator::cmpOperations(const Instruction *L,
R->getRawSubclassOptionalData()))
return Res;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+ if (int Res = cmpTypes(AI->getAllocatedType(),
+ cast<AllocaInst>(R)->getAllocatedType()))
+ return Res;
+ if (int Res =
+ cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment()))
+ return Res;
+ }
+
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same type
for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
@@ -855,13 +860,12 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
// When we have target data, we can reduce the GEP down to the value in bytes
// added to the address.
- if (DL) {
- unsigned BitWidth = DL->getPointerSizeInBits(ASL);
- APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
- if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
- GEPR->accumulateConstantOffset(*DL, OffsetR))
- return cmpAPInts(OffsetL, OffsetR);
- }
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ unsigned BitWidth = DL.getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(DL, OffsetR))
+ return cmpAPInts(OffsetL, OffsetR);
if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
(uint64_t)GEPR->getPointerOperand()->getType()))
@@ -1122,9 +1126,6 @@ private:
/// to modify it.
FnTreeType FnTree;
- /// DataLayout for more accurate GEP comparisons. May be NULL.
- const DataLayout *DL;
-
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
};
@@ -1152,8 +1153,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
- int Res1 = FunctionComparator(DL, F1, F2).compare();
- int Res2 = FunctionComparator(DL, F2, F1).compare();
+ int Res1 = FunctionComparator(F1, F2).compare();
+ int Res2 = FunctionComparator(F2, F1).compare();
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
@@ -1174,8 +1175,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
Function *F3 = cast<Function>(*K);
- int Res3 = FunctionComparator(DL, F1, F3).compare();
- int Res4 = FunctionComparator(DL, F2, F3).compare();
+ int Res3 = FunctionComparator(F1, F3).compare();
+ int Res4 = FunctionComparator(F2, F3).compare();
bool Transitive = true;
@@ -1212,8 +1213,6 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -1368,8 +1367,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
PointerType *PTy = G->getType();
- auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- G->getLinkage(), "", F);
+ auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F);
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
@@ -1420,7 +1418,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
std::pair<FnTreeType::iterator, bool> Result =
- FnTree.insert(FunctionNode(NewFunction, DL));
+ FnTree.insert(FunctionNode(NewFunction));
if (Result.second) {
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
@@ -1457,7 +1455,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
void MergeFunctions::remove(Function *F) {
// We need to make sure we remove F, not a function "equal" to F per the
// function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL));
+ FnTreeType::iterator found = FnTree.find(FunctionNode(F));
size_t Erased = 0;
if (found != FnTree.end() && found->getFunc() == F) {
Erased = 1;
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 76d6dfa..4a7cb7b 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -58,13 +58,13 @@ Function* PartialInliner::unswitchFunction(Function* F) {
BasicBlock* returnBlock = nullptr;
BasicBlock* nonReturnBlock = nullptr;
unsigned returnCount = 0;
- for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
- SI != SE; ++SI)
- if (isa<ReturnInst>((*SI)->getTerminator())) {
- returnBlock = *SI;
+ for (BasicBlock *BB : successors(entryBlock)) {
+ if (isa<ReturnInst>(BB->getTerminator())) {
+ returnBlock = BB;
returnCount++;
} else
- nonReturnBlock = *SI;
+ nonReturnBlock = BB;
+ }
if (returnCount != 1)
return nullptr;
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index fb673dc..7eb0682 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -19,12 +19,11 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -60,6 +59,10 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
+static cl::opt<bool>
+RunFloat2Int("float-to-int", cl::Hidden, cl::init(true),
+ cl::desc("Run the float2int (float demotion) pass"));
+
static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,
cl::desc("Run the load combining pass"));
@@ -78,6 +81,14 @@ static cl::opt<bool>
EnableMLSM("mlsm", cl::init(true), cl::Hidden,
cl::desc("Enable motion of merged load and store"));
+static cl::opt<bool> EnableLoopInterchange(
+ "enable-loopinterchange", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopInterchange Pass"));
+
+static cl::opt<bool> EnableLoopDistribute(
+ "enable-loop-distribute", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopDistribution Pass"));
+
static cl::opt<bool> EnableGVN("enable-gvn",
cl::init(true), cl::Hidden,
cl::desc("Run the global value numbering pass"));
@@ -98,7 +109,6 @@ PassManagerBuilder::PassManagerBuilder() {
DisableGVNLoadPRE = false;
VerifyInput = false;
VerifyOutput = false;
- StripDebug = false;
MergeFunctions = false;
}
@@ -122,7 +132,7 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
}
void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
- PassManagerBase &PM) const {
+ legacy::PassManagerBase &PM) const {
for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
if ((*GlobalExtensions)[i].first == ETy)
(*GlobalExtensions)[i].second(*this, PM);
@@ -131,8 +141,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
Extensions[i].second(*this, PM);
}
-void
-PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
+void PassManagerBuilder::addInitialAliasAnalysisPasses(
+ legacy::PassManagerBase &PM) const {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
@@ -143,11 +153,13 @@ PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
PM.add(createBasicAliasAnalysisPass());
}
-void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
+void PassManagerBuilder::populateFunctionPassManager(
+ legacy::FunctionPassManager &FPM) {
addExtensionsToPM(EP_EarlyAsPossible, FPM);
// Add LibraryInfo if we have some.
- if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo));
+ if (LibraryInfo)
+ FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
if (OptLevel == 0) return;
@@ -162,7 +174,8 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
FPM.add(createLowerExpectIntrinsicPass());
}
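
With the switch to the legacy namespace and TargetLibraryInfoWrapperPass, a consumer of PassManagerBuilder now looks roughly like the sketch below. This is an illustrative driver under the stated assumptions, not code from the patch; optimizeModule is a hypothetical name and error handling is omitted.

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

// Sketch only: drives the function and module pipelines built by this file.
static void optimizeModule(Module &M, unsigned OptLevel) {
  PassManagerBuilder PMB;
  PMB.OptLevel = OptLevel;
  // LibraryInfo is now a TargetLibraryInfoImpl; the builder wraps it in a
  // TargetLibraryInfoWrapperPass as shown in the hunks above and below.
  PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(M.getTargetTriple()));

  legacy::FunctionPassManager FPM(&M);
  legacy::PassManager MPM;
  PMB.populateFunctionPassManager(FPM);
  PMB.populateModulePassManager(MPM);

  FPM.doInitialization();
  for (Function &F : M)
    FPM.run(F);
  FPM.doFinalization();
  MPM.run(M);
}
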
-void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
+void PassManagerBuilder::populateModulePassManager(
+ legacy::PassManagerBase &MPM) {
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
@@ -186,7 +199,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
}
// Add LibraryInfo if we have some.
- if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo));
+ if (LibraryInfo)
+ MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
addInitialAliasAnalysisPasses(MPM);
@@ -240,7 +254,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
-
+ if (EnableLoopInterchange) {
+ MPM.add(createLoopInterchangePass()); // Interchange loops
+ MPM.add(createCFGSimplificationPass());
+ }
if (!DisableUnrollLoops)
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
@@ -254,6 +271,11 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
+
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
MPM.add(createInstructionCombiningPass());
@@ -261,6 +283,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ MPM.add(createLICMPass());
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -298,11 +321,18 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (RunFloat2Int)
+ MPM.add(createFloat2IntPass());
+
  // Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form.
- if (ExtraVectorizerPasses)
- MPM.add(createLoopRotatePass());
+ MPM.add(createLoopRotatePass());
+
+  // Distribute loops to allow partial vectorization, i.e. isolate dependences
+  // that would otherwise inhibit vectorization into a separate loop.
+ if (EnableLoopDistribute)
+ MPM.add(createLoopDistributePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
// FIXME: Because of #pragma vectorize enable, the passes below are always
@@ -354,9 +384,19 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
- if (!DisableUnrollLoops)
+ if (!DisableUnrollLoops) {
MPM.add(createLoopUnrollPass()); // Unroll small loops
+    // LoopUnroll may generate some redundancy to clean up.
+ MPM.add(createInstructionCombiningPass());
+
+    // Runtime unrolling will introduce a runtime check in the loop prologue.
+    // If the unrolled loop is an inner loop, then the prologue will be inside
+    // the outer loop. The LICM pass can help promote the runtime check out if
+    // the checked value is loop invariant.
+ MPM.add(createLICMPass());
+ }
+
// After vectorization and unrolling, assume intrinsics may tell us more
// about pointer alignments.
MPM.add(createAlignmentFromAssumptionsPass());
@@ -379,7 +419,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_OptimizerLast, MPM);
}
-void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
+void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
@@ -450,6 +490,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
+ if (EnableLoopInterchange)
+ PM.add(createLoopInterchangePass());
+
PM.add(createLoopVectorizePass(true, LoopVectorize));
// More scalar chains could be vectorized due to more alias information
@@ -469,7 +512,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
+}
+void PassManagerBuilder::addLateLTOOptimizationPasses(
+ legacy::PassManagerBase &PM) {
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
@@ -482,32 +528,26 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
PM.add(createMergeFunctionsPass());
}
-void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
- TargetMachine *TM) {
- if (TM) {
- PM.add(new DataLayoutPass());
- TM->addAnalysisPasses(PM);
- }
-
+void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (LibraryInfo)
- PM.add(new TargetLibraryInfo(*LibraryInfo));
+ PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
if (VerifyInput)
PM.add(createVerifierPass());
- if (StripDebug)
- PM.add(createStripSymbolsPass(true));
+ if (OptLevel > 1)
+ addLTOOptimizationPasses(PM);
- if (VerifyInput)
- PM.add(createDebugInfoVerifierPass());
+ // Lower bit sets to globals. This pass supports Clang's control flow
+ // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
+ // is enabled. The pass does nothing if CFI is disabled.
+ PM.add(createLowerBitSetsPass());
if (OptLevel != 0)
- addLTOOptimizationPasses(PM);
+ addLateLTOOptimizationPasses(PM);
- if (VerifyOutput) {
+ if (VerifyOutput)
PM.add(createVerifierPass());
- PM.add(createDebugInfoVerifierPass());
- }
}
inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
@@ -573,7 +613,7 @@ void
LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM) {
PassManagerBuilder *Builder = unwrap(PMB);
- FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM);
+ legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
Builder->populateFunctionPassManager(*FPM);
}
@@ -581,7 +621,7 @@ void
LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM) {
PassManagerBuilder *Builder = unwrap(PMB);
- PassManagerBase *MPM = unwrap(PM);
+ legacy::PassManagerBase *MPM = unwrap(PM);
Builder->populateModulePassManager(*MPM);
}
@@ -590,7 +630,7 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMBool Internalize,
LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
- PassManagerBase *LPM = unwrap(PM);
+ legacy::PassManagerBase *LPM = unwrap(PM);
// A small backwards compatibility hack. populateLTOPassManager used to take
  // a RunInliner option.
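For callers, here is a minimal usage sketch of the updated interface. It assumes the headers from this revision, that LibraryInfo holds a TargetLibraryInfoImpl (as the wrapper-pass construction above implies), and the helper name runPipeline and the fixed OptLevel are illustrative only.

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

// Sketch: populate the legacy pass managers through the new signatures.
static void runPipeline(llvm::Module &M) {
  llvm::PassManagerBuilder PMB;
  PMB.OptLevel = 2;
  // LibraryInfo now carries a TargetLibraryInfoImpl; the builder wraps it in
  // a TargetLibraryInfoWrapperPass when populating the managers.
  PMB.LibraryInfo =
      new llvm::TargetLibraryInfoImpl(llvm::Triple(M.getTargetTriple()));

  llvm::legacy::FunctionPassManager FPM(&M);
  llvm::legacy::PassManager MPM;
  PMB.populateFunctionPassManager(FPM);
  PMB.populateModulePassManager(MPM);

  FPM.doInitialization();
  for (llvm::Function &F : M)
    FPM.run(F);
  FPM.doFinalization();
  MPM.run(M);
}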
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 7bd4ce1..1943b93 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -18,8 +18,10 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -175,7 +177,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- if (II->doesNotThrow()) {
+ if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 816978e..60c9573 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -305,41 +305,31 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
SmallVector<Metadata *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
- for (DICompileUnit DIC : F.compile_units()) {
- assert(DIC.Verify() && "DIC must verify as a DICompileUnit.");
-
+ for (DICompileUnit *DIC : F.compile_units()) {
// Create our live subprogram list.
- DIArray SPs = DIC.getSubprograms();
bool SubprogramChange = false;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram DISP(SPs.getElement(i));
- assert(DISP.Verify() && "DISP must verify as a DISubprogram.");
-
+ for (DISubprogram *DISP : DIC->getSubprograms()) {
// Make sure we visit each subprogram only once.
if (!VisitedSet.insert(DISP).second)
continue;
// If the function referenced by DISP is not null, the function is live.
- if (DISP.getFunction())
+ if (DISP->getFunction())
LiveSubprograms.push_back(DISP);
else
SubprogramChange = true;
}
// Create our live global variable list.
- DIArray GVs = DIC.getGlobalVariables();
bool GlobalVariableChange = false;
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
- DIGlobalVariable DIG(GVs.getElement(i));
- assert(DIG.Verify() && "DIG must verify as DIGlobalVariable.");
-
+ for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
      // Make sure we visit each global variable only once.
if (!VisitedSet.insert(DIG).second)
continue;
// If the global variable referenced by DIG is not null, the global
// variable is live.
- if (DIG.getGlobal())
+ if (DIG->getVariable())
LiveGlobalVariables.push_back(DIG);
else
GlobalVariableChange = true;
@@ -349,12 +339,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// subprogram list/global variable list with our new live subprogram/global
// variable list.
if (SubprogramChange) {
- DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms)));
+ DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
Changed = true;
}
if (GlobalVariableChange) {
- DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables)));
+ DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
Changed = true;
}
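The rewritten loops above operate directly on the new specialized debug-info node pointers. Below is a small sketch of the same iteration pattern, assuming a DebugInfoFinder as the pass already uses; countLiveSubprograms is a made-up helper name.

#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Module.h"

// Sketch: walk compile units and subprograms the way StripDeadDebugInfo does.
static unsigned countLiveSubprograms(llvm::Module &M) {
  llvm::DebugInfoFinder Finder;
  Finder.processModule(M);
  unsigned Live = 0;
  for (llvm::DICompileUnit *CU : Finder.compile_units())
    for (llvm::DISubprogram *SP : CU->getSubprograms())
      if (SP->getFunction()) // a non-null function marks the subprogram live
        ++Live;
  return Live;
}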
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 6d20384..a8d0172 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
@@ -891,7 +891,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// There are different heuristics we can use for this. Here are some simple
// ones.
@@ -909,18 +909,18 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
//
// Since the carry into the most significant position is always equal to
// the carry out of the addition, there is no signed overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
  // Addition of two 2's complement numbers having opposite signs will never
// overflow.
@@ -943,21 +943,21 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
  // Subtraction of two 2's complement numbers having identical signs will
// never overflow.
@@ -972,12 +972,14 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
/// \brief Return true if we can prove that:
/// (sub LHS, RHS) === (sub nuw LHS, RHS)
bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
bool LHSKnownNonNegative, LHSKnownNegative;
bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0,
+ &CxtI);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0,
+ &CxtI);
if (LHSKnownNegative && RHSKnownNonNegative)
return true;
@@ -1046,15 +1048,15 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Value *V = SimplifyVectorOp(I))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -1158,20 +1160,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
// A+B --> A|B iff A and B have no bits set in common.
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- APInt LHSKnownOne(IT->getBitWidth(), 0);
- APInt LHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &I);
- if (LHSKnownZero != 0) {
- APInt RHSKnownOne(IT->getBitWidth(), 0);
- APInt RHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &I);
-
- // No bits in common -> bitwise or.
- if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
- return BinaryOperator::CreateOr(LHS, RHS);
- }
- }
+ if (haveNoCommonBitsSet(LHS, RHS, DL, AC, &I, DT))
+ return BinaryOperator::CreateOr(LHS, RHS);
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Value *X;
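The open-coded known-bits test is replaced by the haveNoCommonBitsSet helper; the identity behind it is that addition never produces a carry when the operands share no set bits, so the sum equals the bitwise or. A tiny standalone check in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xF0F0, B = 0x0F0F; // no bits in common
  assert((A & B) == 0);
  assert(A + B == (A | B));        // no column carries, so '+' behaves like '|'
  return 0;
}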
@@ -1243,7 +1233,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1256,10 +1246,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
    // Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of sexts), and if the
// integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
@@ -1307,7 +1298,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// TODO(jingyue): Consider WillNotOverflowSignedAdd and
// WillNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -1371,7 +1362,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1384,10 +1375,11 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
    // Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
@@ -1436,8 +1428,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
- assert(DL && "Must have target data info for this");
-
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -1584,6 +1574,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
return BinaryOperator::CreateLShr(X, CI);
}
+
+ // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
+ APInt IntVal = C->getValue();
+ if ((IntVal + 1).isPowerOf2()) {
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(&I, KnownZero, KnownOne, 0, &I);
+ if ((IntVal | KnownZero).isAllOnesValue()) {
+ return BinaryOperator::CreateXor(Op1, C);
+ }
+ }
}
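The new fold relies on the fact that subtracting from an all-ones value of width n just flips the low n bits: when C = 2^n - 1 and the other operand has no bits set outside those n bits, C - X equals C ^ X because no borrow can occur. A quick standalone check:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 0xFF;              // 2^8 - 1
  for (uint32_t X = 0; X <= 0xFF; ++X)  // bits above bit 7 are known zero
    assert(C - X == (C ^ X));           // subtraction degenerates to bit-flips
  return 0;
}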
@@ -1662,26 +1665,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
  // Optimize differences of pointers into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
- if (DL) {
- Value *LHSOp, *RHSOp;
- if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
- match(Op1, m_PtrToInt(m_Value(RHSOp))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
-
- // trunc(p)-trunc(q) -> trunc(p-q)
- if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
- match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
- }
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
bool Changed = false;
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 74b6970..ee21c81 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Intrinsics.h"
@@ -22,30 +22,12 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// isFreeToInvert - Return true if the specified value is free to invert (apply
-/// ~ to). This happens in cases where the ~ can be eliminated.
-static inline bool isFreeToInvert(Value *V) {
- // ~(~(X)) -> X.
- if (BinaryOperator::isNot(V))
- return true;
-
- // Constants can be considered to be not'ed values.
- if (isa<ConstantInt>(V))
- return true;
-
- // Compares can be inverted if they have a single use.
- if (CmpInst *CI = dyn_cast<CmpInst>(V))
- return CI->hasOneUse();
-
- return false;
-}
-
static inline Value *dyn_castNotVal(Value *V) {
// If this is not(not(x)) don't return that this is a not: we want the two
// not's to be folded first.
if (BinaryOperator::isNot(V)) {
Value *Operand = BinaryOperator::getNotArgument(V);
- if (!isFreeToInvert(Operand))
+ if (!IsFreeToInvert(Operand, Operand->hasOneUse()))
return Operand;
}
@@ -997,9 +979,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
ConstantRange LHSRange =
- ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue());
ConstantRange RHSRange =
- ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue());
if (LHSRange.intersectWith(RHSRange).isEmptySet())
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
@@ -1727,15 +1709,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Value *Mask = nullptr;
Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI,
- DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI,
- DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
@@ -2585,8 +2569,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// ~(X & Y) --> (~X | ~Y) - De Morgan's Law
// ~(X | Y) === (~X & ~Y) - De Morgan's Law
- if (isFreeToInvert(Op0I->getOperand(0)) &&
- isFreeToInvert(Op0I->getOperand(1))) {
+ if (IsFreeToInvert(Op0I->getOperand(0),
+ Op0I->getOperand(0)->hasOneUse()) &&
+ IsFreeToInvert(Op0I->getOperand(1),
+ Op0I->getOperand(1)->hasOneUse())) {
Value *NotX =
Builder->CreateNot(Op0I->getOperand(0), "notlhs");
Value *NotY =
@@ -2604,15 +2590,16 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- if (RHS->isOne() && Op0->hasOneUse())
+ if (Constant *RHS = dyn_cast<Constant>(Op1)) {
+ if (RHS->isAllOnesValue() && Op0->hasOneUse())
// xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
return CmpInst::Create(CI->getOpcode(),
CI->getInversePredicate(),
CI->getOperand(0), CI->getOperand(1));
+ }
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
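Both the dyn_castNotVal change and the xor combine above use the shared IsFreeToInvert helper to decide when applying De Morgan's laws is profitable; the laws themselves are the familiar bitwise identities, spot-checked here on arbitrary values:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0xA5, Y = 0x3C;
  assert((uint8_t)~(X & Y) == (uint8_t)(~X | ~Y)); // ~(X & Y) --> ~X | ~Y
  assert((uint8_t)~(X | Y) == (uint8_t)(~X & ~Y)); // ~(X | Y) --> ~X & ~Y
  return 0;
}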
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 83b4b82..e83b9dd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -11,16 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;
using namespace PatternMatch;
@@ -60,8 +61,8 @@ static Type *reduceToSingleValueType(Type *T) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -107,7 +108,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
- if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
+ if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
SrcETy = reduceToSingleValueType(SrcETy);
@@ -155,7 +156,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -197,11 +198,137 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+static Value *SimplifyX86insertps(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
+
+ // The immediate permute control byte looks like this:
+ // [3:0] - zero mask for each 32-bit lane
+ // [5:4] - select one 32-bit destination lane
+ // [7:6] - select one 32-bit source lane
+
+ uint8_t Imm = CInt->getZExtValue();
+ uint8_t ZMask = Imm & 0xf;
+ uint8_t DestLane = (Imm >> 4) & 0x3;
+ uint8_t SourceLane = (Imm >> 6) & 0x3;
+
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // If all zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (ZMask == 0xf)
+ return ZeroVector;
+
+ // Initialize by passing all of the first source bits through.
+ int ShuffleMask[4] = { 0, 1, 2, 3 };
+
+ // We may replace the second operand with the zero vector.
+ Value *V1 = II.getArgOperand(1);
+
+ if (ZMask) {
+ // If the zero mask is being used with a single input or the zero mask
+ // overrides the destination lane, this is a shuffle with the zero vector.
+ if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
+ (ZMask & (1 << DestLane))) {
+ V1 = ZeroVector;
+ // We may still move 32-bits of the first source vector from one lane
+ // to another.
+ ShuffleMask[DestLane] = SourceLane;
+ // The zero mask may override the previous insert operation.
+ for (unsigned i = 0; i < 4; ++i)
+ if ((ZMask >> i) & 0x1)
+ ShuffleMask[i] = i + 4;
+ } else {
+ // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
+ return nullptr;
+ }
+ } else {
+ // Replace the selected destination lane with the selected source lane.
+ ShuffleMask[DestLane] = SourceLane + 4;
+ }
+
+ return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *SimplifyX86vperm2(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
+
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
+
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<int, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
+
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ auto Args = CI.arg_operands();
+ if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
+ TLI, DT, AC))
+ return ReplaceInstUsesWith(CI, V);
+
if (isFreeCall(&CI, TLI))
return visitFree(CI);
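The control-byte decoding performed by SimplifyX86insertps above can be exercised in isolation; this sketch (decodeInsertpsImm is a made-up name) extracts the same three fields that the combine turns into a shuffle mask:

#include <cstdint>
#include <cstdio>

// Decode an SSE4.1 insertps immediate the way the combine above does:
// [3:0] zero mask, [5:4] destination lane, [7:6] source lane.
static void decodeInsertpsImm(uint8_t Imm) {
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;
  std::printf("zmask=0x%x dest=%u src=%u\n", unsigned(ZMask),
              unsigned(DestLane), unsigned(SourceLane));
}

int main() {
  decodeInsertpsImm(0x1D); // source lane 0 -> dest lane 1, zero lanes 0, 2, 3
  return 0;
}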
@@ -350,112 +477,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
- case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true);
- }
- // FALL THROUGH uadd into sadd
+
+ case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // Canonicalize constants into the RHS.
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
if (isa<Constant>(II->getArgOperand(0)) &&
!isa<Constant>(II->getArgOperand(1))) {
+ // Canonicalize constants into the RHS.
Value *LHS = II->getArgOperand(0);
II->setArgOperand(0, II->getArgOperand(1));
II->setArgOperand(1, LHS);
return II;
}
+ // fall through
- // X + undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X + 0 -> {X, false}
- if (RHS->isZero()) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
-
- // We can strength reduce reduce this signed add into a regular add if we
- // can prove that it will never overflow.
- if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
- }
- }
-
- break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- // undef - X -> undef
- // X - undef -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
- // X - 0 -> {X, false}
- if (ConstRHS->isZero()) {
- return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
- if (WillNotOverflowSignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
- }
- } else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
- }
- }
- break;
- }
- case Intrinsic::umul_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true);
- } // FALL THROUGH
- case Intrinsic::smul_with_overflow:
- // Canonicalize constants into the RHS.
- if (isa<Constant>(II->getArgOperand(0)) &&
- !isa<Constant>(II->getArgOperand(1))) {
- Value *LHS = II->getArgOperand(0);
- II->setArgOperand(0, II->getArgOperand(1));
- II->setArgOperand(1, LHS);
- return II;
- }
-
- // X * undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+ OverflowCheckFlavor OCF =
+ IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+ assert(OCF != OCF_INVALID && "unexpected!");
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X*0 -> {0, false}
- if (RHSI->isZero())
- return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+ *II, OperationResult, OverflowResult))
+ return CreateOverflowTuple(II, OperationResult, OverflowResult);
- // X * 1 -> {X, false}
- if (RHSI->equalsInt(1)) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedMul(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
- }
- }
break;
+ }
+
case Intrinsic::minnum:
case Intrinsic::maxnum: {
Value *Arg0 = II->getArgOperand(0);
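The per-intrinsic special cases collapse into a single OptimizeOverflowCheck path keyed by an OverflowCheckFlavor. At the source level such intrinsics typically come from checked-arithmetic builtins; for example, assuming Clang's builtins:

#include <climits>
#include <cstdio>

int main() {
  int Sum;
  // Clang emits llvm.sadd.with.overflow.i32 for this builtin; that is one of
  // the intrinsics the unified overflow-check handling above canonicalizes.
  if (__builtin_sadd_overflow(INT_MAX, 1, &Sum))
    std::printf("overflowed\n");
  else
    std::printf("sum = %d\n", Sum);
  return 0;
}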
@@ -543,7 +594,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -560,7 +611,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -575,11 +626,54 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
}
+ case Intrinsic::ppc_qpx_qvlfs:
+ // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getType()->getVectorNumElements());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(VTy));
+ Value *Load = Builder->CreateLoad(Ptr);
+ return new FPExtInst(Load, II->getType());
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
+ 32) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getArgOperand(0)->getType()->getVectorNumElements());
+ Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
+ Type *OpPtrTy = PointerType::getUnqual(VTy);
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(TOp, Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
+ 32) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
@@ -696,15 +790,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned LowHalfElts = VWidth / 2;
APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
- InputDemandedElts,
- UndefElts)) {
+ if (Value *TmpV = SimplifyDemandedVectorElts(
+ II->getArgOperand(0), InputDemandedElts, UndefElts)) {
II->setArgOperand(0, TmpV);
return II;
}
break;
}
-
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = SimplifyX86insertps(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::x86_sse4a_insertqi: {
// insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
// ones undef
@@ -867,6 +964,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(CI, Shuffle);
}
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ if (Value *V = SimplifyX86vperm2(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -906,12 +1011,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (DL && DL->isLittleEndian())
+ if (DL.isLittleEndian())
Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
@@ -940,7 +1045,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -1079,7 +1184,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, DL, DT)) {
+ if (isValidAssumeForContext(II, LI, DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return EraseInstFromFunction(*II);
@@ -1102,7 +1207,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// facts about the relocate value, while being careful to
// preserve relocation semantics.
GCRelocateOperands Operands(II);
- Value *DerivedPtr = Operands.derivedPtr();
+ Value *DerivedPtr = Operands.getDerivedPtr();
+ auto *GCRelocateType = cast<PointerType>(II->getType());
// Remove the relocation if unused, note that this check is required
// to prevent the cases below from looping forever.
@@ -1113,24 +1219,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// TODO: provide a hook for this in GCStrategy. This is clearly legal for
// most practical collectors, but there was discussion in the review thread
// about whether it was legal for all possible collectors.
- if (isa<UndefValue>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<UndefValue>(DerivedPtr)) {
+      // gc_relocate is not cast; use undef of gc_relocate's type to replace it.
+ return ReplaceInstUsesWith(*II, UndefValue::get(GCRelocateType));
+ }
// The relocation of null will be null for most any collector.
// TODO: provide a hook for this in GCStrategy. There might be some weird
// collector this property does not hold for.
- if (isa<ConstantPointerNull>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<ConstantPointerNull>(DerivedPtr)) {
+      // gc_relocate is not cast; use a null pointer of gc_relocate's type to replace it.
+ return ReplaceInstUsesWith(*II, ConstantPointerNull::get(GCRelocateType));
+ }
// isKnownNonNull -> nonnull attribute
if (isKnownNonNull(DerivedPtr))
II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
- // TODO: dereferenceable -> deref attribute
+ // isDereferenceablePointer -> deref attribute
+ if (isDereferenceablePointer(DerivedPtr, DL)) {
+ if (Argument *A = dyn_cast<Argument>(DerivedPtr)) {
+ uint64_t Bytes = A->getDereferenceableBytes();
+ II->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
+ }
+ }
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
// Canonicalize on the type from the uses to the defs
-
+
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
}
}
@@ -1147,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
- const CastInst * const CI,
- const DataLayout * const DL,
+ const DataLayout &DL,
+ const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
@@ -1172,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
+ if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
return false;
return true;
}
@@ -1181,10 +1297,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
- if (Value *With = Simplifier->optimizeCall(CI)) {
+ auto InstCombineRAUW = [this](Instruction *From, Value *With) {
+ ReplaceInstUsesWith(*From, With);
+ };
+ LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
+ if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
}
@@ -1342,7 +1462,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
@@ -1359,7 +1479,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
- Instruction *I = tryOptimizeCall(CI, DL);
+ Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return EraseInstFromFunction(*I);
@@ -1409,10 +1529,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
- if (RAttrs.
- hasAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex))
+ if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
@@ -1438,7 +1555,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
//
// into:
// call void @takes_i32_inalloca(i32* null)
- if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ //
+ // Similarly, avoid folding away bitcasts of byval calls.
+ if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
CallSite::arg_iterator AI = CS.arg_begin();
@@ -1450,8 +1570,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this parameter value.
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
- hasAttributes(AttributeFuncs::
- typeIncompatible(ParamTy, i + 1), i + 1))
+ overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
if (CS.isInAllocaArgument(i))
@@ -1463,12 +1582,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
Type *CurElTy = ActTy->getPointerElementType();
- if (DL->getTypeAllocSize(CurElTy) !=
- DL->getTypeAllocSize(ParamPTy->getElementType()))
+ if (DL.getTypeAllocSize(CurElTy) !=
+ DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
}
@@ -1524,10 +1643,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs.
- removeAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
// Add the new return attributes.
if (RAttrs.hasAttributes())
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 5415726..48ab0eb 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -80,9 +80,6 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
/// try to eliminate the cast by moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
- // This requires DataLayout to get the alloca alignment and size information.
- if (!DL) return nullptr;
-
PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
@@ -93,8 +90,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Type *CastElTy = PTy->getElementType();
if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
- unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
- unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
+ unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy);
if (CastElTyAlign < AllocElTyAlign) return nullptr;
// If the allocation has multiple uses, only promote it if we are strictly
@@ -102,14 +99,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// same, we open the door to infinite loops of various kinds.
if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
- uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
+ uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
- uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
+ uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
@@ -215,7 +212,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
PHINode *OPN = cast<PHINode>(I);
PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
- Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ Value *V =
+ EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
NPN->addIncoming(V, OPN->getIncomingBlock(i));
}
Res = NPN;
@@ -234,25 +232,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
static Instruction::CastOps
-isEliminableCastPair(
- const CastInst *CI, ///< The first cast instruction
- unsigned opcode, ///< The opcode of the second cast instruction
- Type *DstTy, ///< The target type for the second cast instruction
- const DataLayout *DL ///< The target data for pointer size
-) {
-
+isEliminableCastPair(const CastInst *CI, ///< First cast instruction
+ unsigned opcode, ///< Opcode for the second cast
+ Type *DstTy, ///< Target type for the second cast
+ const DataLayout &DL) {
Type *SrcTy = CI->getOperand(0)->getType(); // A from above
Type *MidTy = CI->getType(); // B from above
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
- Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(SrcTy) : nullptr;
- Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(MidTy) : nullptr;
- Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(DstTy) : nullptr;
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
@@ -298,7 +293,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
@@ -314,8 +309,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!Src->getType()->isIntegerTy() ||
- !CI.getType()->isIntegerTy() ||
+ if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
@@ -424,8 +418,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty, IC, CxtI))
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI))
return false;
return true;
}
@@ -441,6 +435,15 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
+ // Test if the trunc is the user of a select which is part of a
+ // minimum or maximum operation. If so, don't do any more simplification.
+ // Even simplifying demanded bits can break the canonical form of a
+ // min/max.
+ Value *LHS, *RHS;
+ if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
+ if (matchSelectPattern(SI, LHS, RHS) != SPF_UNKNOWN)
+ return nullptr;
+
// See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
@@ -1035,8 +1038,8 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateSExtd(IncValue, Ty)) return false;
return true;
}
default:
@@ -1064,6 +1067,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+ // If we know that the value being extended is positive, we can use a zext
+ // instead.
+ bool KnownZero, KnownOne;
+ ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI);
+ if (KnownZero) {
+ Value *ZExt = Builder->CreateZExt(Src, DestTy);
+ return ReplaceInstUsesWith(CI, ZExt);
+ }
+
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
@@ -1332,22 +1344,57 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) {
return commonCastTransforms(CI);
}
+// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
+// This is safe if the intermediate type has enough bits in its mantissa to
+// accurately represent all values of X. For example, this won't work with
+// i64 -> float -> i64.
+Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) {
+ if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
+ return nullptr;
+ Instruction *OpI = cast<Instruction>(FI.getOperand(0));
+
+ Value *SrcI = OpI->getOperand(0);
+ Type *FITy = FI.getType();
+ Type *OpITy = OpI->getType();
+ Type *SrcTy = SrcI->getType();
+ bool IsInputSigned = isa<SIToFPInst>(OpI);
+ bool IsOutputSigned = isa<FPToSIInst>(FI);
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ int InputSize = (int)SrcTy->getScalarSizeInBits() - IsInputSigned;
+ int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned;
+ int ActualSize = std::min(InputSize, OutputSize);
+
+ if (ActualSize <= OpITy->getFPMantissaWidth()) {
+ if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) {
+ if (IsInputSigned && IsOutputSigned)
+ return new SExtInst(SrcI, FITy);
+ return new ZExtInst(SrcI, FITy);
+ }
+ if (FITy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits())
+ return new TruncInst(SrcI, FITy);
+ if (SrcTy == FITy)
+ return ReplaceInstUsesWith(FI, SrcI);
+ return new BitCastInst(SrcI, FITy);
+ }
+ return nullptr;
+}
+
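The mantissa-width test in FoldItoFPtoI carries the whole safety argument: the fold only fires when every relevant integer value survives the trip through the FP type exactly. A small illustrative check (not upstream code) of the safe i32 -> double -> i32 case; the analogous i64 -> float -> i64 trip would not round-trip and is rejected by the width test:

#include <cassert>
#include <cstdint>

int main() {
  // A double has a 52-bit mantissa, so every i32 value is represented exactly
  // and fptosi(sitofp(x)) == x. A float's 23-bit mantissa could not make the
  // same promise for i64 (or for all of i32), which the width check rejects.
  int32_t x = -123456789;
  double d = static_cast<double>(x);            // sitofp
  int32_t roundtrip = static_cast<int32_t>(d);  // fptosi
  assert(roundtrip == x);
  return 0;
}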
Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
if (!OpI)
return commonCastTransforms(FI);
- // fptoui(uitofp(X)) --> X
- // fptoui(sitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
return commonCastTransforms(FI);
}
@@ -1357,17 +1404,8 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
if (!OpI)
return commonCastTransforms(FI);
- // fptosi(sitofp(X)) --> X
- // fptosi(uitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() <=
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
return commonCastTransforms(FI);
}
@@ -1384,18 +1422,15 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
-
- if (DL) {
- unsigned AS = CI.getAddressSpace();
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- DL->getPointerSizeInBits(AS)) {
- Type *Ty = DL->getIntPtrType(CI.getContext(), AS);
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
- }
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ DL.getPointerSizeInBits(AS)) {
+ Type *Ty = DL.getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1424,41 +1459,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
- if (!DL)
- return commonCastTransforms(CI);
-
- // If the GEP has a single use, and the base pointer is a bitcast, and the
- // GEP computes a constant offset, see if we can convert these three
- // instructions into fewer. This typically happens with unions and other
- // non-type-safe code.
- unsigned AS = GEP->getPointerAddressSpace();
- unsigned OffsetBits = DL->getPointerSizeInBits(AS);
- APInt Offset(OffsetBits, 0);
- BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
- if (GEP->hasOneUse() &&
- BCI &&
- GEP->accumulateConstantOffset(*DL, Offset)) {
- // Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = BCI->getOperand(0);
- SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(OrigBase->getType(),
- Offset.getSExtValue(),
- NewIndices)) {
- // If we were able to index down into an element, create the GEP
- // and bitcast the result. This eliminates one bitcast, potentially
- // two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
- NGEP->takeName(GEP);
-
- if (isa<BitCastInst>(CI))
- return new BitCastInst(NGEP, CI.getType());
- assert(isa<PtrToIntInst>(CI));
- return new PtrToIntInst(NGEP, CI.getType());
- }
- }
}
return commonCastTransforms(CI);
@@ -1469,16 +1469,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (!DL)
- return commonPointerCastTransforms(CI);
-
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
- if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS))
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
return commonPointerCastTransforms(CI);
- Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS);
+ Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
if (Ty->isVectorTy()) // Handle vectors of pointers.
PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
@@ -1562,8 +1559,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
static bool CollectInsertionElements(Value *V, unsigned Shift,
- SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy, InstCombiner &IC) {
+ SmallVectorImpl<Value *> &Elements,
+ Type *VecEltTy, bool isBigEndian) {
assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
"Shift should be a multiple of the element type size");
@@ -1579,7 +1576,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
return true;
unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
- if (IC.getDataLayout()->isBigEndian())
+ if (isBigEndian)
ElementIndex = Elements.size() - ElementIndex - 1;
// Fail if multiple elements are inserted into this slot.
@@ -1599,7 +1596,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- Shift, Elements, VecEltTy, IC);
+ Shift, Elements, VecEltTy, isBigEndian);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1614,7 +1611,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ isBigEndian))
return false;
}
return true;
@@ -1627,28 +1625,28 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC) &&
- CollectInsertionElements(I->getOperand(1), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian) &&
+ CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
}
}
@@ -1671,15 +1669,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType(), IC))
+ DestVecTy->getElementType(),
+ IC.getDataLayout().isBigEndian()))
return nullptr;
// If we succeeded, we know that all of the element are specified by Elements
@@ -1699,10 +1695,8 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+ const DataLayout &DL) {
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1725,7 +1719,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = 0;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1749,7 +1743,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1785,26 +1779,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
- Constant *ZeroUInt =
- Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
- SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(0U);
++NumZeros;
}
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
- SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder->getInt32(0));
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
- if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
return I;
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c07c96d..2dafa58 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -24,7 +24,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -229,10 +229,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
- // We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && !DL)
- return nullptr;
-
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -303,7 +299,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
-
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
@@ -398,7 +393,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds()) {
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
Idx = Builder->CreateTrunc(Idx, IntPtrTy);
@@ -487,10 +482,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// - Default to i32
if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
Ty = Idx->getType();
- else if (DL)
- Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
- else if (ArrayElementCount <= 32)
- Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
if (Ty) {
Value *V = Builder->CreateIntCast(Idx, Ty, false);
@@ -514,8 +507,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
///
/// If we can't emit an optimized form for this expression, this returns null.
///
-static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
- const DataLayout &DL = *IC.getDataLayout();
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+ const DataLayout &DL) {
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
@@ -628,12 +621,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
- if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
+ if (PtrBase == RHS && GEPLHS->isInBounds()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
- Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
// If not, synthesize the offset the hard way.
if (!Offset)
@@ -661,11 +654,11 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
// the compare with the adjusted indices.
- if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
(GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
- GEPRHS->getOperand(0)->stripPointerCasts()) {
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
Value *LOffset = EmitGEPOffset(GEPLHS);
Value *ROffset = EmitGEPOffset(GEPRHS);
@@ -733,9 +726,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
- if (DL &&
- GEPsInBounds &&
- (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS);
@@ -1928,17 +1919,20 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
- if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
- DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
+ if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+ DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = nullptr;
- if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) {
+ Value *RHSCIOp = RHSC->getOperand(0);
+ if (RHSCIOp->getType()->getPointerAddressSpace() ==
+ LHSCIOp->getType()->getPointerAddressSpace()) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+ } else if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1)))
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
- } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
- RHSOp = RHSC->getOperand(0);
- // If the pointer types don't match, insert a bitcast.
- if (LHSCIOp->getType() != RHSOp->getType())
- RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
- }
if (RHSOp)
return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
@@ -2103,7 +2097,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
- CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+ CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd");
Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
@@ -2115,33 +2109,94 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
- InstCombiner &IC) {
- // Don't bother doing this transformation for pointers, don't do it for
- // vectors.
- if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
+bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
+ Value *RHS, Instruction &OrigI,
+ Value *&Result, Constant *&Overflow) {
+ assert((!OrigI.isCommutative() ||
+ !(isa<Constant>(LHS) && !isa<Constant>(RHS))) &&
+ "call with a constant RHS if possible!");
+
+ auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
+ Result = OpResult;
+ Overflow = OverflowVal;
+ if (ReuseName)
+ Result->takeName(&OrigI);
+ return true;
+ };
- // If the add is a constant expr, then we don't bother transforming it.
- Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
- if (!OrigAdd) return nullptr;
+ switch (OCF) {
+ case OCF_INVALID:
+ llvm_unreachable("bad overflow check kind!");
- Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+ case OCF_UNSIGNED_ADD: {
+ OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(),
+ true);
- // Put the new code above the original add, in case there are any uses of the
- // add between the add and the compare.
- InstCombiner::BuilderTy *Builder = IC.Builder;
- Builder->SetInsertPoint(OrigAdd);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true);
+ }
+ // FALL THROUGH uadd into sadd
+ case OCF_SIGNED_ADD: {
+ // X + 0 -> {X, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ // We can strength reduce this signed add into a regular add if we can prove
+ // that it will never overflow.
+ if (OCF == OCF_SIGNED_ADD)
+ if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
+ true);
+ }
- Module *M = I.getParent()->getParent()->getParent();
- Type *Ty = LHS->getType();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
- CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
- Value *Add = Builder->CreateExtractValue(Call, 0);
+ case OCF_UNSIGNED_SUB:
+ case OCF_SIGNED_SUB: {
+ // X - 0 -> {X, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ if (OCF == OCF_SIGNED_SUB) {
+ if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ } else {
+ if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ }
+ break;
+ }
- IC.ReplaceInstUsesWith(*OrigAdd, Add);
+ case OCF_UNSIGNED_MUL: {
+ OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true);
+ } // FALL THROUGH
+ case OCF_SIGNED_MUL:
+ // X * undef -> undef
+ if (isa<UndefValue>(RHS))
+ return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false);
+
+ // X * 0 -> {0, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(RHS, Builder->getFalse(), false);
+
+ // X * 1 -> {X, false}
+ if (match(RHS, m_One()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ if (OCF == OCF_SIGNED_MUL)
+ if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ }
- // The original icmp gets replaced with the overflow value.
- return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+ return false;
}
/// \brief Recognize and process idiom involving test for multiplication
@@ -2311,7 +2366,7 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
MulB = Builder->CreateZExt(B, MulType);
Value *F =
Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
- CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul");
+ CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
IC.Worklist.Add(MulInstr);
// If there are uses of mul result other than the comparison, we know that
@@ -2657,8 +2712,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
- else if (DL) // Pointers require DL info to get their size.
- BitWidth = DL->getTypeSizeInBits(Ty->getScalarType());
+ else // Get pointer size.
+ BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
bool isSignBit = false;
@@ -2771,8 +2826,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Op0KnownZero, Op0KnownOne, 0))
return &I;
if (SimplifyDemandedBits(I.getOperandUse(1),
- APInt::getAllOnesValue(BitWidth),
- Op1KnownZero, Op1KnownOne, 0))
+ APInt::getAllOnesValue(BitWidth), Op1KnownZero,
+ Op1KnownOne, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -3091,9 +3146,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
- if (RHSC->isNullValue() && DL &&
- DL->getIntPtrType(RHSC->getType()) ==
- LHSI->getOperand(0)->getType())
+ if (RHSC->isNullValue() &&
+ DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
break;
@@ -3425,7 +3479,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// if A is a power of 2.
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Zero()) &&
- isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality())
+ isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
return new ICmpInst(I.getInversePredicate(),
Builder->CreateAnd(A, B),
Op1);
@@ -3439,21 +3493,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
}
- // (a+b) <u a --> llvm.uadd.with.overflow.
- // (a+b) <u b --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_ULT &&
- match(Op0, m_Add(m_Value(A), m_Value(B))) &&
- (Op1 == A || Op1 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
- return R;
-
- // a >u (a+b) --> llvm.uadd.with.overflow.
- // b >u (a+b) --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_UGT &&
- match(Op1, m_Add(m_Value(A), m_Value(B))) &&
- (Op0 == A || Op0 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
- return R;
+ Instruction *AddI = nullptr;
+ if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+ m_Instruction(AddI))) &&
+ isa<IntegerType>(A->getType())) {
+ Value *Result;
+ Constant *Overflow;
+ if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+ Overflow)) {
+ ReplaceInstUsesWith(*AddI, Result);
+ return ReplaceInstUsesWith(I, Overflow);
+ }
+ }
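The m_UAddWithOverflow path above replaces the old ProcessUAddIdiom routine. Purely as an illustration, the source-level idiom it funnels into OptimizeOverflowCheck looks like the following C++; the wrap-around test after an unsigned add is exactly the overflow bit the intrinsic reports:

#include <cassert>
#include <cstdint>

// The 'a + b' is AddI in the matcher; 'sum < a' is the icmp ult it pairs with.
bool add_overflows(uint32_t a, uint32_t b, uint32_t &sum) {
  sum = a + b;
  return sum < a;
}

int main() {
  uint32_t s;
  assert(add_overflows(0xFFFFFFFFu, 1u, s) && s == 0u);
  assert(!add_overflows(2u, 3u, s) && s == 5u);
  return 0;
}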
// (zext a) * (zext b) --> llvm.umul.with.overflow.
if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
@@ -3560,6 +3611,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
+ // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
+ if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
+ Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+ I.getName() + ".mask");
+ return new ICmpInst(I.getPredicate(), And,
+ Constant::getNullValue(Cst1->getType()));
+ }
+ }
+
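A worked instance of the new shift-compare fold, illustrative only: for i8 operands shifted left by 3, equality can only depend on the 5 bits that are not shifted out, so the compare becomes a masked xor tested against zero:

#include <cassert>
#include <cstdint>

int main() {
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      uint8_t A = static_cast<uint8_t>(a), B = static_cast<uint8_t>(b);
      bool shifted = static_cast<uint8_t>(A << 3) == static_cast<uint8_t>(B << 3);
      bool masked  = ((A ^ B) & 0x1F) == 0;  // ~0U >> 3, narrowed to i8, is 0x1F
      assert(shifted == masked);
    }
  return 0;
}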
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -3886,6 +3952,19 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
}
+ // Test if the FCmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 3c3c135..97ea8df 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -1,4 +1,4 @@
-//===- InstCombine.h - Main InstCombine pass definition ---------*- C++ -*-===//
+//===- InstCombineInternal.h - InstCombine pass internals -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,12 +6,17 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides internal interfaces used to implement the InstCombine.
+///
+//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
-#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
+#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
+#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
-#include "InstCombineWorklist.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
@@ -21,7 +26,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#define DEBUG_TYPE "instcombine"
@@ -34,20 +39,15 @@ class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
-/// SelectPatternFlavor - We can match a variety of different patterns for
-/// select operations.
-enum SelectPatternFlavor {
- SPF_UNKNOWN = 0,
- SPF_SMIN,
- SPF_UMIN,
- SPF_SMAX,
- SPF_UMAX,
- SPF_ABS,
- SPF_NABS
-};
-
-/// getComplexity: Assign a complexity or rank value to LLVM Values...
-/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+/// \brief Assign a complexity or rank value to LLVM Values.
+///
+/// This routine maps IR values to various complexity ranks:
+/// 0 -> undef
+/// 1 -> Constants
+/// 2 -> Other non-instructions
+/// 3 -> Arguments
+/// 3 -> Unary operations
+/// 4 -> Other instructions
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
@@ -60,18 +60,82 @@ static inline unsigned getComplexity(Value *V) {
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
-/// AddOne - Add one to a Constant
+/// \brief Add one to a Constant
static inline Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
-/// SubOne - Subtract one from a Constant
+/// \brief Subtract one from a Constant
static inline Constant *SubOne(Constant *C) {
return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
-/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
-/// just like the normal insertion helper, but also adds any new instructions
-/// to the instcombine worklist.
+/// \brief Return true if the specified value is free to invert (apply ~ to).
+/// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
+/// is true, work under the assumption that the caller intends to remove all
+/// uses of V and only keep uses of ~V.
+///
+static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Compares can be inverted if all of their uses are being modified to use the
+ // ~V.
+ if (isa<CmpInst>(V))
+ return WillInvertAllUses;
+
+ // If `V` is of the form `A + Constant` then `-1 - V` can be folded into `(-1
+ // - Constant) - A` if we are willing to invert all of the uses.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub)
+ if (isa<Constant>(BO->getOperand(0)) || isa<Constant>(BO->getOperand(1)))
+ return WillInvertAllUses;
+
+ return false;
+}
+
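The 'A + Constant' case in IsFreeToInvert relies on a two's-complement identity: ~(A + C) equals (~C) - A under wrapping arithmetic, so the inversion costs nothing once every use is rewritten. A short sanity check, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xDEADBEEFu, C = 42u;
  // -1 - (A + C) == (-1 - C) - A, i.e. ~(A + C) == ~C - A, modulo 2^32.
  assert(~(A + C) == ~C - A);
  return 0;
}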
+
+/// \brief Specific patterns of overflow check idioms that we match.
+enum OverflowCheckFlavor {
+ OCF_UNSIGNED_ADD,
+ OCF_SIGNED_ADD,
+ OCF_UNSIGNED_SUB,
+ OCF_SIGNED_SUB,
+ OCF_UNSIGNED_MUL,
+ OCF_SIGNED_MUL,
+
+ OCF_INVALID
+};
+
+/// \brief Returns the OverflowCheckFlavor corresponding to an overflow_with_op
+/// intrinsic.
+static inline OverflowCheckFlavor
+IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
+ switch (ID) {
+ default:
+ return OCF_INVALID;
+ case Intrinsic::uadd_with_overflow:
+ return OCF_UNSIGNED_ADD;
+ case Intrinsic::sadd_with_overflow:
+ return OCF_SIGNED_ADD;
+ case Intrinsic::usub_with_overflow:
+ return OCF_UNSIGNED_SUB;
+ case Intrinsic::ssub_with_overflow:
+ return OCF_SIGNED_SUB;
+ case Intrinsic::umul_with_overflow:
+ return OCF_UNSIGNED_MUL;
+ case Intrinsic::smul_with_overflow:
+ return OCF_SIGNED_MUL;
+ }
+}
+
+/// \brief An IRBuilder inserter that adds new instructions to the instcombine
+/// worklist.
class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
@@ -92,47 +156,60 @@ public:
}
};
-/// InstCombiner - The -instcombine pass.
+/// \brief The core instruction combiner logic.
+///
+/// This class provides both the logic to recursively visit instructions and
+/// combine them, as well as the pass infrastructure for running this as part
+/// of the LLVM pass pipeline.
class LLVM_LIBRARY_VISIBILITY InstCombiner
- : public FunctionPass,
- public InstVisitor<InstCombiner, Instruction *> {
- AssumptionCache *AC;
- const DataLayout *DL;
- TargetLibraryInfo *TLI;
- DominatorTree *DT;
- bool MadeIRChange;
- LibCallSimplifier *Simplifier;
- bool MinimizeSize;
-
+ : public InstVisitor<InstCombiner, Instruction *> {
+ // FIXME: These members shouldn't be public.
public:
- /// Worklist - All of the instructions that need to be simplified.
- InstCombineWorklist Worklist;
+ /// \brief A worklist of the instructions that need to be simplified.
+ InstCombineWorklist &Worklist;
- /// Builder - This is an IRBuilder that automatically inserts new
- /// instructions into the worklist when they are created.
+ /// \brief An IRBuilder that automatically inserts new instructions into the
+ /// worklist.
typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
BuilderTy *Builder;
- static char ID; // Pass identification, replacement for typeid
- InstCombiner()
- : FunctionPass(ID), DL(nullptr), DT(nullptr), Builder(nullptr) {
- MinimizeSize = false;
- initializeInstCombinerPass(*PassRegistry::getPassRegistry());
- }
+private:
+ // Mode in which we are running the combiner.
+ const bool MinimizeSize;
-public:
- bool runOnFunction(Function &F) override;
+ // Required analyses.
+ // FIXME: These can never be null and should be references.
+ AssumptionCache *AC;
+ TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+ const DataLayout &DL;
+
+ // Optional analyses. When non-null, these can both be used to do better
+ // combining and will be updated to reflect any changes.
+ LoopInfo *LI;
+
+ bool MadeIRChange;
- bool DoOneIteration(Function &F, unsigned ItNum);
+public:
+ InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
+ bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
+ : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
+ AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ /// \brief Run the combiner over the entire worklist until it is empty.
+ ///
+ /// \returns true if the IR is changed.
+ bool run();
AssumptionCache *getAssumptionCache() const { return AC; }
- const DataLayout *getDataLayout() const { return DL; }
-
+ const DataLayout &getDataLayout() const { return DL; }
+
DominatorTree *getDominatorTree() const { return DT; }
+ LoopInfo *getLoopInfo() const { return LI; }
+
TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
// Visitation implementation - Implement instruction combining for different
@@ -222,6 +299,7 @@ public:
Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
Value *A, Value *B, Instruction &Outer,
SelectPatternFlavor SPF2, Value *C);
+ Instruction *FoldItoFPtoI(Instruction &FI);
Instruction *visitSelectInst(SelectInst &SI);
Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
@@ -262,37 +340,51 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
- Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
SmallVectorImpl<Value *> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
- /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
- /// results in any code being generated and is interesting to optimize out. If
- /// the cast can be eliminated by some other simple transformation, we prefer
- /// to do the simplification first.
+ /// \brief Classify whether a cast is worth optimizing.
+ ///
+ /// Returns true if the cast from "V to Ty" actually results in any code
+ /// being generated and is interesting to optimize out. If the cast can be
+ /// eliminated by some other simple transformation, we prefer to do the
+ /// simplification first.
bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,
Type *Ty);
+ /// \brief Try to optimize a sequence of instructions checking if an operation
+ /// on LHS and RHS overflows.
+ ///
+ /// If a simplification is possible, stores the simplified result of the
+  /// operation in OperationResult and the result of the overflow check in
+  /// OverflowResult, and returns true. If no simplification is possible,
+ /// returns false.
+ bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+ Instruction &CtxI, Value *&OperationResult,
+ Constant *&OverflowResult);
+
Instruction *visitCallSite(CallSite CS);
- Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL);
+ Instruction *tryOptimizeCall(CallInst *CI);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS,
IntrinsicInst *Tramp);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
public:
- // InsertNewInstBefore - insert an instruction New before instruction Old
- // in the program. Add the new instruction to the worklist.
- //
+ /// \brief Inserts an instruction \p New before instruction \p Old
+ ///
+ /// Also adds the new instruction to the worklist and returns \p New so that
+ /// it is suitable for use as the return from the visitation patterns.
Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
assert(New && !New->getParent() &&
"New instruction already inserted into a basic block!");
@@ -302,21 +394,23 @@ public:
return New;
}
- // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
- // debug loc.
- //
+ /// \brief Same as InsertNewInstBefore, but also sets the debug loc.
Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
New->setDebugLoc(Old.getDebugLoc());
return InsertNewInstBefore(New, Old);
}
- // ReplaceInstUsesWith - This method is to be used when an instruction is
- // found to be dead, replacable with another preexisting expression. Here
- // we add all uses of I to the worklist, replace all uses of I with the new
- // value, then return I, so that the inst combiner will know that I was
- // modified.
- //
+ /// \brief A combiner-aware RAUW-like routine.
+ ///
+ /// This method is to be used when an instruction is found to be dead,
+  /// replaceable with another preexisting expression. Here we add all uses of
+ /// I to the worklist, replace all uses of I with the new value, then return
+ /// I, so that the inst combiner will know that I was modified.
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ // If there are no uses to replace, then we return nullptr to indicate that
+ // no changes were made to the program.
+ if (I.use_empty()) return nullptr;
+
Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
// If we are replacing the instruction with itself, this must be in a
@@ -325,30 +419,27 @@ public:
V = UndefValue::get(I.getType());
DEBUG(dbgs() << "IC: Replacing " << I << "\n"
- " with " << *V << '\n');
+ << " with " << *V << '\n');
I.replaceAllUsesWith(V);
return &I;
}
/// Creates a result tuple for an overflow intrinsic \p II with a given
- /// \p Result and a constant \p Overflow value. If \p ReUseName is true the
- /// \p Result's name is taken from \p II.
+ /// \p Result and a constant \p Overflow value.
Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result,
- bool Overflow, bool ReUseName = true) {
- if (ReUseName)
- Result->takeName(II);
- Constant *V[] = { UndefValue::get(Result->getType()),
- Overflow ? Builder->getTrue() : Builder->getFalse() };
+ Constant *Overflow) {
+ Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Result, 0);
}
-
- // EraseInstFromFunction - When dealing with an instruction that has side
- // effects or produces a void value, we can't rely on DCE to delete the
- // instruction. Instead, visit methods should return the value returned by
- // this function.
+
+ /// \brief Combiner aware instruction erasure.
+ ///
+ /// When dealing with an instruction that has side effects or produces a void
+ /// value, we can't rely on DCE to delete the instruction. Instead, visit
+ /// methods should return the value returned by this function.
Instruction *EraseInstFromFunction(Instruction &I) {
DEBUG(dbgs() << "IC: ERASE " << I << '\n');
@@ -367,13 +458,12 @@ public:
}
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+ unsigned Depth, Instruction *CxtI) const {
return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
DT);
}
- bool MaskedValueIsZero(Value *V, const APInt &Mask,
- unsigned Depth = 0,
+ bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
Instruction *CxtI = nullptr) const {
return llvm::MaskedValueIsZero(V, Mask, DL, Depth, AC, CxtI, DT);
}
@@ -396,22 +486,24 @@ public:
}
private:
- /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
- /// operators which are associative or commutative.
+ /// \brief Performs a few simplifications for operators which are associative
+ /// or commutative.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
- /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
- /// which some other binary operation distributes over either by factorizing
- /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
- /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
- /// a win). Returns the simplified value, or null if it didn't simplify.
+ /// \brief Tries to simplify binary operations which some other binary
+ /// operation distributes over.
+ ///
+ /// It does this by either by factorizing out common terms (eg "(A*B)+(A*C)"
+ /// -> "A*(B+C)") or expanding out if this results in simplifications (eg: "A
+ /// & (B | C) -> (A&B) | (A&C)" if this is a win). Returns the simplified
+ /// value, or null if it didn't simplify.
Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
- /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
- /// based on the demanded bits.
+ /// \brief Attempts to replace V with a simpler value based on the demanded
+ /// bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
- Instruction *CxtI = nullptr);
+ Instruction *CxtI);
bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
@@ -420,9 +512,8 @@ private:
APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne);
- /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
- /// SimplifyDemandedBits knows about. See if the instruction has any
- /// properties that allow us to simplify its operands.
+ /// \brief Tries to simplify operands to an integer instruction based on its
+ /// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
@@ -438,9 +529,8 @@ private:
//
Instruction *FoldOpIntoPhi(Instruction &I);
- // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
- // operator and they all are only used by the PHI, PHI together their
- // inputs, and do the operation once, to the result of the PHI.
+ /// \brief Try to rotate an operation below a PHI node, using PHI nodes for
+ /// its operands.
Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
@@ -461,8 +551,9 @@ private:
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
- /// Descale - Return a value X such that Val = X * Scale, or null if none. If
- /// the multiplication is known not to overflow then NoSignedWrap is set.
+ /// \brief Returns a value X such that Val = X * Scale, or null if none.
+ ///
+ /// If the multiplication is known not to overflow then NoSignedWrap is set.
Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6230c00..5aa59c6 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -83,7 +84,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (CallSite CS = I) {
+ if (auto CS = CallSite(I)) {
// If this is the function being called then we treat it like a load and
// ignore it.
if (CS.isCallee(&U))
@@ -163,62 +164,75 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return nullptr;
}
-Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
- // Ensure that the alloca array size argument has type intptr_t, so that
- // any casting is exposed early.
- if (DL) {
- Type *IntPtrTy = DL->getIntPtrType(AI.getType());
- if (AI.getArraySize()->getType() != IntPtrTy) {
- Value *V = Builder->CreateIntCast(AI.getArraySize(),
- IntPtrTy, false);
- AI.setOperand(0, V);
- return &AI;
- }
+static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
+ // Check for array size of 1 (scalar allocation).
+ if (!AI.isArrayAllocation()) {
+ // i32 1 is the canonical array size for scalar allocations.
+ if (AI.getArraySize()->getType()->isIntegerTy(32))
+ return nullptr;
+
+ // Canonicalize it.
+ Value *V = IC.Builder->getInt32(1);
+ AI.setOperand(0, V);
+ return &AI;
}
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
- if (AI.isArrayAllocation()) { // Check C != 1
- if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
- ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
- New->setAlignment(AI.getAlignment());
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It = New;
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
-
- // Now that I is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Type *IdxTy = DL
- ? DL->getIntPtrType(AI.getType())
- : Type::getInt64Ty(AI.getContext());
- Value *NullIdx = Constant::getNullValue(IdxTy);
- Value *Idx[2] = { NullIdx, NullIdx };
- Instruction *GEP =
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+
+ // Now that I is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = {NullIdx, NullIdx};
+ Instruction *GEP =
GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
- InsertNewInstBefore(GEP, *It);
+ IC.InsertNewInstBefore(GEP, *It);
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return ReplaceInstUsesWith(AI, GEP);
- } else if (isa<UndefValue>(AI.getArraySize())) {
- return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
- }
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return IC.ReplaceInstUsesWith(AI, GEP);
+ }
+
+ if (isa<UndefValue>(AI.getArraySize()))
+ return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
}
- if (DL && AI.getAllocatedType()->isSized()) {
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ if (auto *I = simplifyAllocaArraySize(*this, AI))
+ return I;
+
+ if (AI.getAllocatedType()->isSized()) {
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
- AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));
+ AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
// Move all alloca's of zero byte objects to the entry block and merge them
// together. Note that we only do this for alloca's, because malloc should
// allocate and return a unique pointer, even for a zero byte allocation.
- if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
// For a zero sized alloca there is no point in doing an array allocation.
// This is helpful if the array size is a complicated expression not used
// elsewhere.
@@ -236,7 +250,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// dominance as the array size was forced to a constant earlier already.
AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
AI.moveBefore(FirstInst);
return &AI;
}
@@ -245,7 +259,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// assign it the preferred alignment.
if (EntryAI->getAlignment() == 0)
EntryAI->setAlignment(
- DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
// Replace this zero-sized alloca with the one at the start of the entry
// block after ensuring that the address will be aligned enough for both
// types.
@@ -269,7 +283,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(
- Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT);
+ Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -300,7 +314,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombiner currently is using.
-static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy) {
+static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
+ const Twine &Suffix = "") {
Value *Ptr = LI.getPointerOperand();
unsigned AS = LI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
@@ -308,7 +323,8 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
- LI.getAlignment(), LI.getName());
+ LI.getAlignment(), LI.getName() + Suffix);
+ MDBuilder MDB(NewLoad->getContext());
for (const auto &MDPair : MD) {
unsigned ID = MDPair.first;
MDNode *N = MDPair.second;
@@ -335,21 +351,81 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
break;
case LLVMContext::MD_nonnull:
- // FIXME: We should translate this into range metadata for integer types
- // and vice versa.
- if (NewTy->isPointerTy())
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
NewLoad->setMetadata(ID, N);
+ break;
+ }
+ // If it's integral now, translate it to !range metadata.
+ if (NewTy->isIntegerTy()) {
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt =
+ ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLoad->setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+ }
break;
case LLVMContext::MD_range:
// FIXME: It would be nice to propagate this in some way, but the type
- // conversions make it hard.
+ // conversions make it hard. If the new type is a pointer, we could
+ // translate it to !nonnull metadata.
break;
}
}
return NewLoad;
}
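As a rough illustration of the new !nonnull handling above (hypothetical IR in the pre-3.7 style the comments already use; metadata node numbers are invented), a pointer load known to be non-null that gets rewritten to an integer load keeps the non-null fact as a wrap-around !range:

    ; before: a pointer load known to be non-null
    %p = load i8** %pp, align 8, !nonnull !0
    ; after combineLoadToNewType rewrites it to i64 (assuming 64-bit pointers)
    %pp.cast = bitcast i8** %pp to i64*
    %p.int = load i64* %pp.cast, align 8, !range !1
    !0 = !{}
    !1 = !{i64 1, i64 0}   ; half-open range [1, 0) wraps around: "any value except 0"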
+/// \brief Combine a store to a new type.
+///
+/// Returns the newly created store instruction.
+static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
+ Value *Ptr = SI.getPointerOperand();
+ unsigned AS = SI.getPointerAddressSpace();
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ SI.getAllMetadata(MD);
+
+ StoreInst *NewStore = IC.Builder->CreateAlignedStore(
+ V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
+ SI.getAlignment());
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a store instruction changing *only its
+ // type*. The only metadata it makes sense to drop is metadata which is
+ // invalidated when the pointer type changes. This should essentially
+ // never be the case in LLVM, but we explicitly switch over only known
+ // metadata to be conservatively correct. If you are adding metadata to
+ // LLVM which pertains to stores, you almost certainly want to add it
+ // here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ // All of these directly apply.
+ NewStore->setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_nonnull:
+ case LLVMContext::MD_range:
+ // These don't apply for stores.
+ break;
+ }
+ }
+
+ return NewStore;
+}
+
/// \brief Combine loads to match the type of value their uses after looking
/// through intervening bitcasts.
///
@@ -376,6 +452,35 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
if (LI.use_empty())
return nullptr;
+ Type *Ty = LI.getType();
+ const DataLayout &DL = IC.getDataLayout();
+
+ // Try to canonicalize loads whose value is only ever stored so that they
+ // operate over integers instead of any other type. We only do this when the
+ // loaded type is sized, its size exactly matches its store size, and the
+ // store size is a legal integer type.
+ if (!Ty->isIntegerTy() && Ty->isSized() &&
+ DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
+ DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
+ if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
+ auto *SI = dyn_cast<StoreInst>(U);
+ return SI && SI->getPointerOperand() != &LI;
+ })) {
+ LoadInst *NewLoad = combineLoadToNewType(
+ IC, LI,
+ Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
+ // Replace all the stores with stores of the newly loaded value.
+ for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
+ auto *SI = cast<StoreInst>(*UI++);
+ IC.Builder->SetInsertPoint(SI);
+ combineStoreToNewValue(IC, *SI, NewLoad);
+ IC.EraseInstFromFunction(*SI);
+ }
+ assert(LI.use_empty() && "Failed to remove all users of the load!");
+ // Return the old load so the combiner can delete it safely.
+ return &LI;
+ }
+ }
// Fold away bit casts of the loaded value by loading the desired type.
if (LI.hasOneUse())
@@ -391,6 +496,218 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
return nullptr;
}
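A hypothetical sketch of the only-ever-stored canonicalization added above, assuming a target where i32 is a legal integer type:

    ; before: the float is loaded only so it can be stored somewhere else
    %f = load float* %src, align 4
    store float %f, float* %dst, align 4
    ; after: the same copy, rewritten over the equally sized legal integer
    %src.cast = bitcast float* %src to i32*
    %dst.cast = bitcast float* %dst to i32*
    %f.int = load i32* %src.cast, align 4
    store i32 %f.int, i32* %dst.cast, align 4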
+static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
+ // FIXME: We could probably, with some care, handle both volatile and atomic
+ // loads here, but it isn't clear that this is important.
+ if (!LI.isSimple())
+ return nullptr;
+
+ Type *T = LI.getType();
+ if (!T->isAggregateType())
+ return nullptr;
+
+ assert(LI.getAlignment() && "Alignment must be set at this point");
+
+ if (auto *ST = dyn_cast<StructType>(T)) {
+ // If the struct has only one element, we unpack it.
+ if (ST->getNumElements() == 1) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
+ ".unpack");
+ return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ UndefValue::get(T), NewLoad, 0, LI.getName()));
+ }
+ }
+
+ if (auto *AT = dyn_cast<ArrayType>(T)) {
+ // If the array has only one element, we unpack it.
+ if (AT->getNumElements() == 1) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, AT->getElementType(),
+ ".unpack");
+ return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ UndefValue::get(T), NewLoad, 0, LI.getName()));
+ }
+ }
+
+ return nullptr;
+}
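A sketch of what unpackLoadToAggregate does to a single-element struct load (hypothetical IR; the loaded value gets the ".unpack" suffix created above):

    ; before
    %agg = load { i32 }* %p, align 4
    ; after: load the lone member directly and re-wrap it
    %p.cast = bitcast { i32 }* %p to i32*
    %agg.unpack = load i32* %p.cast, align 4
    %agg.new = insertvalue { i32 } undef, i32 %agg.unpack, 0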
+
+// If we can determine that all possible objects pointed to by the provided
+// pointer value are not only dereferenceable but also definitively less than
+// or equal to the provided maximum size, then return true. Otherwise, return
+// false. Constant global values and allocas are the objects whose size we can
+// establish this way.
+//
+// FIXME: This should probably live in ValueTracking (or similar).
+static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
+ const DataLayout &DL) {
+ SmallPtrSet<Value *, 4> Visited;
+ SmallVector<Value *, 4> Worklist(1, V);
+
+ do {
+ Value *P = Worklist.pop_back_val();
+ P = P->stripPointerCasts();
+
+ if (!Visited.insert(P).second)
+ continue;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
+ continue;
+ }
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
+ if (GA->mayBeOverridden())
+ return false;
+ Worklist.push_back(GA->getAliasee());
+ continue;
+ }
+
+ // If we know how big this object is, and it is less than MaxSize, continue
+ // searching. Otherwise, return false.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
+ if (!AI->getAllocatedType()->isSized())
+ return false;
+
+ ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!CS)
+ return false;
+
+ uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
+ // Make sure that, even if the multiplication below would wrap as a
+ // uint64_t, we still do the right thing.
+ if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+ return false;
+ continue;
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
+ return false;
+
+ uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
+ if (InitSize > MaxSize)
+ return false;
+ continue;
+ }
+
+ return false;
+ } while (!Worklist.empty());
+
+ return true;
+}
+
+// If we're indexing into an object of a known size, and the outer index is
+// not a constant, but having any value but zero would lead to undefined
+// behavior, replace it with zero.
+//
+// For example, if we have:
+// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+// ...
+// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
+// ... = load i32* %arrayidx, align 4
+// Then we know that we can replace %x in the GEP with i64 0.
+//
+// FIXME: We could fold any GEP index to zero that would cause UB if it were
+// not zero. Currently, we only handle the first such index. We could also
+// search through non-zero constant indices if we kept track of the offsets
+// those indices implied.
+static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
+ Instruction *MemI, unsigned &Idx) {
+ if (GEPI->getNumOperands() < 2)
+ return false;
+
+ // Find the first non-zero index of a GEP. If all indices are zero, return
+ // one past the last index.
+ auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
+ unsigned I = 1;
+ for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
+ Value *V = GEPI->getOperand(I);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ if (CI->isZero())
+ continue;
+
+ break;
+ }
+
+ return I;
+ };
+
+ // Skip through initial 'zero' indices, and find the corresponding pointer
+ // type. See if the next index is not a constant.
+ Idx = FirstNZIdx(GEPI);
+ if (Idx == GEPI->getNumOperands())
+ return false;
+ if (isa<Constant>(GEPI->getOperand(Idx)))
+ return false;
+
+ SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
+ Type *AllocTy = GetElementPtrInst::getIndexedType(
+ cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
+ ->getElementType(),
+ Ops);
+ if (!AllocTy || !AllocTy->isSized())
+ return false;
+ const DataLayout &DL = IC.getDataLayout();
+ uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
+
+ // If there are more indices after the one we might replace with a zero, make
+ // sure they're all non-negative. If any of them are negative, the overall
+ // address being computed might be before the base address determined by the
+ // first non-zero index.
+ auto IsAllNonNegative = [&]() {
+ for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
+ bool KnownNonNegative, KnownNegative;
+ IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
+ KnownNegative, 0, MemI);
+ if (KnownNonNegative)
+ continue;
+ return false;
+ }
+
+ return true;
+ };
+
+ // FIXME: If the GEP is not inbounds, and there are extra indices after the
+ // one we'll replace, those could cause the address computation to wrap
+ // (rendering the IsAllNonNegative() check below insufficient). We could do
+ // better by ignoring zero indices (and other indices we can prove small
+ // enough not to wrap).
+ if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
+ return false;
+
+ // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
+ // also known to be dereferenceable.
+ return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
+ IsAllNonNegative();
+}
+
+// If we're indexing into an object with a variable index for the memory
+// access, but the object has only one element, we can assume that the index
+// will always be zero. If we replace the GEP, return it.
+template <typename T>
+static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+ T &MemI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+ unsigned Idx;
+ if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+ Instruction *NewGEPI = GEPI->clone();
+ NewGEPI->setOperand(Idx,
+ ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+ NewGEPI->insertBefore(GEPI);
+ MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+ return NewGEPI;
+ }
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
@@ -399,23 +716,30 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
return Res;
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT);
- unsigned LoadAlign = LI.getAlignment();
- unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
- DL->getABITypeAlignment(LI.getType());
-
- if (KnownAlign > EffectiveLoadAlign)
- LI.setAlignment(KnownAlign);
- else if (LoadAlign == 0)
- LI.setAlignment(EffectiveLoadAlign);
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign =
+ LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
+
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
+ Worklist.Add(NewGEPI);
+ return &LI;
}
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return nullptr;
+ if (Instruction *Res = unpackLoadToAggregate(*this, LI))
+ return Res;
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -466,8 +790,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
SI->getOperand(1)->getName()+".val");
LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
@@ -521,50 +845,12 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
if (!SI.isSimple())
return false;
- Value *Ptr = SI.getPointerOperand();
Value *V = SI.getValueOperand();
- unsigned AS = SI.getPointerAddressSpace();
- SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
- SI.getAllMetadata(MD);
// Fold away bit casts of the stored value by storing the original type.
if (auto *BC = dyn_cast<BitCastInst>(V)) {
V = BC->getOperand(0);
- StoreInst *NewStore = IC.Builder->CreateAlignedStore(
- V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
- SI.getAlignment());
- for (const auto &MDPair : MD) {
- unsigned ID = MDPair.first;
- MDNode *N = MDPair.second;
- // Note, essentially every kind of metadata should be preserved here! This
- // routine is supposed to clone a store instruction changing *only its
- // type*. The only metadata it makes sense to drop is metadata which is
- // invalidated when the pointer type changes. This should essentially
- // never be the case in LLVM, but we explicitly switch over only known
- // metadata to be conservatively correct. If you are adding metadata to
- // LLVM which pertains to stores, you almost certainly want to add it
- // here.
- switch (ID) {
- case LLVMContext::MD_dbg:
- case LLVMContext::MD_tbaa:
- case LLVMContext::MD_prof:
- case LLVMContext::MD_fpmath:
- case LLVMContext::MD_tbaa_struct:
- case LLVMContext::MD_alias_scope:
- case LLVMContext::MD_noalias:
- case LLVMContext::MD_nontemporal:
- case LLVMContext::MD_mem_parallel_loop_access:
- // All of these directly apply.
- NewStore->setMetadata(ID, N);
- break;
-
- case LLVMContext::MD_invariant_load:
- case LLVMContext::MD_nonnull:
- case LLVMContext::MD_range:
- // These don't apply for stores.
- break;
- }
- }
+ combineStoreToNewValue(IC, SI, V);
return true;
}
@@ -573,6 +859,39 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
return false;
}
+static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+ // FIXME: We could probably, with some care, handle both volatile and atomic
+ // stores here, but it isn't clear that this is important.
+ if (!SI.isSimple())
+ return false;
+
+ Value *V = SI.getValueOperand();
+ Type *T = V->getType();
+
+ if (!T->isAggregateType())
+ return false;
+
+ if (auto *ST = dyn_cast<StructType>(T)) {
+ // If the struct has only one element, we unpack it.
+ if (ST->getNumElements() == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+ }
+
+ if (auto *AT = dyn_cast<ArrayType>(T)) {
+ // If the array has only one element, we unpack it.
+ if (AT->getNumElements() == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+ }
+
+ return false;
+}
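The store-side counterpart, sketched under the same assumptions:

    ; before
    store { i32 } %agg, { i32 }* %p, align 4
    ; after: extract the lone member and store it directly
    %elt = extractvalue { i32 } %agg, 0
    %p.cast = bitcast { i32 }* %p to i32*
    store i32 %elt, i32* %p.cast, align 4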
+
/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
@@ -611,17 +930,25 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
return EraseInstFromFunction(SI);
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
- unsigned StoreAlign = SI.getAlignment();
- unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
- DL->getABITypeAlignment(Val->getType());
-
- if (KnownAlign > EffectiveStoreAlign)
- SI.setAlignment(KnownAlign);
- else if (StoreAlign == 0)
- SI.setAlignment(EffectiveStoreAlign);
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign =
+ StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+
+ // Try to canonicalize the stored type.
+ if (unpackStoreToAggregate(*this, SI))
+ return EraseInstFromFunction(SI);
+
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
+ Worklist.Add(NewGEPI);
+ return &SI;
}
// Don't hack volatile/atomic stores.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b2ff96f..a554e9f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -26,7 +26,7 @@ using namespace PatternMatch;
/// where it is known to be non-zero. If this allows us to simplify the
/// computation, do so and return the new operand, otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
// code.
@@ -47,8 +47,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
if (I->isLogicalShift() &&
- isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0,
- IC.getAssumptionCache(), CxtI,
+ isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
+ IC.getAssumptionCache(), &CxtI,
IC.getDominatorTree())) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
@@ -126,7 +126,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
/// \brief Return true if we can prove that:
/// (mul LHS, RHS) === (mul nsw LHS, RHS)
bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -137,8 +137,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// Note that underestimating the number of sign bits gives a more
// conservative answer.
- unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
- ComputeNumSignBits(RHS, 0, CxtI);
+ unsigned SignBits =
+ ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI);
// First handle the easy case: if we have enough sign bits there's
// definitely no overflow.
@@ -157,8 +157,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// For simplicity we just check if at least one side is not negative.
bool LHSNonNegative, LHSNegative;
bool RHSNonNegative, RHSNegative;
- ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI);
+ ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI);
if (LHSNonNegative || RHSNonNegative)
return true;
}
@@ -217,12 +217,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
NewCst = getLogBase2Vector(CV);
if (NewCst) {
+ unsigned Width = NewCst->getType()->getPrimitiveSizeInBits();
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
if (I.hasNoUnsignedWrap())
Shl->setHasNoUnsignedWrap();
- if (I.hasNoSignedWrap() && NewCst->isNotMinSignedValue())
- Shl->setHasNoSignedWrap();
+ if (I.hasNoSignedWrap()) {
+ uint64_t V;
+ if (match(NewCst, m_ConstantInt(V)) && V != Width - 1)
+ Shl->setHasNoSignedWrap();
+ }
return Shl;
}
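A small worked example of why the Width - 1 guard above is needed, using i8 (Width == 8); illustrative only:

    %m = mul nsw i8 %x, -128   ; -128 is the power of two 0x80, so log2 == 7 == Width - 1
    %s = shl nsw i8 %x, 7      ; not equivalent: for %x == 1 the mul yields -128 without
                               ; overflow, but 1 << 7 == 128 overflows i8, so the shl would
                               ; be poison; nsw may only be kept when log2(C) != Width - 1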
@@ -375,7 +379,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -422,7 +426,7 @@ static bool isFiniteNonZeroFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isFiniteNonZero())
return false;
}
@@ -437,7 +441,7 @@ static bool isNormalFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isNormal())
return false;
}
@@ -780,7 +784,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1155,7 +1159,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BO;
}
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1206,7 +1210,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFDivInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
+ DL, TLI, DT, AC))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
@@ -1337,7 +1342,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1384,7 +1389,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
@@ -1481,7 +1486,8 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFRemInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
+ DL, TLI, DT, AC))
return ReplaceInstUsesWith(I, V);
// Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 53831c8..6a6693c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/DataLayout.h"
using namespace llvm;
#define DEBUG_TYPE "instcombine"
@@ -231,7 +230,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
Value *Base = FixedOperands[0];
GetElementPtrInst *NewGEP =
- GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
+ GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,
+ makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
return NewGEP;
@@ -375,8 +375,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
if (isVolatile)
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+ for (Value *IncValue : PN.incoming_values())
+ cast<LoadInst>(IncValue)->setVolatile(false);
LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
@@ -539,8 +539,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
// Scan the operands to see if they are either phi nodes or are equal to
// the value.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Op = PN->getIncomingValue(i);
+ for (Value *Op : PN->incoming_values()) {
if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
return false;
@@ -891,8 +890,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.
- if (PN.getType()->isIntegerTy() && DL &&
- !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (PN.getType()->isIntegerTy() &&
+ !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index bf3c33e..d2fbcdd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -11,88 +11,55 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
-/// returning the kind and providing the out parameter results if we
-/// successfully match.
static SelectPatternFlavor
-MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
- SelectInst *SI = dyn_cast<SelectInst>(V);
- if (!SI) return SPF_UNKNOWN;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
- if (!ICI) return SPF_UNKNOWN;
-
- ICmpInst::Predicate Pred = ICI->getPredicate();
- Value *CmpLHS = ICI->getOperand(0);
- Value *CmpRHS = ICI->getOperand(1);
- Value *TrueVal = SI->getTrueValue();
- Value *FalseVal = SI->getFalseValue();
-
- LHS = CmpLHS;
- RHS = CmpRHS;
-
- // (icmp X, Y) ? X : Y
- if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
- switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMAX;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMAX;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMIN;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMIN;
- }
- }
-
- // (icmp X, Y) ? Y : X
- if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
- switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMIN;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMIN;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMAX;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMAX;
- }
+getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) {
+ switch (SPF) {
+ default:
+ llvm_unreachable("unhandled!");
+
+ case SPF_SMIN:
+ return SPF_SMAX;
+ case SPF_UMIN:
+ return SPF_UMAX;
+ case SPF_SMAX:
+ return SPF_SMIN;
+ case SPF_UMAX:
+ return SPF_UMIN;
}
+}
- if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
- if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
- (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
-
- // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
- // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
- if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
- return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
- }
-
- // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
- // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
- if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
- return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
- }
- }
+static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) {
+ switch (SPF) {
+ default:
+ llvm_unreachable("unhandled!");
+
+ case SPF_SMIN:
+ return ICmpInst::ICMP_SLT;
+ case SPF_UMIN:
+ return ICmpInst::ICMP_ULT;
+ case SPF_SMAX:
+ return ICmpInst::ICMP_SGT;
+ case SPF_UMAX:
+ return ICmpInst::ICMP_UGT;
}
-
- // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
- return SPF_UNKNOWN;
}
+static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
+ SelectPatternFlavor SPF, Value *A,
+ Value *B) {
+ CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+ return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
+}
/// GetSelectFoldableOperands - We want to turn code that looks like this:
/// %C = or %A, %B
@@ -312,9 +279,9 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const DataLayout *TD,
const TargetLibraryInfo *TLI,
- DominatorTree *DT, AssumptionCache *AC) {
+ const DataLayout &DL, DominatorTree *DT,
+ AssumptionCache *AC) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -326,18 +293,18 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL,
TLI, DT, AC);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL,
TLI, DT, AC);
}
@@ -361,14 +328,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (ConstOps.size() == I->getNumOperands()) {
if (CmpInst *C = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
- ConstOps[1], TD, TLI);
+ ConstOps[1], DL, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], DL);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD, TLI);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
+ DL, TLI);
}
}
@@ -437,6 +404,62 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
return Builder->CreateOr(V, Y);
}
+/// Attempt to fold a cttz/ctlz followed by an icmp plus select into a single
+/// call to cttz/ctlz with the flag 'is_zero_undef' cleared.
+///
+/// For example, we can fold the following code sequence:
+/// \code
+/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+/// %1 = icmp ne i32 %x, 0
+/// %2 = select i1 %1, i32 %0, i32 32
+/// \endcode
+///
+/// into:
+/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+
+ // Check if the condition value compares a value for equality against zero.
+ if (!ICI->isEquality() || !match(CmpRHS, m_Zero()))
+ return nullptr;
+
+ Value *Count = FalseVal;
+ Value *ValueOnZero = TrueVal;
+ if (Pred == ICmpInst::ICMP_NE)
+ std::swap(Count, ValueOnZero);
+
+ // Skip zero extend/truncate.
+ Value *V = nullptr;
+ if (match(Count, m_ZExt(m_Value(V))) ||
+ match(Count, m_Trunc(m_Value(V))))
+ Count = V;
+
+ // Check if the value propagated on zero is a constant number equal to the
+ // size in bits of 'Count'.
+ unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
+ if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
+ return nullptr;
+
+ // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
+ // input to the cttz/ctlz is used as LHS for the compare instruction.
+ if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
+ match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
+ IntrinsicInst *II = cast<IntrinsicInst>(Count);
+ IRBuilder<> Builder(II);
+ // Explicitly clear the 'is_zero_undef' flag.
+ IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
+ Type *Ty = NewI->getArgOperand(1)->getType();
+ NewI->setArgOperand(1, Constant::getNullValue(Ty));
+ Builder.Insert(NewI);
+ return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
+ }
+
+ return nullptr;
+}
+
/// visitSelectInstWithICmp - Visit a SelectInst that has an
/// ICmpInst as its first operand.
///
@@ -579,25 +602,25 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -665,6 +688,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
return ReplaceInstUsesWith(SI, V);
+ if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
+ return ReplaceInstUsesWith(SI, V);
+
return Changed ? &SI : nullptr;
}
@@ -770,6 +796,52 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
SI->getCondition(), SI->getFalseValue(), SI->getTrueValue());
return ReplaceInstUsesWith(Outer, NewSI);
}
+
+ auto IsFreeOrProfitableToInvert =
+ [&](Value *V, Value *&NotV, bool &ElidesXor) {
+ if (match(V, m_Not(m_Value(NotV)))) {
+ // If V has at most 2 uses then we can get rid of the xor operation
+ // entirely.
+ ElidesXor |= !V->hasNUsesOrMore(3);
+ return true;
+ }
+
+ if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) {
+ NotV = nullptr;
+ return true;
+ }
+
+ return false;
+ };
+
+ Value *NotA, *NotB, *NotC;
+ bool ElidesXor = false;
+
+ // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C)
+ // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C)
+ // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C)
+ // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C)
+ //
+ // This transform is performance neutral if we can elide at least one xor from
+ // the set of three operands, since we'll be tacking on an xor at the very
+ // end.
+ if (IsFreeOrProfitableToInvert(A, NotA, ElidesXor) &&
+ IsFreeOrProfitableToInvert(B, NotB, ElidesXor) &&
+ IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) {
+ if (!NotA)
+ NotA = Builder->CreateNot(A);
+ if (!NotB)
+ NotB = Builder->CreateNot(B);
+ if (!NotC)
+ NotC = Builder->CreateNot(C);
+
+ Value *NewInner = generateMinMaxSelectPattern(
+ Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB);
+ Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern(
+ Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC));
+ return ReplaceInstUsesWith(Outer, NewOuter);
+ }
+
return nullptr;
}
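An illustrative before/after for the nested min/max identity above, with smin/smax spelled out as compare-plus-select (hypothetical IR):

    ; before: smin(smin(~a, ~b), ~c), with the xors otherwise unused
    %na = xor i32 %a, -1
    %nb = xor i32 %b, -1
    %nc = xor i32 %c, -1
    %c1 = icmp slt i32 %na, %nb
    %inner = select i1 %c1, i32 %na, i32 %nb
    %c2 = icmp slt i32 %inner, %nc
    %res = select i1 %c2, i32 %inner, i32 %nc
    ; after: ~smax(smax(a, b), c) -- three xors become one
    %c1 = icmp sgt i32 %a, %b
    %inner = select i1 %c1, i32 %a, i32 %b
    %c2 = icmp sgt i32 %inner, %c
    %max = select i1 %c2, i32 %inner, i32 %c
    %res = xor i32 %max, -1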
@@ -868,7 +940,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
- if (C->getZExtValue() == false) {
+ if (!C->getZExtValue()) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
}
@@ -1082,26 +1154,67 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into one of our operands.
- if (SI.getType()->isIntegerTy()) {
+ if (SI.getType()->isIntOrIntVectorTy()) {
if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
- // MAX(MAX(a, b), a) -> MAX(a, b)
- // MIN(MIN(a, b), a) -> MIN(a, b)
- // MAX(MIN(a, b), a) -> a
- // MIN(MAX(a, b), a) -> a
Value *LHS, *RHS, *LHS2, *RHS2;
- if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
- if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ Instruction::CastOps CastOp;
+ SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS, &CastOp);
+
+ if (SPF) {
+ // Canonicalize so that type casts are outside select patterns.
+ if (LHS->getType()->getPrimitiveSizeInBits() !=
+ SI.getType()->getPrimitiveSizeInBits()) {
+ CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+ Value *Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ Value *NewSI = Builder->CreateCast(CastOp,
+ Builder->CreateSelect(Cmp, LHS, RHS),
+ SI.getType());
+ return ReplaceInstUsesWith(SI, NewSI);
+ }
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2))
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
- if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2))
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
SI, SPF, LHS))
return R;
}
+ // MAX(~a, ~b) -> ~MIN(a, b)
+ if (SPF == SPF_SMAX || SPF == SPF_UMAX) {
+ if (IsFreeToInvert(LHS, LHS->hasNUses(2)) &&
+ IsFreeToInvert(RHS, RHS->hasNUses(2))) {
+
+ // This transform adds an xor operation, and that extra cost needs to be
+ // justified. We look for simplifications that will result from
+ // applying this rule:
+
+ bool Profitable =
+ (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) ||
+ (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) ||
+ (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value())));
+
+ if (Profitable) {
+ Value *NewLHS = Builder->CreateNot(LHS);
+ Value *NewRHS = Builder->CreateNot(RHS);
+ Value *NewCmp = SPF == SPF_SMAX
+ ? Builder->CreateICmpSLT(NewLHS, NewRHS)
+ : Builder->CreateICmpULT(NewLHS, NewRHS);
+ Value *NewSI =
+ Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS));
+ return ReplaceInstUsesWith(SI, NewSI);
+ }
+ }
+ }
+
// TODO.
// ABS(-X) -> ABS(X)
}
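A sketch of the profitable case for the MAX(~a, ~b) fold above: when the select result itself feeds a not, the whole chain collapses to a plain min once InstCombine's follow-up cleanups run (hypothetical IR):

    ; before: ~smax(~a, ~b)
    %na = xor i32 %a, -1
    %nb = xor i32 %b, -1
    %cmp = icmp sgt i32 %na, %nb
    %max = select i1 %cmp, i32 %na, i32 %nb
    %res = xor i32 %max, -1
    ; after cleanups: smin(a, b), with every xor gone
    %cmp = icmp slt i32 %a, %b
    %res = select i1 %cmp, i32 %a, i32 %b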
@@ -1115,19 +1228,41 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return NV;
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
- if (TrueSI->getCondition() == CondVal) {
- if (SI.getTrueValue() == TrueSI->getTrueValue())
- return nullptr;
- SI.setOperand(1, TrueSI->getTrueValue());
- return &SI;
+ if (TrueSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, select(C, a, b), c) -> select(C, a, c)
+ if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return nullptr;
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
+ // We choose this as the normal form to enable folding on the And and to
+ // shorten paths for the values (this helps GetUnderlyingObjects(), for
+ // example).
+ if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
+ Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+ SI.setOperand(0, And);
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
}
}
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
- if (FalseSI->getCondition() == CondVal) {
- if (SI.getFalseValue() == FalseSI->getFalseValue())
- return nullptr;
- SI.setOperand(2, FalseSI->getFalseValue());
- return &SI;
+ if (FalseSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, a, select(C, b, c)) -> select(C, a, c)
+ if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return nullptr;
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
+ if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
+ Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+ SI.setOperand(0, Or);
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
}
}
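A short sketch of the two new select-of-select folds above (hypothetical IR):

    ; select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
    %inner = select i1 %c1, i32 %a, i32 %b
    %res = select i1 %c0, i32 %inner, i32 %b
    ; becomes
    %cond = and i1 %c0, %c1
    %res = select i1 %cond, i32 %a, i32 %b

    ; select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
    %inner = select i1 %c1, i32 %a, i32 %b
    %res = select i1 %c0, i32 %a, i32 %inner
    ; becomes
    %cond = or i1 %c0, %c1
    %res = select i1 %cond, i32 %a, i32 %b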
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 0a16e25..d04ed58 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -175,8 +175,8 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateShifted(IncValue, NumBits, isLeftShift,
IC, PN))
return false;
return true;
@@ -187,7 +187,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this value inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
- InstCombiner &IC) {
+ InstCombiner &IC, const DataLayout &DL) {
// We can always evaluate constants shifted.
if (Constant *C = dyn_cast<Constant>(V)) {
if (isLeftShift)
@@ -196,8 +196,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
- IC.getTargetLibraryInfo());
+ V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
return V;
}
@@ -210,8 +209,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Or:
case Instruction::Xor:
      // Bitwise operators can all be evaluated shifted.
- I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::Shl: {
@@ -297,8 +298,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
case Instruction::Select:
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
- I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -306,8 +309,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
- NumBits, isLeftShift, IC));
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+ isLeftShift, IC, DL));
return PN;
}
}
@@ -337,8 +340,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
- return ReplaceInstUsesWith(I,
- GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
+ return ReplaceInstUsesWith(
+ I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
}
// See if we can simplify any instructions used by the instruction whose sole
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index ad6983a..80628b2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
-#include "llvm/IR/DataLayout.h"
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -44,19 +44,6 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
Demanded &= OpC->getValue();
I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
- // If either 'nsw' or 'nuw' is set and the constant is negative,
- // removing *any* bits from the constant could make overflow occur.
- // Remove 'nsw' and 'nuw' from the instruction in this case.
- if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
- assert(OBO->getOpcode() == Instruction::Add);
- if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
- if (OpC->getValue().isNegative()) {
- cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
- cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
- }
- }
- }
-
return true;
}
@@ -70,8 +57,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
- KnownZero, KnownOne, 0, &Inst);
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+ 0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
ReplaceInstUsesWith(Inst, V);
@@ -84,9 +71,9 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
- Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
- KnownZero, KnownOne, Depth,
- dyn_cast<Instruction>(U.getUser()));
+ auto *UserI = dyn_cast<Instruction>(U.getUser());
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
+ KnownOne, Depth, UserI);
if (!NewVal) return false;
U = NewVal;
return true;
@@ -122,15 +109,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
- assert((DL || !VTy->isPointerTy()) &&
- "SimplifyDemandedBits needs to know bit widths!");
- assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
- (!VTy->isIntOrIntVectorTy() ||
- VTy->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne "
- "must have same BitWidth");
+ assert(
+ (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -174,9 +158,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known 1 on one side, return the other.
@@ -198,9 +182,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -225,9 +209,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -256,10 +240,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -294,10 +278,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Or:
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -336,10 +320,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownOne = RHSKnownOne | LHSKnownOne;
break;
case Instruction::Xor: {
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -423,10 +407,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::Select:
- if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ // If this is a select as part of a min/max pattern, don't simplify any
+ // further in case we break the structure.
+ Value *LHS, *RHS;
+ if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN)
+ return nullptr;
+
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -445,8 +435,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.zext(truncBf);
KnownZero = KnownZero.zext(truncBf);
KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
KnownZero = KnownZero.trunc(BitWidth);
@@ -471,8 +461,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Don't touch a vector-to-scalar bitcast.
return nullptr;
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
@@ -483,8 +473,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -510,8 +500,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
+ KnownOne, Depth + 1))
return I;
InputDemandedBits = InputDemandedBits.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -532,113 +522,35 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
break;
}
- case Instruction::Add: {
- // Figure out what the input bits are. If the top bits of the and result
- // are not demanded, then the add doesn't demand them from its input
- // either.
+ case Instruction::Add:
+ case Instruction::Sub: {
+ /// If the high-bits of an ADD/SUB are not demanded, then we do not care
+ /// about the high bits of the operands.
unsigned NLZ = DemandedMask.countLeadingZeros();
-
- // If there is a constant on the RHS, there are a variety of xformations
- // we can do.
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // If null, this should be simplified elsewhere. Some of the xforms here
- // won't work if the RHS is zero.
- if (RHS->isZero())
- break;
-
- // If the top bit of the output is demanded, demand everything from the
- // input. Otherwise, we demand all the input bits except NLZ top bits.
- APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
-
- // Find information about known zero/one bits in the input.
- if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
-
- // If the RHS of the add has bits set that can't affect the input, reduce
- // the constant.
- if (ShrinkDemandedConstant(I, 1, InDemandedBits))
- return I;
-
- // Avoid excess work.
- if (LHSKnownZero == 0 && LHSKnownOne == 0)
- break;
-
- // Turn it into OR if input bits are zero.
- if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
- Instruction *Or =
- BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
- I->getName());
- return InsertNewInstWith(Or, *I);
- }
-
- // We can say something about the output known-zero and known-one bits,
- // depending on potential carries from the input constant and the
- // unknowns. For example if the LHS is known to have at most the 0x0F0F0
- // bits set and the RHS constant is 0x01001, then we know we have a known
- // one mask of 0x00001 and a known zero mask of 0xE0F0E.
-
- // To compute this, we first compute the potential carry bits. These are
- // the bits which may be modified. I'm not aware of a better way to do
- // this scan.
- const APInt &RHSVal = RHS->getValue();
- APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
-
- // Now that we know which bits have carries, compute the known-1/0 sets.
-
- // Bits are known one if they are known zero in one operand and one in the
- // other, and there is no input carry.
- KnownOne = ((LHSKnownZero & RHSVal) |
- (LHSKnownOne & ~RHSVal)) & ~CarryBits;
-
- // Bits are known zero if they are known zero in both operands and there
- // is no input carry.
- KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
- } else {
- // If the high-bits of this ADD are not demanded, then it does not demand
- // the high bits of its LHS or RHS.
- if (DemandedMask[BitWidth-1] == 0) {
- // Right fill the mask of bits for this ADD to demand the most
- // significant bit and all those below it.
- APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- }
- }
- break;
- }
- case Instruction::Sub:
- // If the high-bits of this SUB are not demanded, then it does not demand
- // the high bits of its LHS or RHS.
- if (DemandedMask[BitWidth-1] == 0) {
- // Right fill the mask of bits for this SUB to demand the most
+ if (NLZ > 0) {
+ // Right fill the mask of bits for this ADD/SUB to demand the most
// significant bit and all those below it.
- uint32_t NLZ = DemandedMask.countLeadingZeros();
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
+ LHSKnownZero, LHSKnownOne, Depth + 1) ||
+ ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1)) {
+ // Disable the nsw and nuw flags here: We can no longer guarantee that
+ // we won't wrap after simplification. Removing the nsw/nuw flags is
+ // legal here because the top bit is not demanded.
+ BinaryOperator &BinOP = *cast<BinaryOperator>(I);
+ BinOP.setHasNoSignedWrap(false);
+ BinOP.setHasNoUnsignedWrap(false);
return I;
+ }
}
- // Otherwise just hand the sub off to computeKnownBits to fill in
+ // Otherwise just hand the add/sub off to computeKnownBits to fill in
// the known zeros and ones.
computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
-
- // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
- // zero.
- if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
- APInt I0 = C0->getValue();
- if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
- Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
- return InsertNewInstWith(Xor, *I);
- }
- }
break;
+ }
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
{
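The rewritten Add/Sub case above rests on a single arithmetic fact: carries and borrows only propagate upward, so if the top NLZ bits of the result are not demanded, the top NLZ bits of the operands cannot influence any demanded bit. Once an operand has been simplified against that right-filled mask, the nsw/nuw flags must be cleared, since the narrowed operands may wrap where the originals provably did not. A standalone C++ sketch of the underlying identity (not part of the commit; the mask width is an arbitrary choice):

  #include <cassert>
  #include <cstdint>

  int main() {
    // The low k bits of a sum or difference depend only on the low k bits of
    // the operands: carries/borrows propagate strictly upward.
    const uint32_t k = 12;                  // number of demanded low bits
    const uint32_t mask = (1u << k) - 1;    // analogue of DemandedFromOps
    for (uint32_t x = 0; x < (1u << 16); x += 257)
      for (uint32_t y = 0; y < (1u << 16); y += 263) {
        assert(((x + y) & mask) == (((x & mask) + (y & mask)) & mask));
        assert(((x - y) & mask) == (((x & mask) - (y & mask)) & mask));
      }
    return 0;
  }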
@@ -662,8 +574,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
else if (IOp->hasNoUnsignedWrap())
DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
@@ -686,8 +598,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<LShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
@@ -731,8 +643,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<AShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
@@ -772,8 +684,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
// The low bits of LHS are unchanged by the srem.
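The srem case above relies on the divisor being a power of two RA: the remainder differs from the dividend by a multiple of RA, so every bit below RA is unchanged. A quick standalone check using C++'s truncating %, which matches LLVM's srem semantics (the divisor and test range are arbitrary choices):

  #include <cassert>
  #include <cstdint>

  int main() {
    // For RA = 2^k, x srem RA == x - (x sdiv RA) * RA; the subtracted term is
    // a multiple of 2^k, so bits [0, k) of the result equal bits [0, k) of x.
    const int32_t RA = 8;               // power-of-two divisor
    const uint32_t LowBits = RA - 1;    // mask of bits unchanged by the srem
    for (int32_t x = -1000; x <= 1000; ++x)
      assert((uint32_t(x % RA) & LowBits) == (uint32_t(x) & LowBits));
    return 0;
  }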
@@ -798,7 +710,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
@@ -808,10 +720,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::URem: {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
- KnownZero2, KnownOne2, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
- KnownZero2, KnownOne2, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1))
return I;
unsigned Leaders = KnownZero2.countLeadingOnes();
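The urem case keeps the leading zero bits common to both operands because an unsigned remainder is strictly smaller than the divisor and never larger than the dividend. A small standalone verification of that bound (loop ranges are arbitrary):

  #include <cassert>
  #include <cstdint>

  int main() {
    // If the divisor fits in k bits (its high bits are known zero), so does
    // x % y, because the remainder is strictly less than y; it is also never
    // larger than x, so the dividend's leading zeros carry over too.
    for (uint32_t x = 0; x < (1u << 12); ++x)
      for (uint32_t y = 1; y < (1u << 8); ++y) {
        uint32_t r = x % y;
        assert(r < y && r <= x);
      }
    return 0;
  }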
@@ -1051,7 +963,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
}
@@ -1069,7 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt DemandedElts2 = DemandedElts;
DemandedElts2.clearBit(IdxNo);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
@@ -1097,12 +1009,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt UndefElts4(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts4, Depth+1);
+ UndefElts4, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
APInt UndefElts3(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- UndefElts3, Depth+1);
+ UndefElts3, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
bool NewUndefElts = false;
@@ -1152,12 +1064,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined.
@@ -1204,7 +1116,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// div/rem demand all inputs, because they don't want divide by zero.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) {
I->setOperand(0, TmpV);
MadeChange = true;
@@ -1238,11 +1150,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Instruction::Sub:
case Instruction::Mul:
// div/rem demand all inputs, because they don't want divide by zero.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined. Consider things
@@ -1251,8 +1163,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
case Instruction::FPTrunc:
case Instruction::FPExt:
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
@@ -1273,10 +1185,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If only the low elt is demanded and this is a scalarizable intrinsic,
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index cb16584..24446c8 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -12,7 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -201,8 +202,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
DemandedMask.setBit(IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
- DemandedMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
@@ -732,7 +733,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
case Instruction::GetElementPtr: {
Value *Ptr = NewOps[0];
ArrayRef<Value*> Idx = NewOps.slice(1);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
return GEP;
}
@@ -853,10 +855,32 @@ static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
}
}
+// Returns true if the shuffle is extracting a contiguous range of values from
+// LHS, for example:
+// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
+// Shuffles to: |EE|FF|GG|HH|
+// +--+--+--+--+
+static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
+ SmallVector<int, 16> &Mask) {
+ unsigned LHSElems =
+ cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ unsigned EndIdx = Mask.back();
+ if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
+ return false;
+ for (unsigned I = 0; I != MaskElems; ++I)
+ if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
+ return false;
+ return true;
+}
+
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
SmallVector<int, 16> Mask = SVI.getShuffleMask();
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
bool MadeChange = false;
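isShuffleExtractingFromLHS above accepts a mask only when it names a strictly increasing, contiguous, in-bounds run of LHS elements. The following standalone C++ mirror of that test over a plain mask vector uses the 16-element example from the comment; the helper name and the sample masks are illustrative only:

  #include <cassert>
  #include <vector>

  // Mirrors the contiguity test: the mask must be BegIdx, BegIdx+1, ...,
  // BegIdx+N-1 and must stay inside the LHS vector of LHSElems elements.
  static bool extractsContiguousRun(const std::vector<int> &Mask,
                                    unsigned LHSElems) {
    unsigned MaskElems = Mask.size();
    unsigned BegIdx = Mask.front();
    unsigned EndIdx = Mask.back();
    if (BegIdx > EndIdx || EndIdx >= LHSElems ||
        EndIdx - BegIdx != MaskElems - 1)
      return false;
    for (unsigned I = 0; I != MaskElems; ++I)
      if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
        return false;
    return true;
  }

  int main() {
    assert(extractsContiguousRun({4, 5, 6, 7}, 16));      // |EE|FF|GG|HH|
    assert(!extractsContiguousRun({4, 6, 5, 7}, 16));     // not increasing
    assert(!extractsContiguousRun({14, 15, 16, 17}, 16)); // runs off the end
    return 0;
  }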
@@ -892,18 +916,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
if (Mask[i] < 0) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ Elts.push_back(UndefValue::get(Int32Ty));
continue;
}
if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
(Mask[i] < (int)e && isa<UndefValue>(LHS))) {
Mask[i] = -1; // Turn into undef.
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ Elts.push_back(UndefValue::get(Int32Ty));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
- Mask[i]));
+ Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
}
}
SVI.setOperand(0, SVI.getOperand(1));
@@ -929,6 +952,95 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return ReplaceInstUsesWith(SVI, V);
}
+ // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
+ // a non-vector type. We can instead bitcast the original vector followed by
+ // an extract of the desired element:
+ //
+ // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
+ // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // %1 = bitcast <4 x i8> %sroa to i32
+ // Becomes:
+ // %bc = bitcast <16 x i8> %in to <4 x i32>
+ // %ext = extractelement <4 x i32> %bc, i32 0
+ //
+ // If the shuffle is extracting a contiguous range of values from the input
+ // vector then each use which is a bitcast of the extracted size can be
+ // replaced. This will work if the vector types are compatible, and the begin
+ // index is aligned to a value in the casted vector type. If the begin index
+ // isn't aligned then we can shuffle the original vector (keeping the same
+ // vector type) before extracting.
+ //
+ // This code will bail out if the target type is fundamentally incompatible
+ // with vectors of the source type.
+ //
+ // Example of <16 x i8>, target type i32:
+ // Index range [4,8): v-----------v Will work.
+ // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ // <16 x i8>: | | | | | | | | | | | | | | | | |
+ // <4 x i32>: | | | | |
+ // +-----------+-----------+-----------+-----------+
+ // Index range [6,10): ^-----------^ Needs an extra shuffle.
+ // Target type i40: ^--------------^ Won't work, bail.
+ if (isShuffleExtractingFromLHS(SVI, Mask)) {
+ Value *V = LHS;
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ VectorType *SrcTy = cast<VectorType>(V->getType());
+ unsigned VecBitWidth = SrcTy->getBitWidth();
+ unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
+ assert(SrcElemBitWidth && "vector elements must have a bitwidth");
+ unsigned SrcNumElems = SrcTy->getNumElements();
+ SmallVector<BitCastInst *, 8> BCs;
+ DenseMap<Type *, Value *> NewBCs;
+ for (User *U : SVI.users())
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
+ if (!BC->use_empty())
+ // Only visit bitcasts that weren't previously handled.
+ BCs.push_back(BC);
+ for (BitCastInst *BC : BCs) {
+ Type *TgtTy = BC->getDestTy();
+ unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
+ if (!TgtElemBitWidth)
+ continue;
+ unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
+ bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
+ bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
+ if (!VecBitWidthsEqual)
+ continue;
+ if (!VectorType::isValidElementType(TgtTy))
+ continue;
+ VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
+ if (!BegIsAligned) {
+ // Shuffle the input so [0,NumElements) contains the output, and
+ // [NumElems,SrcNumElems) is undef.
+ SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
+ UndefValue::get(Int32Ty));
+ for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
+ ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
+ V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(ShuffleMask),
+ SVI.getName() + ".extract");
+ BegIdx = 0;
+ }
+ unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
+ assert(SrcElemsPerTgtElem);
+ BegIdx /= SrcElemsPerTgtElem;
+ bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
+ auto *NewBC =
+ BCAlreadyExists
+ ? NewBCs[CastSrcTy]
+ : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
+ if (!BCAlreadyExists)
+ NewBCs[CastSrcTy] = NewBC;
+ auto *Ext = Builder->CreateExtractElement(
+ NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
+ // The shufflevector isn't being replaced: the bitcast that used it
+ // is. InstCombine will visit the newly-created instructions.
+ ReplaceInstUsesWith(*BC, Ext);
+ MadeChange = true;
+ }
+ }
+
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle.
// Cases that might be simplified:
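The new block above turns "shuffle out a sub-vector, then bitcast it to a scalar" into "bitcast the whole vector, then extract one element", provided the extracted run starts on an element boundary of the casted type. A standalone byte-level sketch of why the two paths read the same value, modelling the <16 x i8> to i32 example from the comment with byte range [4,8); the names are illustrative and no LLVM API is used:

  #include <array>
  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    std::array<uint8_t, 16> In;
    for (unsigned I = 0; I != 16; ++I)
      In[I] = uint8_t(0xA0 + I);

    // Path 1: shuffle out bytes [4,8) into a 4-byte sub-vector, then
    // "bitcast" that sub-vector to a 32-bit scalar.
    std::array<uint8_t, 4> Sub;
    std::memcpy(Sub.data(), In.data() + 4, 4);
    uint32_t FromShuffle;
    std::memcpy(&FromShuffle, Sub.data(), 4);

    // Path 2: "bitcast" the whole vector to <4 x i32> and extract element 1
    // (BegIdx 4 divided by SrcElemsPerTgtElem 4).
    std::array<uint32_t, 4> AsI32;
    std::memcpy(AsI32.data(), In.data(), 16);
    uint32_t FromExtract = AsI32[1];

    assert(FromShuffle == FromExtract);   // both name the same four bytes
    return 0;
  }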
@@ -1099,7 +1211,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// or is a splat, do the replacement.
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
SmallVector<Constant*, 16> Elts;
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
if (newMask[i] < 0) {
Elts.push_back(UndefValue::get(Int32Ty));
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
deleted file mode 100644
index 8d857d0..0000000
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ /dev/null
@@ -1,107 +0,0 @@
-//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
-#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "instcombine"
-
-namespace llvm {
-
-/// InstCombineWorklist - This is the worklist management logic for
-/// InstCombine.
-class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
- SmallVector<Instruction*, 256> Worklist;
- DenseMap<Instruction*, unsigned> WorklistMap;
-
- void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION;
- InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION;
-public:
- InstCombineWorklist() {}
-
- bool isEmpty() const { return Worklist.empty(); }
-
- /// Add - Add the specified instruction to the worklist if it isn't already
- /// in it.
- void Add(Instruction *I) {
- if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
- Worklist.push_back(I);
- }
- }
-
- void AddValue(Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- Add(I);
- }
-
- /// AddInitialGroup - Add the specified batch of stuff in reverse order.
- /// which should only be done when the worklist is empty and when the group
- /// has no duplicates.
- void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
- assert(Worklist.empty() && "Worklist must be empty to add initial group");
- Worklist.reserve(NumEntries+16);
- WorklistMap.resize(NumEntries);
- DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
- for (unsigned Idx = 0; NumEntries; --NumEntries) {
- Instruction *I = List[NumEntries-1];
- WorklistMap.insert(std::make_pair(I, Idx++));
- Worklist.push_back(I);
- }
- }
-
- // Remove - remove I from the worklist if it exists.
- void Remove(Instruction *I) {
- DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
- if (It == WorklistMap.end()) return; // Not in worklist.
-
- // Don't bother moving everything down, just null out the slot.
- Worklist[It->second] = nullptr;
-
- WorklistMap.erase(It);
- }
-
- Instruction *RemoveOne() {
- Instruction *I = Worklist.pop_back_val();
- WorklistMap.erase(I);
- return I;
- }
-
- /// AddUsersToWorkList - When an instruction is simplified, add all users of
- /// the instruction to the work lists because they might get more simplified
- /// now.
- ///
- void AddUsersToWorkList(Instruction &I) {
- for (User *U : I.users())
- Add(cast<Instruction>(U));
- }
-
-
- /// Zap - check that the worklist is empty and nuke the backing store for
- /// the map if it is large.
- void Zap() {
- assert(WorklistMap.empty() && "Worklist empty, but map not?");
-
- // Do an explicit clear, this shrinks the map if needed.
- WorklistMap.clear();
- }
-};
-
-} // end namespace llvm.
-
-#undef DEBUG_TYPE
-
-#endif
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index a0c239a..be49cd1 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,8 +33,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
-#include "InstCombine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -43,8 +43,10 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
@@ -55,7 +57,8 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <climits>
@@ -72,35 +75,8 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
-// Initialization Routines
-void llvm::initializeInstCombine(PassRegistry &Registry) {
- initializeInstCombinerPass(Registry);
-}
-
-void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
- initializeInstCombine(*unwrap(R));
-}
-
-char InstCombiner::ID = 0;
-INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false)
-
-void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
-}
-
-
Value *InstCombiner::EmitGEPOffset(User *GEP) {
- return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP);
+ return llvm::EmitGEPOffset(Builder, DL, GEP);
}
/// ShouldChangeType - Return true if it is desirable to convert a computation
@@ -109,13 +85,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
assert(From->isIntegerTy() && To->isIntegerTy());
- // If we don't have DL, we don't know if the source/dest are legal.
- if (!DL) return false;
-
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
- bool FromLegal = DL->isLegalInteger(FromWidth);
- bool ToLegal = DL->isLegalInteger(ToWidth);
+ bool FromLegal = DL.isLegalInteger(FromWidth);
+ bool ToLegal = DL.isLegalInteger(ToWidth);
// If this is a legal integer from type, and the result would be an illegal
// type, don't do the transformation.
@@ -470,7 +443,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
- const DataLayout *DL, BinaryOperator &I,
+ const DataLayout &DL, BinaryOperator &I,
Instruction::BinaryOps InnerOpcode, Value *A,
Value *B, Value *C, Value *D) {
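tryFactorization looks for an operand common to both sides of the top-level operation and re-associates it out, turning "(A*B)+(A*C)" into "A*(B+C)". In plain modular (unsigned) arithmetic the identity holds unconditionally, which is what the rewrite relies on whenever no wrap flags are carried over; a standalone check:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Distributivity makes the factorized form equal bit-for-bit in wrapping
    // (unsigned) arithmetic; the nsw/nuw question is handled separately.
    for (uint32_t A = 0; A < 200; ++A)
      for (uint32_t B = 0; B < 200; B += 3)
        for (uint32_t C = 0; C < 200; C += 7)
          assert(A * B + A * C == A * (B + C));
    return 0;
  }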
@@ -479,6 +452,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (!A || !C || !B || !D)
return nullptr;
+ Value *V = nullptr;
Value *SimplifiedInst = nullptr;
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
@@ -495,7 +469,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
std::swap(C, D);
// Consider forming "A op' (B op D)".
// If "B op D" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+ V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
// If "B op D" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && LHS->hasOneUse() && RHS->hasOneUse())
@@ -514,7 +488,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
std::swap(C, D);
// Consider forming "(A op C) op' B".
// If "A op C" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
+ V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
// If "A op C" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
@@ -544,7 +518,19 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
if (isa<OverflowingBinaryOperator>(Op1))
HasNSW &= Op1->hasNoSignedWrap();
- BO->setHasNoSignedWrap(HasNSW);
+
+ // We can propagate 'nsw' if we know that
+ // %Y = mul nsw i16 %X, C
+ // %Z = add nsw i16 %Y, %X
+ // =>
+ // %Z = mul nsw i16 %X, C+1
+ //
+ // iff C+1 isn't INT_MIN
+ const APInt *CInt;
+ if (TopLevelOpcode == Instruction::Add &&
+ InnerOpcode == Instruction::Mul)
+ if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
+ BO->setHasNoSignedWrap(HasNSW);
}
}
}
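The nsw condition added above excludes exactly the case where the folded constant C+1 is the minimum signed value: the rewritten multiply can then sign-wrap even though the original mul/add pair did not. A standalone sketch of that boundary case for i16, computed in a wider type so the wrap is observable without undefined behaviour:

  #include <cassert>
  #include <cstdint>

  int main() {
    // X*C + X == X*(C+1); carrying "no signed wrap" over is only safe when
    // C+1 is not the minimum signed value. Boundary case: C = 32767, X = -1.
    const int32_t C = 32767;           // INT16_MAX, so C+1 is INT16_MIN
    const int32_t X = -1;
    int32_t Y = X * C;                 // -32767: fits in i16, no signed wrap
    int32_t Z = Y + X;                 // -32768: still fits in i16, no wrap
    int32_t Folded = X * (C + 1);      //  32768: does NOT fit in i16
    assert(uint16_t(Z) == uint16_t(Folded)); // equal modulo 2^16 ...
    assert(Folded > INT16_MAX);              // ... but the folded multiply
                                             // would sign-wrap in i16, so nsw
                                             // may not be propagated here.
    return 0;
  }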
@@ -741,6 +727,22 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
}
+ // Test if a CmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
+ if (CI->hasOneUse()) {
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+ }
+ }
+
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
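The early-out above protects the canonical min/max shape: a select whose condition is a one-use compare over the select's own two operands. A hedged illustration of the protected pattern, written as ordinary C++ that compilers lower to the icmp+select idiom; the function name and values are made up for the example:

  #include <algorithm>
  #include <cassert>

  // A select whose condition compares its own two operands is a min/max:
  //   %cmp = icmp slt i32 %a, %b
  //   %min = select i1 %cmp, i32 %a, i32 %b
  // Folding a later operation into both arms would hide the idiom from
  // ScalarEvolution and CodeGen, so InstCombine now leaves it alone.
  static int clampedIncrement(int a, int b) {
    int min = (a < b) ? a : b;   // the protected select
    return min + 1;              // the op that is no longer pushed into arms
  }

  int main() {
    assert(clampedIncrement(3, 9) == 4);
    assert(clampedIncrement(9, 3) == 4);
    assert(clampedIncrement(3, 9) == std::min(3, 9) + 1);
    return 0;
  }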
@@ -750,7 +752,6 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
}
-
/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
@@ -799,8 +800,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// If the incoming non-constant value is in I's block, we will remove one
// instruction, but insert another equivalent one, leading to infinite
// instcombine.
- if (isPotentiallyReachable(I.getParent(), NonConstBB, DT,
- getAnalysisIfAvailable<LoopInfo>()))
+ if (isPotentiallyReachable(I.getParent(), NonConstBB, DT, LI))
return nullptr;
}
@@ -897,23 +897,18 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// whether or not there is a sequence of GEP indices into the pointed type that
/// will land us at the specified offset. If so, fill them into NewIndices and
/// return the resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
- assert(PtrTy->isPtrOrPtrVectorTy());
-
- if (!DL)
- return nullptr;
-
- Type *Ty = PtrTy->getPointerElementType();
+Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value *> &NewIndices) {
+ Type *Ty = PtrTy->getElementType();
if (!Ty->isSized())
return nullptr;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = DL->getIntPtrType(PtrTy);
+ Type *IntPtrTy = DL.getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
- if (int64_t TySize = DL->getTypeAllocSize(Ty)) {
+ if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
Offset -= FirstIdx*TySize;
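FindElementAtOffset first peels off whole outer elements, then repeatedly descends into the struct field or array element containing the remaining offset until it reaches zero. A self-contained sketch of the same walk over one hard-coded layout; the struct size and field offsets are assumed here, whereas the real code reads them from DataLayout:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Hypothetical layout: [N x { i32, i32, double }], struct padded to 16
  // bytes, field offsets 0, 4, 8 (a typical 64-bit layout; in LLVM these
  // numbers come from DataLayout::getStructLayout / getTypeAllocSize).
  static std::vector<int64_t> indicesForOffset(int64_t Offset) {
    std::vector<int64_t> Idx;
    const int64_t StructSize = 16;
    const int64_t FieldOffset[3] = {0, 4, 8};

    Idx.push_back(Offset / StructSize);   // index over the outer elements
    Offset %= StructSize;

    int64_t Field = 2;                    // last field starting at or before Offset
    while (FieldOffset[Field] > Offset)
      --Field;
    Idx.push_back(Field);
    Offset -= FieldOffset[Field];

    assert(Offset == 0 && "offset does not land on a field boundary");
    return Idx;
  }

  int main() {
    // Byte offset 36 = 2 whole structs (32 bytes) + the field at offset 4,
    // giving indices {2, 1}.
    std::vector<int64_t> Idx = indicesForOffset(36);
    assert(Idx.size() == 2 && Idx[0] == 2 && Idx[1] == 1);
    return 0;
  }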
@@ -931,11 +926,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Index into the types. If we fail, set OrigBase to null.
while (Offset) {
// Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
+ if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
return nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
assert(Offset < (int64_t)SL->getSizeInBytes() &&
"Offset must stay within the indexed type");
@@ -946,7 +941,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
} else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
Offset %= EltSize;
@@ -1240,7 +1235,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
// It may not be safe to reorder shuffles and things like div, urem, etc.
// because we may trap when executing those ops on unknown vector elements.
// See PR20059.
- if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr;
+ if (!isSafeToSpeculativelyExecute(&Inst))
+ return nullptr;
unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
@@ -1326,37 +1322,37 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Eliminate unneeded casts for indices, and replace indices which displace
// by multiples of a zero size type with zero.
- if (DL) {
- bool MadeChange = false;
- Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType());
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
- I != E; ++I, ++GTI) {
- // Skip indices into struct types.
- SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
- if (!SeqTy) continue;
-
- // If the element type has zero size then any index over it is equivalent
- // to an index of zero, so replace it with zero if it is not zero already.
- if (SeqTy->getElementType()->isSized() &&
- DL->getTypeAllocSize(SeqTy->getElementType()) == 0)
- if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
- *I = Constant::getNullValue(IntPtrTy);
- MadeChange = true;
- }
+ bool MadeChange = false;
+ Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
+ ++I, ++GTI) {
+ // Skip indices into struct types.
+ SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy)
+ continue;
- Type *IndexTy = (*I)->getType();
- if (IndexTy != IntPtrTy) {
- // If we are using a wider index than needed for this platform, shrink
- // it to what we need. If narrower, sign-extend it to what we need.
- // This explicit cast can make subsequent optimizations more obvious.
- *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ DL.getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
MadeChange = true;
}
+
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
}
- if (MadeChange) return &GEP;
}
+ if (MadeChange)
+ return &GEP;
// Check to see if the inputs to the PHI node are getelementptr instructions.
if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
@@ -1364,6 +1360,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op1)
return nullptr;
+ // Don't fold a GEP into itself through a PHI node. This can only happen
+ // through the back-edge of a loop. Folding a GEP into itself means that
+ // the value of the previous iteration needs to be stored in the meantime,
+ // thus requiring an additional register variable to be live, but not
+ // actually achieving anything (the GEP still needs to be executed once per
+ // loop iteration).
+ if (Op1 == &GEP)
+ return nullptr;
+
signed DI = -1;
for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
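The two early-outs added above refuse to merge a GEP with a PHI operand that is the GEP itself, a shape that only arises across a loop back-edge; merging would keep the previous iteration's pointer live without eliminating any work. A hypothetical C++ loop that lowers to exactly that PHI-feeds-its-own-GEP structure (the names and the IR shown in the comment are illustrative):

  // Hypothetical source whose IR contains
  //   %p    = phi i32* [ %arg, %entry ], [ %next, %loop ]
  //   %next = getelementptr i32, i32* %p, i64 1
  // so the back-edge operand of the PHI is the GEP that uses the PHI.
  static int sumFirstN(const int *p, int n) {
    int s = 0;
    for (int i = 0; i < n; ++i) {
      s += *p;   // load through the PHI'd pointer
      ++p;       // the GEP fed back into the PHI on the next iteration
    }
    return s;
  }

  int main() {
    int a[4] = {1, 2, 3, 4};
    return sumFirstN(a, 4) == 10 ? 0 : 1;
  }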
@@ -1371,6 +1376,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
return nullptr;
+ // As for Op1 above, don't try to fold a GEP into itself.
+ if (Op2 == &GEP)
+ return nullptr;
+
// Keep track of the type as we walk the GEP.
Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
@@ -1417,8 +1426,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (DI == -1) {
// All the GEPs feeding the PHI are identical. Clone one down into our
// BB so that it can be merged with the current GEP.
- GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
- NewGEP);
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
} else {
// All the GEPs feeding the PHI differ at a single offset. Clone a GEP
// into the current block so it can be merged, and create a new PHI to
@@ -1434,8 +1443,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
PN->getIncomingBlock(I));
NewGEP->setOperand(DI, NewPN);
- GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
- NewGEP);
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
NewGEP->setOperand(DI, NewPN);
}
@@ -1486,6 +1495,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// normalized.
if (SO1->getType() != GO1->getType())
return nullptr;
+ // Only do the combine when GO1 and SO1 are both constants. Only in
+ // this case, we are sure the cost after the merge is never more than
+ // that before the merge.
+ if (!isa<Constant>(GO1) || !isa<Constant>(SO1))
+ return nullptr;
Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
}
@@ -1507,19 +1521,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
if (!Indices.empty())
- return (GEP.isInBounds() && Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
- GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
+ return GEP.isInBounds() && Src->isInBounds()
+ ? GetElementPtrInst::CreateInBounds(
+ Src->getSourceElementType(), Src->getOperand(0), Indices,
+ GEP.getName())
+ : GetElementPtrInst::Create(Src->getSourceElementType(),
+ Src->getOperand(0), Indices,
+ GEP.getName());
}
- if (DL && GEP.getNumIndices() == 1) {
+ if (GEP.getNumIndices() == 1) {
unsigned AS = GEP.getPointerAddressSpace();
if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
- DL->getPointerSizeInBits(AS)) {
+ DL.getPointerSizeInBits(AS)) {
Type *PtrTy = GEP.getPointerOperandType();
Type *Ty = PtrTy->getPointerElementType();
- uint64_t TyAllocSize = DL->getTypeAllocSize(Ty);
+ uint64_t TyAllocSize = DL.getTypeAllocSize(Ty);
bool Matched = false;
uint64_t C;
@@ -1588,8 +1605,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
- GetElementPtrInst *Res =
- GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
+ GetElementPtrInst *Res = GetElementPtrInst::Create(
+ StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
return Res;
@@ -1613,6 +1630,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// is a leading zero) we can fold the cast into this GEP.
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
GEP.setOperand(0, StrippedPtr);
+ GEP.setSourceElementType(XATy);
return &GEP;
}
// Cannot replace the base pointer directly because StrippedPtr's
@@ -1625,9 +1643,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP = GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(
+ nullptr, StrippedPtr, Idx, GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
@@ -1638,14 +1658,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
Type *ResElTy = PtrOp->getType()->getPointerElementType();
- if (DL && SrcElTy->isArrayTy() &&
- DL->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
- DL->getTypeAllocSize(ResElTy)) {
- Type *IdxType = DL->getIntPtrType(GEP.getType());
+ if (SrcElTy->isArrayTy() &&
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+ DL.getTypeAllocSize(ResElTy)) {
+ Type *IdxType = DL.getIntPtrType(GEP.getType());
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1656,11 +1678,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %V = mul i64 %N, 4
// %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
// into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized()) {
+ if (ResElTy->isSized() && SrcElTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy);
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy);
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
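This transform, like the array variant further down, rewrites a byte-addressed GEP whose index is a multiple of the pointee size into a typed GEP with the divided index. The pointer identity behind it, spelled out in plain C++ under the usual assumption that int32_t occupies 4 bytes:

  #include <cassert>
  #include <cstdint>

  int main() {
    // getelementptr i8 over (bitcast i32* %arr) with index N*4 names the same
    // address as bitcast of (getelementptr i32* %arr, N): a byte offset of
    // N*sizeof(int32_t) from an i32 array is the address of element N.
    int32_t Arr[16] = {};
    for (int64_t N = 0; N != 16; ++N) {
      const char *ByteGEP =
          reinterpret_cast<const char *>(Arr) + N * sizeof(int32_t);
      const char *TypedGEP = reinterpret_cast<const char *>(Arr + N);
      assert(ByteGEP == TypedGEP);
    }
    return 0;
  }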
@@ -1668,7 +1690,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1676,9 +1698,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1691,13 +1716,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized() &&
- SrcElTy->isArrayTy()) {
+ if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize
- = DL->getTypeAllocSize(SrcElTy->getArrayElementType());
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t ArrayEltSize =
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1705,7 +1729,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1714,13 +1738,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
Value *Off[2] = {
- Constant::getNullValue(DL->getIntPtrType(GEP.getType())),
- NewIdx
- };
-
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
+ Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
+ NewIdx};
+
+ Value *NewGEP = GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(
+ SrcElTy, StrippedPtr, Off, GEP.getName())
+ : Builder->CreateGEP(SrcElTy, StrippedPtr, Off,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEP.getType());
@@ -1730,9 +1755,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- if (!DL)
- return nullptr;
-
// addrspacecast between types is canonicalized as a bitcast, then an
// addrspacecast. To take advantage of the below bitcast + struct GEP, look
// through the addrspacecast.
@@ -1753,10 +1775,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
Value *Operand = BCI->getOperand(0);
PointerType *OpType = cast<PointerType>(Operand->getType());
- unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType());
+ unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
APInt Offset(OffsetBits, 0);
if (!isa<BitCastInst>(Operand) &&
- GEP.accumulateConstantOffset(*DL, Offset)) {
+ GEP.accumulateConstantOffset(DL, Offset)) {
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
@@ -1785,9 +1807,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP.
SmallVector<Value*, 8> NewIndices;
if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
- Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(Operand, NewIndices) :
- Builder->CreateGEP(Operand, NewIndices);
+ Value *NGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices)
+ : Builder->CreateGEP(nullptr, Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -2038,6 +2061,15 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
+ // If the condition is irrelevant, remove the use so that other
+ // transforms on the condition become more effective.
+ if (BI.isConditional() &&
+ BI.getSuccessor(0) == BI.getSuccessor(1) &&
+ !isa<UndefValue>(BI.getCondition())) {
+ BI.setCondition(UndefValue::get(BI.getCondition()->getType()));
+ return &BI;
+ }
+
// Canonicalize fcmp_one -> fcmp_oeq
FCmpInst::Predicate FPred; Value *Y;
if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
@@ -2077,7 +2109,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Cond, KnownZero, KnownOne);
+ computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
@@ -2096,8 +2128,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
// x86 generates redundant zero-extenstion instructions if the operand is
// truncated to i8 or i16.
bool TruncCond = false;
- if (DL && BitWidth > NewWidth &&
- NewWidth >= DL->getLargestLegalIntTypeSize()) {
+ if (NewWidth > 0 && BitWidth > NewWidth &&
+ NewWidth >= DL.getLargestLegalIntTypeSize()) {
TruncCond = true;
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder->SetInsertPoint(&SI);
@@ -2270,7 +2302,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
Builder->SetInsertPoint(L->getParent(), L);
- Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
+ L->getPointerOperand(), Indices);
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use ReplaceInstUsesWith().
return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
@@ -2286,41 +2319,27 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return nullptr;
}
-enum Personality_Type {
- Unknown_Personality,
- GNU_Ada_Personality,
- GNU_CXX_Personality,
- GNU_ObjC_Personality
-};
-
-/// RecognizePersonality - See if the given exception handling personality
-/// function is one that we understand. If so, return a description of it;
-/// otherwise return Unknown_Personality.
-static Personality_Type RecognizePersonality(Value *Pers) {
- Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
- if (!F)
- return Unknown_Personality;
- return StringSwitch<Personality_Type>(F->getName())
- .Case("__gnat_eh_personality", GNU_Ada_Personality)
- .Case("__gxx_personality_v0", GNU_CXX_Personality)
- .Case("__objc_personality_v0", GNU_ObjC_Personality)
- .Default(Unknown_Personality);
-}
-
/// isCatchAll - Return 'true' if the given typeinfo will match anything.
-static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) {
+static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
switch (Personality) {
- case Unknown_Personality:
+ case EHPersonality::GNU_C:
+ // The GCC C EH personality only exists to support cleanups, so it's not
+ // clear what the semantics of catch clauses are.
return false;
- case GNU_Ada_Personality:
+ case EHPersonality::Unknown:
+ return false;
+ case EHPersonality::GNU_Ada:
// While __gnat_all_others_value will match any Ada exception, it doesn't
// match foreign exceptions (or didn't, before gcc-4.7).
return false;
- case GNU_CXX_Personality:
- case GNU_ObjC_Personality:
+ case EHPersonality::GNU_CXX:
+ case EHPersonality::GNU_ObjC:
+ case EHPersonality::MSVC_X86SEH:
+ case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::MSVC_CXX:
return TypeInfo->isNullValue();
}
- llvm_unreachable("Unknown personality!");
+ llvm_unreachable("invalid enum");
}
static bool shorter_filter(const Value *LHS, const Value *RHS) {
@@ -2334,7 +2353,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// The logic here should be correct for any real-world personality function.
// However if that turns out not to be true, the offending logic can always
// be conditioned on the personality function, like the catch-all logic is.
- Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn());
+ EHPersonality Personality = classifyEHPersonality(LI.getPersonalityFn());
// Simplify the list of clauses, eg by removing repeated catch clauses
// (these are often created by inlining).
@@ -2625,9 +2644,6 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
-
-
-
/// TryToSinkInstruction - Try to move the specified instruction from its
/// current block into the beginning of DestBlock, which can only happen if it's
/// safe to move the instruction past all of the instructions between it and the
@@ -2660,164 +2676,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
return true;
}
-
-/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
-/// all reachable code to the worklist.
-///
-/// This has a couple of tricks to make the code faster and more powerful. In
-/// particular, we constant fold and DCE instructions as we go, to avoid adding
-/// them to the worklist (this significantly speeds up instcombine on code where
-/// many instructions are dead or constant). Additionally, if we find a branch
-/// whose condition is a known constant, we only visit the reachable successors.
-///
-static bool AddReachableCodeToWorklist(BasicBlock *BB,
- SmallPtrSetImpl<BasicBlock*> &Visited,
- InstCombiner &IC,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI) {
- bool MadeIRChange = false;
- SmallVector<BasicBlock*, 256> Worklist;
- Worklist.push_back(BB);
-
- SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
- DenseMap<ConstantExpr*, Constant*> FoldedConstants;
-
- do {
- BB = Worklist.pop_back_val();
-
- // We have now visited this block! If we've already been here, ignore it.
- if (!Visited.insert(BB).second)
- continue;
-
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = BBI++;
-
- // DCE instruction if trivially dead.
- if (isInstructionTriviallyDead(Inst, TLI)) {
- ++NumDeadInst;
- DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
- Inst->eraseFromParent();
- continue;
- }
-
- // ConstantProp instruction if trivially constant.
- if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
- DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
- << *Inst << '\n');
- Inst->replaceAllUsesWith(C);
- ++NumConstProp;
- Inst->eraseFromParent();
- continue;
- }
-
- if (DL) {
- // See if we can constant fold its operands.
- for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
- i != e; ++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == nullptr) continue;
-
- Constant*& FoldRes = FoldedConstants[CE];
- if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
- if (!FoldRes)
- FoldRes = CE;
-
- if (FoldRes != CE) {
- *i = FoldRes;
- MadeIRChange = true;
- }
- }
- }
-
- InstrsForInstCombineWorklist.push_back(Inst);
- }
-
- // Recursively visit successors. If this is a branch or switch on a
- // constant, only visit the reachable successor.
- TerminatorInst *TI = BB->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
- bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
- BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
- Worklist.push_back(ReachableBB);
- continue;
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
- // See if this is an explicit destination.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i)
- if (i.getCaseValue() == Cond) {
- BasicBlock *ReachableBB = i.getCaseSuccessor();
- Worklist.push_back(ReachableBB);
- continue;
- }
-
- // Otherwise it is the default destination.
- Worklist.push_back(SI->getDefaultDest());
- continue;
- }
- }
-
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- Worklist.push_back(TI->getSuccessor(i));
- } while (!Worklist.empty());
-
- // Once we've found all of the instructions to add to instcombine's worklist,
- // add them in reverse order. This way instcombine will visit from the top
- // of the function down. This jives well with the way that it adds all uses
- // of instructions to the worklist after doing a transformation, thus avoiding
- // some N^2 behavior in pathological cases.
- IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
- InstrsForInstCombineWorklist.size());
-
- return MadeIRChange;
-}
-
-bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
- MadeIRChange = false;
-
- DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
- << F.getName() << "\n");
-
- {
- // Do a depth-first traversal of the function, populate the worklist with
- // the reachable instructions. Ignore blocks that are not reachable. Keep
- // track of which blocks we visit.
- SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, DL,
- TLI);
-
- // Do a quick scan over the function. If we find any blocks that are
- // unreachable, remove any instructions inside of them. This prevents
- // the instcombine code from having to deal with some bad special cases.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (Visited.count(BB)) continue;
-
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
- Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
- while (EndInst != BB->begin()) {
- // Delete the next to last instruction.
- BasicBlock::iterator I = EndInst;
- Instruction *Inst = --I;
- if (!Inst->use_empty())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (isa<LandingPadInst>(Inst)) {
- EndInst = Inst;
- continue;
- }
- if (!isa<DbgInfoIntrinsic>(Inst)) {
- ++NumDeadInst;
- MadeIRChange = true;
- }
- Inst->eraseFromParent();
- }
- }
- }
-
+bool InstCombiner::run() {
while (!Worklist.isEmpty()) {
Instruction *I = Worklist.RemoveOne();
if (I == nullptr) continue; // skip null values.
@@ -2832,7 +2691,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
}
// Instruction isn't dead, see if we can constant propagate it.
- if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0))) {
if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
@@ -2843,6 +2702,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = true;
continue;
}
+ }
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
@@ -2900,7 +2760,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
- if (!I->getDebugLoc().isUnknown())
+ if (I->getDebugLoc())
Result->setDebugLoc(I->getDebugLoc());
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
@@ -2947,63 +2807,287 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
return MadeIRChange;
}
-namespace {
-class InstCombinerLibCallSimplifier final : public LibCallSimplifier {
- InstCombiner *IC;
-public:
- InstCombinerLibCallSimplifier(const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- InstCombiner *IC)
- : LibCallSimplifier(DL, TLI) {
- this->IC = IC;
- }
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
+ SmallPtrSetImpl<BasicBlock *> &Visited,
+ InstCombineWorklist &ICWorklist,
+ const TargetLibraryInfo *TLI) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
- /// replaceAllUsesWith - override so that instruction replacement
- /// can be defined in terms of the instruction combiner framework.
- void replaceAllUsesWith(Instruction *I, Value *With) const override {
- IC->ReplaceInstUsesWith(*I, With);
- }
-};
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB).second)
+ continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst, TLI)) {
+ ++NumDeadInst;
+ DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e;
+ ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == nullptr)
+ continue;
+
+ Constant *&FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (!FoldRes)
+ FoldRes = CE;
+
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseValue() == Cond) {
+ BasicBlock *ReachableBB = i.getCaseSuccessor();
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getDefaultDest());
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jives well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ ICWorklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
}
-bool InstCombiner::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
+/// \brief Populate the IC worklist from a function, and prune any dead basic
+/// blocks discovered in the process.
+///
+/// This also does basic constant propagation and other forward fixing to make
+/// the combiner itself run much faster.
+static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
+ TargetLibraryInfo *TLI,
+ InstCombineWorklist &ICWorklist) {
+ bool MadeIRChange = false;
+
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock *, 64> Visited;
+ MadeIRChange |=
+ AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Visited.count(BB))
+ continue;
+
+ // Delete the instructions backwards, as it has a reduced likelihood of
+ // having to update as many def-use and use-def chains.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
+ }
+ if (!isa<DbgInfoIntrinsic>(Inst)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+ Inst->eraseFromParent();
+ }
+ }
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ return MadeIRChange;
+}
+static bool
+combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
+ AssumptionCache &AC, TargetLibraryInfo &TLI,
+ DominatorTree &DT, LoopInfo *LI = nullptr) {
// Minimizing size?
- MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize);
+ bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
+ auto &DL = F.getParent()->getDataLayout();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
- IRBuilder<true, TargetFolder, InstCombineIRInserter> TheBuilder(
- F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, AC));
- Builder = &TheBuilder;
-
- InstCombinerLibCallSimplifier TheSimplifier(DL, TLI, this);
- Simplifier = &TheSimplifier;
-
- bool EverMadeChange = false;
+ IRBuilder<true, TargetFolder, InstCombineIRInserter> Builder(
+ F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, &AC));
// Lower dbg.declare intrinsics otherwise their value may be clobbered
// by instcombiner.
- EverMadeChange = LowerDbgDeclare(F);
+ bool DbgDeclaresChanged = LowerDbgDeclare(F);
// Iterate while there is work to do.
- unsigned Iteration = 0;
- while (DoOneIteration(F, Iteration++))
- EverMadeChange = true;
+ int Iteration = 0;
+ for (;;) {
+ ++Iteration;
+ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getName() << "\n");
+
+ bool Changed = false;
+ if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist))
+ Changed = true;
+
+ InstCombiner IC(Worklist, &Builder, MinimizeSize, &AC, &TLI, &DT, DL, LI);
+ if (IC.run())
+ Changed = true;
+
+ if (!Changed)
+ break;
+ }
+
+ return DbgDeclaresChanged || Iteration > 1;
+}
+
+PreservedAnalyses InstCombinePass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ auto &AC = AM->getResult<AssumptionAnalysis>(F);
+ auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
+
+ auto *LI = AM->getCachedResult<LoopAnalysis>(F);
+
+ if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI))
+ // No changes, all analyses are preserved.
+ return PreservedAnalyses::all();
+
+ // Mark all the analyses that instcombine updates as preserved.
+ // FIXME: Need a way to preserve CFG analyses here!
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+namespace {
+/// \brief The legacy pass manager's instcombine pass.
+///
+/// This is a basic whole-function wrapper around the instcombine utility. It
+/// will try to combine all instructions in the function.
+class InstructionCombiningPass : public FunctionPass {
+ InstCombineWorklist Worklist;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ InstructionCombiningPass() : FunctionPass(ID) {
+ initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+}
+
+void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+bool InstructionCombiningPass::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ // Required analyses.
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Optional analyses.
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- Builder = nullptr;
- return EverMadeChange;
+ return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI);
+}
+
+char InstructionCombiningPass::ID = 0;
+INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
+ "Combine redundant instructions", false, false)
+
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstructionCombiningPassPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstructionCombiningPassPass(*unwrap(R));
}
FunctionPass *llvm::createInstructionCombiningPass() {
- return new InstCombiner();
+ return new InstructionCombiningPass();
}
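
To summarize the restructuring in the hunk above: the per-iteration logic that used to live in DoOneIteration/runOnFunction is now a free function, combineInstructionsOverFunction, shared by the new pass manager's InstCombinePass and the legacy InstructionCombiningPass wrapper. Below is a minimal, self-contained sketch of that fixed-point driver shape; the names are placeholders, not the LLVM types.

// Sketch only: "seed" stands in for prepareICWorklistFromFunction and
// "combine" for InstCombiner::run; the loop repeats until neither reports
// a change.
template <typename SeedFn, typename CombineFn>
bool combineToFixedPoint(SeedFn seed, CombineFn combine) {
  bool everChanged = false;
  for (;;) {
    bool changed = false;
    changed |= seed();     // repopulate the worklist, folding trivial cases
    changed |= combine();  // drain the worklist, applying rewrites
    if (!changed)
      break;               // nothing fired this round: fixed point reached
    everChanged = true;
  }
  return everChanged;
}
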
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 25f1f02..939e04b 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -24,6 +24,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -43,12 +46,14 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <string>
#include <system_error>
@@ -64,22 +69,21 @@ static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
-static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 36;
+static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
+static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
-static const size_t kMinStackMallocSize = 1 << 6; // 64B
+static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *const kAsanModuleCtorName = "asan.module_ctor";
static const char *const kAsanModuleDtorName = "asan.module_dtor";
-static const uint64_t kAsanCtorAndDtorPriority = 1;
+static const uint64_t kAsanCtorAndDtorPriority = 1;
static const char *const kAsanReportErrorTemplate = "__asan_report_";
-static const char *const kAsanReportLoadN = "__asan_report_load_n";
-static const char *const kAsanReportStoreN = "__asan_report_store_n";
static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *const kAsanUnregisterGlobalsName =
"__asan_unregister_globals";
@@ -89,7 +93,7 @@ static const char *const kAsanInitName = "__asan_init_v5";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
-static const int kMaxAsanStackMallocSizeClass = 10;
+static const int kMaxAsanStackMallocSizeClass = 10;
static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
static const char *const kAsanGenPrefix = "__asan_gen_";
@@ -102,10 +106,6 @@ static const char *const kAsanUnpoisonStackMemoryName =
static const char *const kAsanOptionDetectUAR =
"__asan_option_detect_stack_use_after_return";
-#ifndef NDEBUG
-static const int kAsanStackAfterReturnMagic = 0xf5;
-#endif
-
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
@@ -119,84 +119,110 @@ static const unsigned kAsanAllocaPartialVal2 = 0x000000cbU;
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
- cl::desc("instrument read instructions"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
- cl::desc("instrument write instructions"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
- cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
- cl::Hidden, cl::init(true));
-static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
- cl::desc("use instrumentation with slow path for all accesses"),
- cl::Hidden, cl::init(false));
+ cl::desc("instrument read instructions"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentWrites(
+ "asan-instrument-writes", cl::desc("instrument write instructions"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentAtomics(
+ "asan-instrument-atomics",
+ cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+ cl::init(true));
+static cl::opt<bool> ClAlwaysSlowPath(
+ "asan-always-slow-path",
+ cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,
+ cl::init(false));
// This flag limits the number of instructions to be instrumented
// in any given BB. Normally, this should be set to unlimited (INT_MAX),
// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary
// set it to 10000.
-static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb",
- cl::init(10000),
- cl::desc("maximal number of instructions to instrument in any given BB"),
- cl::Hidden);
+static cl::opt<int> ClMaxInsnsToInstrumentPerBB(
+ "asan-max-ins-per-bb", cl::init(10000),
+ cl::desc("maximal number of instructions to instrument in any given BB"),
+ cl::Hidden);
// This flag may need to be replaced with -f[no]asan-stack.
-static cl::opt<bool> ClStack("asan-stack",
- cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
- cl::desc("Check return-after-free"), cl::Hidden, cl::init(true));
+ cl::desc("Check return-after-free"),
+ cl::Hidden, cl::init(true));
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
- cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+ cl::desc("Handle global objects"), cl::Hidden,
+ cl::init(true));
static cl::opt<bool> ClInitializers("asan-initialization-order",
- cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
- cl::desc("Instrument <, <=, >, >=, - with pointer operands"),
- cl::Hidden, cl::init(false));
-static cl::opt<unsigned> ClRealignStack("asan-realign-stack",
- cl::desc("Realign stack to the value of this flag (power of two)"),
- cl::Hidden, cl::init(32));
+ cl::desc("Handle C++ initializer order"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInvalidPointerPairs(
+ "asan-detect-invalid-pointer-pair",
+ cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,
+ cl::init(false));
+static cl::opt<unsigned> ClRealignStack(
+ "asan-realign-stack",
+ cl::desc("Realign stack to the value of this flag (power of two)"),
+ cl::Hidden, cl::init(32));
static cl::opt<int> ClInstrumentationWithCallsThreshold(
"asan-instrumentation-with-call-threshold",
- cl::desc("If the function being instrumented contains more than "
- "this number of memory accesses, use callbacks instead of "
- "inline checks (-1 means never use callbacks)."),
- cl::Hidden, cl::init(7000));
+ cl::desc(
+ "If the function being instrumented contains more than "
+ "this number of memory accesses, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(7000));
static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
- "asan-memory-access-callback-prefix",
- cl::desc("Prefix for memory access callbacks"), cl::Hidden,
- cl::init("__asan_"));
+ "asan-memory-access-callback-prefix",
+ cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+ cl::init("__asan_"));
static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas",
- cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(false));
+ cl::desc("instrument dynamic allocas"),
+ cl::Hidden, cl::init(false));
+static cl::opt<bool> ClSkipPromotableAllocas(
+ "asan-skip-promotable-allocas",
+ cl::desc("Do not instrument promotable allocas"), cl::Hidden,
+ cl::init(true));
// These flags allow to change the shadow mapping.
// The shadow mapping looks like
// Shadow = (Mem >> scale) + (1 << offset_log)
static cl::opt<int> ClMappingScale("asan-mapping-scale",
- cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
+ cl::desc("scale of asan shadow mapping"),
+ cl::Hidden, cl::init(0));
// Optimization flags. Not user visible, used mostly for testing
// and benchmarking the tool.
-static cl::opt<bool> ClOpt("asan-opt",
- cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
- cl::desc("Instrument the same temp just once"), cl::Hidden,
- cl::init(true));
+static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptSameTemp(
+ "asan-opt-same-temp", cl::desc("Instrument the same temp just once"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClOptGlobals("asan-opt-globals",
- cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
-
-static cl::opt<bool> ClCheckLifetime("asan-check-lifetime",
- cl::desc("Use llvm.lifetime intrinsics to insert extra checks"),
- cl::Hidden, cl::init(false));
+ cl::desc("Don't instrument scalar globals"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptStack(
+ "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClCheckLifetime(
+ "asan-check-lifetime",
+ cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), cl::Hidden,
+ cl::init(false));
static cl::opt<bool> ClDynamicAllocaStack(
"asan-stack-dynamic-alloca",
cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden,
- cl::init(false));
+ cl::init(true));
+
+static cl::opt<uint32_t> ClForceExperiment(
+ "asan-force-experiment",
+ cl::desc("Force optimization experiment (for testing)"), cl::Hidden,
+ cl::init(0));
// Debug flags.
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
cl::init(0));
static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
cl::Hidden, cl::init(0));
-static cl::opt<std::string> ClDebugFunc("asan-debug-func",
- cl::Hidden, cl::desc("Debug func"));
+static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden,
+ cl::desc("Debug func"));
static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
cl::Hidden, cl::init(-1));
static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
@@ -206,10 +232,10 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumInstrumentedDynamicAllocas,
"Number of instrumented dynamic allocas");
-STATISTIC(NumOptimizedAccessesToGlobalArray,
- "Number of optimized accesses to global arrays");
STATISTIC(NumOptimizedAccessesToGlobalVar,
"Number of optimized accesses to global vars");
+STATISTIC(NumOptimizedAccessesToStackVar,
+ "Number of optimized accesses to stack vars");
namespace {
/// Frontend-provided metadata for source location.
@@ -224,8 +250,8 @@ struct LocationMetadata {
void parse(MDNode *MDN) {
assert(MDN->getNumOperands() == 3);
- MDString *MDFilename = cast<MDString>(MDN->getOperand(0));
- Filename = MDFilename->getString();
+ MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
+ Filename = DIFilename->getString();
LineNo =
mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
ColumnNo =
@@ -237,9 +263,7 @@ struct LocationMetadata {
class GlobalsMetadata {
public:
struct Entry {
- Entry()
- : SourceLoc(), Name(), IsDynInit(false),
- IsBlacklisted(false) {}
+ Entry() : SourceLoc(), Name(), IsDynInit(false), IsBlacklisted(false) {}
LocationMetadata SourceLoc;
StringRef Name;
bool IsDynInit;
@@ -248,19 +272,17 @@ class GlobalsMetadata {
GlobalsMetadata() : inited_(false) {}
- void init(Module& M) {
+ void init(Module &M) {
assert(!inited_);
inited_ = true;
NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
- if (!Globals)
- return;
+ if (!Globals) return;
for (auto MDN : Globals->operands()) {
// Metadata node contains the global and the fields of "Entry".
assert(MDN->getNumOperands() == 5);
auto *GV = mdconst::extract_or_null<GlobalVariable>(MDN->getOperand(0));
// The optimizer may optimize away a global entirely.
- if (!GV)
- continue;
+ if (!GV) continue;
// We can already have an entry for GV if it was merged with another
// global.
Entry &E = Entries[GV];
@@ -285,7 +307,7 @@ class GlobalsMetadata {
private:
bool inited_;
- DenseMap<GlobalVariable*, Entry> Entries;
+ DenseMap<GlobalVariable *, Entry> Entries;
};
/// This struct defines the shadow mapping using the rule:
@@ -308,6 +330,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize) {
TargetTriple.getArch() == llvm::Triple::mipsel;
bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el;
+ bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64;
bool IsWindows = TargetTriple.isOSWindows();
ShadowMapping Mapping;
@@ -334,6 +357,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize) {
Mapping.Offset = kSmallX86_64ShadowOffset;
else if (IsMIPS64)
Mapping.Offset = kMIPS64_ShadowOffset64;
+ else if (IsAArch64)
+ Mapping.Offset = kAArch64_ShadowOffset64;
else
Mapping.Offset = kDefaultShadowOffset64;
}
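
A worked example of the shadow mapping rule quoted above, Shadow = (Mem >> scale) + offset. The scale of 3 is the usual ASan default but is not shown in this hunk, so treat it as an assumption; the offset is the kSmallX86_64ShadowOffset constant defined earlier in this file.

#include <cstdint>

constexpr uint64_t kScale = 3;               // assumed default shadow scale
constexpr uint64_t kOffset = 0x7FFF8000ULL;  // kSmallX86_64ShadowOffset above

constexpr uint64_t memToShadowExample(uint64_t Mem) {
  return (Mem >> kScale) + kOffset;          // Shadow = (Mem >> scale) + offset
}

// 0x400000 >> 3 = 0x80000; 0x80000 + 0x7FFF8000 = 0x80078000
static_assert(memToShadowExample(0x400000) == 0x80078000ULL,
              "one shadow byte covers an 8-byte granule of application memory");
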
@@ -367,17 +392,36 @@ struct AddressSanitizer : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t SizeInBytes =
+ AI->getModule()->getDataLayout().getTypeAllocSize(Ty);
+ return SizeInBytes;
}
- void instrumentMop(Instruction *I, bool UseCalls);
+ /// Check if we want (and can) handle this alloca.
+ bool isInterestingAlloca(AllocaInst &AI);
+ /// If it is an interesting memory access, return the PointerOperand
+ /// and set IsWrite/Alignment. Otherwise return nullptr.
+ Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+ uint64_t *TypeSize,
+ unsigned *Alignment);
+ void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,
+ bool UseCalls, const DataLayout &DL);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls);
+ Value *SizeArgument, bool UseCalls, uint32_t Exp);
+ void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex,
- Value *SizeArgument);
+ Value *SizeArgument, uint32_t Exp);
void instrumentMemIntrinsic(MemIntrinsic *MI);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F) override;
@@ -392,9 +436,10 @@ struct AddressSanitizer : public FunctionPass {
bool LooksLikeCodeInBug11395(Instruction *I);
bool GlobalIsLinkerInitialized(GlobalVariable *G);
+ bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr,
+ uint64_t TypeSize) const;
LLVMContext *C;
- const DataLayout *DL;
Triple TargetTriple;
int LongSize;
Type *IntptrTy;
@@ -404,15 +449,16 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
- // This array is indexed by AccessIsWrite and log2(AccessSize).
- Function *AsanErrorCallback[2][kNumberOfAccessSizes];
- Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes];
- // This array is indexed by AccessIsWrite.
- Function *AsanErrorCallbackSized[2],
- *AsanMemoryAccessCallbackSized[2];
+ // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize).
+ Function *AsanErrorCallback[2][2][kNumberOfAccessSizes];
+ Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
+ // This array is indexed by AccessIsWrite and Experiment.
+ Function *AsanErrorCallbackSized[2][2];
+ Function *AsanMemoryAccessCallbackSized[2][2];
Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
GlobalsMetadata GlobalsMD;
+ DenseMap<AllocaInst *, bool> ProcessedAllocas;
friend struct FunctionStackPoisoner;
};
@@ -422,9 +468,7 @@ class AddressSanitizerModule : public ModulePass {
AddressSanitizerModule() : ModulePass(ID) {}
bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
- const char *getPassName() const override {
- return "AddressSanitizerModule";
- }
+ const char *getPassName() const override { return "AddressSanitizerModule"; }
private:
void initializeCallbacks(Module &M);
@@ -440,7 +484,6 @@ class AddressSanitizerModule : public ModulePass {
GlobalsMetadata GlobalsMD;
Type *IntptrTy;
LLVMContext *C;
- const DataLayout *DL;
Triple TargetTriple;
ShadowMapping Mapping;
Function *AsanPoisonGlobals;
@@ -467,12 +510,12 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Type *IntptrPtrTy;
ShadowMapping Mapping;
- SmallVector<AllocaInst*, 16> AllocaVec;
- SmallVector<Instruction*, 8> RetVec;
+ SmallVector<AllocaInst *, 16> AllocaVec;
+ SmallVector<Instruction *, 8> RetVec;
unsigned StackAlignment;
Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
- *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+ *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
@@ -493,33 +536,38 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Value *LeftRzAddr;
Value *RightRzAddr;
bool Poison;
- explicit DynamicAllocaCall(AllocaInst *AI,
- Value *LeftRzAddr = nullptr,
- Value *RightRzAddr = nullptr)
- : AI(AI), LeftRzAddr(LeftRzAddr), RightRzAddr(RightRzAddr), Poison(true)
- {}
+ explicit DynamicAllocaCall(AllocaInst *AI, Value *LeftRzAddr = nullptr,
+ Value *RightRzAddr = nullptr)
+ : AI(AI),
+ LeftRzAddr(LeftRzAddr),
+ RightRzAddr(RightRzAddr),
+ Poison(true) {}
};
SmallVector<DynamicAllocaCall, 1> DynamicAllocaVec;
// Maps Value to an AllocaInst from which the Value is originated.
- typedef DenseMap<Value*, AllocaInst*> AllocaForValueMapTy;
+ typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy;
AllocaForValueMapTy AllocaForValue;
bool HasNonEmptyInlineAsm;
std::unique_ptr<CallInst> EmptyInlineAsm;
FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
- : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false),
- C(ASan.C), IntptrTy(ASan.IntptrTy),
- IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping),
- StackAlignment(1 << Mapping.Scale), HasNonEmptyInlineAsm(false),
+ : F(F),
+ ASan(ASan),
+ DIB(*F.getParent(), /*AllowUnresolved*/ false),
+ C(ASan.C),
+ IntptrTy(ASan.IntptrTy),
+ IntptrPtrTy(PointerType::get(IntptrTy, 0)),
+ Mapping(ASan.Mapping),
+ StackAlignment(1 << Mapping.Scale),
+ HasNonEmptyInlineAsm(false),
EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
bool runOnFunction() {
if (!ClStack) return false;
// Collect alloca, ret, lifetime instructions etc.
- for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
- visit(*BB);
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB);
if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false;
@@ -540,33 +588,31 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// ----------------------- Visitors.
/// \brief Collect all Ret instructions.
- void visitReturnInst(ReturnInst &RI) {
- RetVec.push_back(&RI);
- }
+ void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); }
// Unpoison dynamic allocas redzones.
void unpoisonDynamicAlloca(DynamicAllocaCall &AllocaCall) {
- if (!AllocaCall.Poison)
- return;
+ if (!AllocaCall.Poison) return;
for (auto Ret : RetVec) {
IRBuilder<> IRBRet(Ret);
PointerType *Int32PtrTy = PointerType::getUnqual(IRBRet.getInt32Ty());
Value *Zero = Constant::getNullValue(IRBRet.getInt32Ty());
Value *PartialRzAddr = IRBRet.CreateSub(AllocaCall.RightRzAddr,
ConstantInt::get(IntptrTy, 4));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr,
- Int32PtrTy));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(PartialRzAddr,
- Int32PtrTy));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr,
- Int32PtrTy));
+ IRBRet.CreateStore(
+ Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy));
+ IRBRet.CreateStore(Zero,
+ IRBRet.CreateIntToPtr(PartialRzAddr, Int32PtrTy));
+ IRBRet.CreateStore(
+ Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy));
}
}
// Right shift for BigEndian and left shift for LittleEndian.
Value *shiftAllocaMagic(Value *Val, IRBuilder<> &IRB, Value *Shift) {
- return ASan.DL->isLittleEndian() ? IRB.CreateShl(Val, Shift)
- : IRB.CreateLShr(Val, Shift);
+ auto &DL = F.getParent()->getDataLayout();
+ return DL.isLittleEndian() ? IRB.CreateShl(Val, Shift)
+ : IRB.CreateLShr(Val, Shift);
}
// Compute PartialRzMagic for dynamic alloca call. Since we don't know the
@@ -595,7 +641,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect Alloca instructions we want (and can) handle.
void visitAllocaInst(AllocaInst &AI) {
- if (!isInterestingAlloca(AI)) return;
+ if (!ASan.isInterestingAlloca(AI)) return;
StackAlignment = std::max(StackAlignment, AI.getAlignment());
if (isDynamicAlloca(AI))
@@ -609,8 +655,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void visitIntrinsicInst(IntrinsicInst &II) {
if (!ClCheckLifetime) return;
Intrinsic::ID ID = II.getIntrinsicID();
- if (ID != Intrinsic::lifetime_start &&
- ID != Intrinsic::lifetime_end)
+ if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
return;
// Found lifetime intrinsic, add ASan instrumentation if necessary.
ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
@@ -640,8 +685,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool doesDominateAllExits(const Instruction *I) const {
for (auto Ret : RetVec) {
- if (!ASan.getDominatorTree().dominates(I, Ret))
- return false;
+ if (!ASan.getDominatorTree().dominates(I, Ret)) return false;
}
return true;
}
@@ -649,19 +693,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool isDynamicAlloca(AllocaInst &AI) const {
return AI.isArrayAllocation() || !AI.isStaticAlloca();
}
-
- // Check if we want (and can) handle this alloca.
- bool isInterestingAlloca(AllocaInst &AI) const {
- return (AI.getAllocatedType()->isSized() &&
- // alloca() may be called with 0 size, ignore it.
- getAllocaSizeInBytes(&AI) > 0);
- }
-
- uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
- Type *Ty = AI->getAllocatedType();
- uint64_t SizeInBytes = ASan.DL->getTypeAllocSize(Ty);
- return SizeInBytes;
- }
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
@@ -679,21 +710,25 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
} // namespace
char AddressSanitizer::ID = 0;
-INITIALIZE_PASS_BEGIN(AddressSanitizer, "asan",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
- false, false)
+INITIALIZE_PASS_BEGIN(
+ AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+ false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(AddressSanitizer, "asan",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
- false, false)
+INITIALIZE_PASS_END(
+ AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+ false)
FunctionPass *llvm::createAddressSanitizerFunctionPass() {
return new AddressSanitizer();
}
char AddressSanitizerModule::ID = 0;
-INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
+INITIALIZE_PASS(
+ AddressSanitizerModule, "asan-module",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
- "ModulePass", false, false)
+ "ModulePass",
+ false, false)
ModulePass *llvm::createAddressSanitizerModulePass() {
return new AddressSanitizerModule();
}
@@ -705,16 +740,15 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
}
// \brief Create a constant for Str so that we can pass it to the run-time lib.
-static GlobalVariable *createPrivateGlobalForString(
- Module &M, StringRef Str, bool AllowMerging) {
+static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
+ bool AllowMerging) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
// We use private linkage for module-local strings. If they can be merged
// with another one, we set the unnamed_addr attribute.
GlobalVariable *GV =
new GlobalVariable(M, StrConst->getType(), true,
GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix);
- if (AllowMerging)
- GV->setUnnamedAddr(true);
+ if (AllowMerging) GV->setUnnamedAddr(true);
GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
return GV;
}
@@ -743,8 +777,7 @@ static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
- if (Mapping.Offset == 0)
- return Shadow;
+ if (Mapping.Offset == 0) return Shadow;
// (Shadow >> scale) | offset
if (Mapping.OrShadowOffset)
return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
@@ -756,53 +789,84 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
IRBuilder<> IRB(MI);
if (isa<MemTransferInst>(MI)) {
- IRB.CreateCall3(
+ IRB.CreateCall(
isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
- IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
} else if (isa<MemSetInst>(MI)) {
- IRB.CreateCall3(
+ IRB.CreateCall(
AsanMemset,
- IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
}
MI->eraseFromParent();
}
-// If I is an interesting memory access, return the PointerOperand
-// and set IsWrite/Alignment. Otherwise return nullptr.
-static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
- unsigned *Alignment) {
+/// Check if we want (and can) handle this alloca.
+bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
+ auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI);
+
+ if (PreviouslySeenAllocaInfo != ProcessedAllocas.end())
+ return PreviouslySeenAllocaInfo->getSecond();
+
+ bool IsInteresting = (AI.getAllocatedType()->isSized() &&
+ // alloca() may be called with 0 size, ignore it.
+ getAllocaSizeInBytes(&AI) > 0 &&
+ // We are only interested in allocas not promotable to registers.
+ // Promotable allocas are common under -O0.
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)));
+
+ ProcessedAllocas[&AI] = IsInteresting;
+ return IsInteresting;
+}
+
+/// If I is an interesting memory access, return the PointerOperand
+/// and set IsWrite/Alignment. Otherwise return nullptr.
+Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
+ bool *IsWrite,
+ uint64_t *TypeSize,
+ unsigned *Alignment) {
// Skip memory accesses inserted by another instrumentation.
- if (I->getMetadata("nosanitize"))
- return nullptr;
+ if (I->getMetadata("nosanitize")) return nullptr;
+
+ Value *PtrOperand = nullptr;
+ const DataLayout &DL = I->getModule()->getDataLayout();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!ClInstrumentReads) return nullptr;
*IsWrite = false;
+ *TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
*Alignment = LI->getAlignment();
- return LI->getPointerOperand();
- }
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ PtrOperand = LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (!ClInstrumentWrites) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
*Alignment = SI->getAlignment();
- return SI->getPointerOperand();
- }
- if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ PtrOperand = SI->getPointerOperand();
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
*Alignment = 0;
- return RMW->getPointerOperand();
- }
- if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+ PtrOperand = RMW->getPointerOperand();
+ } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
*Alignment = 0;
- return XCHG->getPointerOperand();
+ PtrOperand = XCHG->getPointerOperand();
}
- return nullptr;
+
+ // Treat memory accesses to promotable allocas as non-interesting since they
+ // will not cause memory violations. This greatly speeds up the instrumented
+ // executable at -O0.
+ if (ClSkipPromotableAllocas)
+ if (auto AI = dyn_cast_or_null<AllocaInst>(PtrOperand))
+ return isInterestingAlloca(*AI) ? AI : nullptr;
+
+ return PtrOperand;
}
static bool isPointerOperand(Value *V) {
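
A hypothetical source-level illustration of the promotable-alloca skip described above; the function and variable names are made up, and the behavior assumes -asan-skip-promotable-allocas, which this patch defaults to true.

extern void escape(int *P);  // placeholder sink that lets an address escape

int promoted() {
  int X = 41;                // only loaded and stored, so mem2reg can promote
  X += 1;                    // it; with the new check its accesses are skipped
  return X;
}

int notPromoted() {
  int Y = 0;
  escape(&Y);                // address escapes, the alloca stays in memory,
  return Y;                  // so accesses remain interesting and get checks
}
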
@@ -814,17 +878,15 @@ static bool isPointerOperand(Value *V) {
// the frontend.
static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
- if (!Cmp->isRelational())
- return false;
+ if (!Cmp->isRelational()) return false;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (BO->getOpcode() != Instruction::Sub)
- return false;
+ if (BO->getOpcode() != Instruction::Sub) return false;
} else {
return false;
}
if (!isPointerOperand(I->getOperand(0)) ||
!isPointerOperand(I->getOperand(1)))
- return false;
+ return false;
return true;
}
@@ -835,8 +897,8 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
}
-void
-AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
+void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
+ Instruction *I) {
IRBuilder<> IRB(I);
Function *F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
@@ -844,41 +906,50 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
if (Param[i]->getType()->isPointerTy())
Param[i] = IRB.CreatePointerCast(Param[i], IntptrTy);
}
- IRB.CreateCall2(F, Param[0], Param[1]);
+ IRB.CreateCall(F, Param);
}
-void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
+void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
+ Instruction *I, bool UseCalls,
+ const DataLayout &DL) {
bool IsWrite = false;
unsigned Alignment = 0;
- Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment);
+ uint64_t TypeSize = 0;
+ Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment);
assert(Addr);
+
+ // Optimization experiments.
+ // The experiments can be used to evaluate potential optimizations that remove
+ // instrumentation (assess false negatives). Instead of completely removing
+ // some instrumentation, you set Exp to a non-zero value (mask of optimization
+ // experiments that want to remove instrumentation of this instruction).
+ // If Exp is non-zero, this pass will emit special calls into runtime
+ // (e.g. __asan_report_exp_load1 instead of __asan_report_load1). These calls
+ // make runtime terminate the program in a special way (with a different
+ // exit status). Then you run the new compiler on a buggy corpus, collect
+ // the special terminations (ideally, you don't see them at all -- no false
+ // negatives) and make the decision on the optimization.
+ uint32_t Exp = ClForceExperiment;
+
if (ClOpt && ClOptGlobals) {
- if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
- // If initialization order checking is disabled, a simple access to a
- // dynamically initialized global is always valid.
- if (!ClInitializers || GlobalIsLinkerInitialized(G)) {
- NumOptimizedAccessesToGlobalVar++;
- return;
- }
- }
- ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr);
- if (CE && CE->isGEPWithNoNotionalOverIndexing()) {
- if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
- if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) {
- NumOptimizedAccessesToGlobalArray++;
- return;
- }
- }
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL));
+ if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
+ isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
+ NumOptimizedAccessesToGlobalVar++;
+ return;
}
}
- Type *OrigPtrTy = Addr->getType();
- Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
-
- assert(OrigTy->isSized());
- uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
-
- assert((TypeSize % 8) == 0);
+ if (ClOpt && ClOptStack) {
+ // A direct inbounds access to a stack variable is always valid.
+ if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+ isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
+ NumOptimizedAccessesToStackVar++;
+ return;
+ }
+ }
if (IsWrite)
NumInstrumentedWrites++;
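
The Exp mechanism explained in the comment above routes reports through "_exp_" runtime entry points that carry the experiment mask. The following sketch only mirrors the naming given in that comment; which entry points actually exist is up to the runtime, so treat it as illustrative.

#include <cstdint>
#include <string>

std::string reportCalleeName(bool IsWrite, unsigned SizeBytes, uint32_t Exp) {
  std::string Name = "__asan_report_";
  if (Exp != 0)
    Name += "exp_";                      // experimental variant; the call also
  Name += IsWrite ? "store" : "load";    // passes Exp as an extra i32 argument
  Name += std::to_string(SizeBytes);
  return Name;                           // e.g. "__asan_report_exp_load1"
}
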
@@ -891,65 +962,57 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
TypeSize == 128) &&
(Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
- return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls);
- // Instrument unusual size or unusual alignment.
- // We can not do it with a single check, so we do 1-byte check for the first
- // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
- // to report the actual access size.
- IRBuilder<> IRB(I);
- Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
- if (UseCalls) {
- IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size);
- } else {
- Value *LastByte = IRB.CreateIntToPtr(
- IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
- OrigPtrTy);
- instrumentAddress(I, I, Addr, 8, IsWrite, Size, false);
- instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false);
- }
+ return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls,
+ Exp);
+ instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
+ UseCalls, Exp);
}
-// Validate the result of Module::getOrInsertFunction called for an interface
-// function of AddressSanitizer. If the instrumented module defines a function
-// with the same name, their prototypes must match, otherwise
-// getOrInsertFunction returns a bitcast.
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast);
- FuncOrBitcast->dump();
- report_fatal_error("trying to redefine an AddressSanitizer "
- "interface function");
-}
-
-Instruction *AddressSanitizer::generateCrashCode(
- Instruction *InsertBefore, Value *Addr,
- bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) {
+Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
+ Value *Addr, bool IsWrite,
+ size_t AccessSizeIndex,
+ Value *SizeArgument,
+ uint32_t Exp) {
IRBuilder<> IRB(InsertBefore);
- CallInst *Call = SizeArgument
- ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument)
- : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+ Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp);
+ CallInst *Call = nullptr;
+ if (SizeArgument) {
+ if (Exp == 0)
+ Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][0],
+ {Addr, SizeArgument});
+ else
+ Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][1],
+ {Addr, SizeArgument, ExpVal});
+ } else {
+ if (Exp == 0)
+ Call =
+ IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr);
+ else
+ Call = IRB.CreateCall(AsanErrorCallback[IsWrite][1][AccessSizeIndex],
+ {Addr, ExpVal});
+ }
// We don't do Call->setDoesNotReturn() because the BB already has
// UnreachableInst at the end.
// This EmptyAsm is required to avoid callback merge.
- IRB.CreateCall(EmptyAsm);
+ IRB.CreateCall(EmptyAsm, {});
return Call;
}
Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
- Value *ShadowValue,
- uint32_t TypeSize) {
+ Value *ShadowValue,
+ uint32_t TypeSize) {
size_t Granularity = 1 << Mapping.Scale;
// Addr & (Granularity - 1)
- Value *LastAccessedByte = IRB.CreateAnd(
- AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+ Value *LastAccessedByte =
+ IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
// (Addr & (Granularity - 1)) + size - 1
if (TypeSize / 8 > 1)
LastAccessedByte = IRB.CreateAdd(
LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
- LastAccessedByte = IRB.CreateIntCast(
- LastAccessedByte, ShadowValue->getType(), false);
+ LastAccessedByte =
+ IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
@@ -957,24 +1020,29 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls) {
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp) {
IRBuilder<> IRB(InsertBefore);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
if (UseCalls) {
- IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex],
- AddrLong);
+ if (Exp == 0)
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
+ AddrLong);
+ else
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex],
+ {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)});
return;
}
- Type *ShadowTy = IntegerType::get(
- *C, std::max(8U, TypeSize >> Mapping.Scale));
+ Type *ShadowTy =
+ IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *CmpVal = Constant::getNullValue(ShadowTy);
- Value *ShadowValue = IRB.CreateLoad(
- IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+ Value *ShadowValue =
+ IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t Granularity = 1 << Mapping.Scale;
@@ -983,10 +1051,9 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
// We use branch weights for the slow path check, to indicate that the slow
// path is rarely taken. This seems to be the case for SPEC benchmarks.
- TerminatorInst *CheckTerm =
- SplitBlockAndInsertIfThen(Cmp, InsertBefore, false,
- MDBuilder(*C).createBranchWeights(1, 100000));
- assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
+ TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ assert(cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
@@ -999,11 +1066,37 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true);
}
- Instruction *Crash = generateCrashCode(
- CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument);
+ Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite,
+ AccessSizeIndex, SizeArgument, Exp);
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
+// Instrument unusual size or unusual alignment.
+// We cannot do it with a single check, so we do a 1-byte check for the first
+// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
+// to report the actual access size.
+void AddressSanitizer::instrumentUnusualSizeOrAlignment(
+ Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+ IRBuilder<> IRB(I);
+ Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ if (UseCalls) {
+ if (Exp == 0)
+ IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0],
+ {AddrLong, Size});
+ else
+ IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1],
+ {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)});
+ } else {
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ Addr->getType());
+ instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp);
+ }
+}
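A hedged sketch of the strategy the comment above describes: rather than one shadow check covering an oddly sized access, probe one byte at each end of it; the shadow probe itself is stubbed out here, and only the address arithmetic mirrors the pass:

#include <cstdint>

// Stand-in for the 1-byte ASan check; always "clean" in this stub.
static bool byteIsPoisoned(uint64_t /*Addr*/) { return false; }

bool unusualAccessIsBad(uint64_t Addr, uint64_t TypeSizeInBits) {
  uint64_t SizeInBytes = TypeSizeInBits / 8;
  uint64_t FirstByte = Addr;
  uint64_t LastByte = Addr + SizeInBytes - 1;  // the Add the pass emits
  // If either end is poisoned, the runtime reports with the real size
  // through __asan_report_*_n(addr, size).
  return byteIsPoisoned(FirstByte) || byteIsPoisoned(LastByte);
}

int main() { return unusualAccessIsBad(0x1000, 10 * 8) ? 1 : 0; }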
+
void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
GlobalValue *ModuleName) {
// Set up the arguments to our poison/unpoison functions.
@@ -1025,12 +1118,11 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
for (Use &OP : CA->operands()) {
- if (isa<ConstantAggregateZero>(OP))
- continue;
+ if (isa<ConstantAggregateZero>(OP)) continue;
ConstantStruct *CS = cast<ConstantStruct>(OP);
// Must have a function or null ptr.
- if (Function* F = dyn_cast<Function>(CS->getOperand(1))) {
+ if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
if (F->getName() == kAsanModuleCtorName) continue;
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
// Don't instrument CTORs that will run before asan.module_ctor.
@@ -1055,13 +1147,11 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
G->getLinkage() != GlobalVariable::PrivateLinkage &&
G->getLinkage() != GlobalVariable::InternalLinkage)
return false;
- if (G->hasComdat())
- return false;
+ if (G->hasComdat()) return false;
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
// - Need to poison all copies, not just the main thread's one.
- if (G->isThreadLocal())
- return false;
+ if (G->isThreadLocal()) return false;
// For now, just ignore this Global if the alignment is large.
if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false;
@@ -1072,10 +1162,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
StringRef ParsedSegment, ParsedSection;
unsigned TAA = 0, StubSize = 0;
bool TAAParsed;
- std::string ErrorCode =
- MCSectionMachO::ParseSectionSpecifier(Section, ParsedSegment,
- ParsedSection, TAA, TAAParsed,
- StubSize);
+ std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(
+ Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
report_fatal_error("Invalid section specifier '" + ParsedSection +
"': " + ErrorCode + ".");
@@ -1128,20 +1216,19 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
void AddressSanitizerModule::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Declare our poisoning and unpoisoning functions.
- AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr));
AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
// Declare functions that register/unregister globals.
- AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, nullptr));
+ AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnregisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanUnregisterGlobalsName,
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ AsanUnregisterGlobals = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, nullptr));
AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
}
@@ -1154,8 +1241,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
SmallVector<GlobalVariable *, 16> GlobalsToChange;
for (auto &G : M.globals()) {
- if (ShouldInstrumentGlobal(&G))
- GlobalsToChange.push_back(&G);
+ if (ShouldInstrumentGlobal(&G)) GlobalsToChange.push_back(&G);
}
size_t n = GlobalsToChange.size();
@@ -1180,8 +1266,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
  // We shouldn't merge same module names, as this string serves as a unique
  // module ID at runtime.
GlobalVariable *ModuleName = createPrivateGlobalForString(
- M, M.getModuleIdentifier(), /*AllowMerging*/false);
+ M, M.getModuleIdentifier(), /*AllowMerging*/ false);
+ auto &DL = M.getDataLayout();
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
@@ -1195,32 +1282,30 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
- uint64_t SizeInBytes = DL->getTypeAllocSize(Ty);
+ uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
uint64_t MinRZ = MinRedzoneSizeForGlobal();
// MinRZ <= RZ <= kMaxGlobalRedzone
    // and trying to make RZ ~ 1/4 of SizeInBytes.
- uint64_t RZ = std::max(MinRZ,
- std::min(kMaxGlobalRedzone,
- (SizeInBytes / MinRZ / 4) * MinRZ));
+ uint64_t RZ = std::max(
+ MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ));
uint64_t RightRedzoneSize = RZ;
// Round up to MinRZ
- if (SizeInBytes % MinRZ)
- RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
+ if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr);
- Constant *NewInitializer = ConstantStruct::get(
- NewTy, G->getInitializer(),
- Constant::getNullValue(RightRedZoneTy), nullptr);
+ Constant *NewInitializer =
+ ConstantStruct::get(NewTy, G->getInitializer(),
+ Constant::getNullValue(RightRedZoneTy), nullptr);
// Create a new global variable with enough space for a redzone.
GlobalValue::LinkageTypes Linkage = G->getLinkage();
if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage)
Linkage = GlobalValue::InternalLinkage;
- GlobalVariable *NewGlobal = new GlobalVariable(
- M, NewTy, G->isConstant(), Linkage,
- NewInitializer, "", G, G->getThreadLocalMode());
+ GlobalVariable *NewGlobal =
+ new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer,
+ "", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setAlignment(MinRZ);
@@ -1229,7 +1314,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
Indices2[1] = IRB.getInt32(0);
G->replaceAllUsesWith(
- ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true));
+ ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));
NewGlobal->takeName(G);
G->eraseFromParent();
@@ -1249,8 +1334,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, nullptr);
- if (ClInitializers && MD.IsDynInit)
- HasDynamicallyInitializedGlobals = true;
+ if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
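A standalone walk-through of the redzone sizing used a few hunks above, with MinRedzoneSizeForGlobal() assumed to return 32 (a typical value, not taken from this diff):

#include <algorithm>
#include <cstdint>
#include <cstdio>

uint64_t rightRedzoneSize(uint64_t SizeInBytes) {
  const uint64_t MinRZ = 32;                    // assumed MinRedzoneSizeForGlobal()
  const uint64_t kMaxGlobalRedzone = 1 << 18;
  // MinRZ <= RZ <= kMaxGlobalRedzone, aiming for RZ ~ SizeInBytes / 4.
  uint64_t RZ = std::max(
      MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ));
  uint64_t RightRedzoneSize = RZ;
  // Round global + redzone up to a multiple of MinRZ.
  if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
  return RightRedzoneSize;
}

int main() {
  std::printf("%llu\n", (unsigned long long)rightRedzoneSize(100));  // 60  -> total 160
  std::printf("%llu\n", (unsigned long long)rightRedzoneSize(4096)); // 1024
}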
@@ -1263,20 +1347,20 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
// Create calls for poisoning before initializers run and unpoisoning after.
if (HasDynamicallyInitializedGlobals)
createInitializerPoisonCalls(M, ModuleName);
- IRB.CreateCall2(AsanRegisterGlobals,
- IRB.CreatePointerCast(AllGlobals, IntptrTy),
- ConstantInt::get(IntptrTy, n));
+ IRB.CreateCall(AsanRegisterGlobals,
+ {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n)});
// We also need to unregister globals at the end, e.g. when a shared library
// gets closed.
- Function *AsanDtorFunction = Function::Create(
- FunctionType::get(Type::getVoidTy(*C), false),
- GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ Function *AsanDtorFunction =
+ Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
- IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
- IRB.CreatePointerCast(AllGlobals, IntptrTy),
- ConstantInt::get(IntptrTy, n));
+ IRB_Dtor.CreateCall(AsanUnregisterGlobals,
+ {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n)});
appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
DEBUG(dbgs() << M);
@@ -1284,12 +1368,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
}
bool AddressSanitizerModule::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- return false;
- DL = &DLP->getDataLayout();
C = &(M.getContext());
- int LongSize = DL->getPointerSizeInBits();
+ int LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
Mapping = getShadowMapping(TargetTriple, LongSize);
@@ -1301,8 +1381,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
assert(CtorFunc);
IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
- if (ClGlobals)
- Changed |= InstrumentGlobals(IRB, M);
+ if (ClGlobals) Changed |= InstrumentGlobals(IRB, M);
return Changed;
}
@@ -1310,50 +1389,51 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
void AddressSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Create __asan_report* callbacks.
- for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
- for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
- AccessSizeIndex++) {
- // IsWrite and TypeSize are encoded in the function name.
- std::string Suffix =
- (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
- checkInterfaceFunction(
- M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix,
- IRB.getVoidTy(), IntptrTy, nullptr));
- AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
- checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix,
- IRB.getVoidTy(), IntptrTy, nullptr));
+ // IsWrite, TypeSize and Exp are encoded in the function name.
+ for (int Exp = 0; Exp < 2; Exp++) {
+ for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+ const std::string TypeStr = AccessIsWrite ? "store" : "load";
+ const std::string ExpStr = Exp ? "exp_" : "";
+ const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
+ AsanErrorCallbackSized[AccessIsWrite][Exp] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + TypeStr + "_n",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
+ AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
+ AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(),
+ IntptrTy, ExpType, nullptr));
+ AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(),
+ IntptrTy, ExpType, nullptr));
+ }
}
}
- AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
-
- AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN",
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN",
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
-
- AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction(
+
+ AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
- AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
- AsanMemset = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr));
- AsanHandleNoReturnFunc = checkInterfaceFunction(
+ AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr));
- AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
@@ -1364,28 +1444,18 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
// virtual
bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
GlobalsMD.init(M);
C = &(M.getContext());
- LongSize = DL->getPointerSizeInBits();
+ LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
- AsanCtorFunction = Function::Create(
- FunctionType::get(Type::getVoidTy(*C), false),
- GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
- BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
- // call __asan_init in the module ctor.
- IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
- AsanInitFunction = checkInterfaceFunction(
- M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), nullptr));
- AsanInitFunction->setLinkage(Function::ExternalLinkage);
- IRB.CreateCall(AsanInitFunction);
+ std::tie(AsanCtorFunction, AsanInitFunction) =
+ createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{});
Mapping = getShadowMapping(TargetTriple, LongSize);
@@ -1403,7 +1473,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
IRBuilder<> IRB(F.begin()->begin());
- IRB.CreateCall(AsanInitFunction);
+ IRB.CreateCall(AsanInitFunction, {});
return true;
}
return false;
@@ -1420,22 +1490,21 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// If needed, insert __asan_init before checking for SanitizeAddress attr.
maybeInsertAsanInitAtFunctionEntry(F);
- if (!F.hasFnAttribute(Attribute::SanitizeAddress))
- return false;
+ if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return false;
- if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
- return false;
+ if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false;
// We want to instrument every address only once per basic block (unless there
// are calls between uses).
- SmallSet<Value*, 16> TempsToInstrument;
- SmallVector<Instruction*, 16> ToInstrument;
- SmallVector<Instruction*, 8> NoReturnCalls;
- SmallVector<BasicBlock*, 16> AllBlocks;
- SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts;
+ SmallSet<Value *, 16> TempsToInstrument;
+ SmallVector<Instruction *, 16> ToInstrument;
+ SmallVector<Instruction *, 8> NoReturnCalls;
+ SmallVector<BasicBlock *, 16> AllBlocks;
+ SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts;
int NumAllocas = 0;
bool IsWrite;
unsigned Alignment;
+ uint64_t TypeSize;
// Fill the set of memory operations to instrument.
for (auto &BB : F) {
@@ -1444,8 +1513,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
int NumInsnsPerBB = 0;
for (auto &Inst : BB) {
if (LooksLikeCodeInBug11395(&Inst)) return false;
- if (Value *Addr =
- isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) {
+ if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
+ &Alignment)) {
if (ClOpt && ClOptSameTemp) {
if (!TempsToInstrument.insert(Addr).second)
continue; // We've seen this temp in the current BB.
@@ -1457,21 +1526,18 @@ bool AddressSanitizer::runOnFunction(Function &F) {
} else if (isa<MemIntrinsic>(Inst)) {
// ok, take it.
} else {
- if (isa<AllocaInst>(Inst))
- NumAllocas++;
+ if (isa<AllocaInst>(Inst)) NumAllocas++;
CallSite CS(&Inst);
if (CS) {
// A call inside BB.
TempsToInstrument.clear();
- if (CS.doesNotReturn())
- NoReturnCalls.push_back(CS.getInstruction());
+ if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction());
}
continue;
}
ToInstrument.push_back(&Inst);
NumInsnsPerBB++;
- if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB)
- break;
+ if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break;
}
}
@@ -1480,13 +1546,20 @@ bool AddressSanitizer::runOnFunction(Function &F) {
ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold)
UseCalls = true;
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(),
+ /*RoundToAlign=*/true);
+
// Instrument.
int NumInstrumented = 0;
for (auto Inst : ToInstrument) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
- if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment))
- instrumentMop(Inst, UseCalls);
+ if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment))
+ instrumentMop(ObjSizeVis, Inst, UseCalls,
+ F.getParent()->getDataLayout());
else
instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
@@ -1500,7 +1573,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
for (auto CI : NoReturnCalls) {
IRBuilder<> IRB(CI);
- IRB.CreateCall(AsanHandleNoReturnFunc);
+ IRB.CreateCall(AsanHandleNoReturnFunc, {});
}
for (auto Inst : PointerComparisonsOrSubtracts) {
@@ -1531,24 +1604,24 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
std::string Suffix = itostr(i);
- AsanStackMallocFunc[i] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy, nullptr));
- AsanStackFreeFunc[i] = checkInterfaceFunction(
+ AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
+ IntptrTy, nullptr));
+ AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
}
- AsanPoisonStackMemoryFunc = checkInterfaceFunction(
+ AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),
IntptrTy, IntptrTy, nullptr));
- AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(
+ AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
IntptrTy, IntptrTy, nullptr));
}
-void
-FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
- IRBuilder<> &IRB, Value *ShadowBase,
- bool DoPoison) {
+void FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
+ IRBuilder<> &IRB, Value *ShadowBase,
+ bool DoPoison) {
size_t n = ShadowBytes.size();
size_t i = 0;
// We need to (un)poison n bytes of stack shadow. Poison as many as we can
@@ -1559,7 +1632,7 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
for (; i + LargeStoreSizeInBytes - 1 < n; i += LargeStoreSizeInBytes) {
uint64_t Val = 0;
for (size_t j = 0; j < LargeStoreSizeInBytes; j++) {
- if (ASan.DL->isLittleEndian())
+ if (F.getParent()->getDataLayout().isLittleEndian())
Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
else
Val = (Val << 8) | ShadowBytes[i + j];
@@ -1578,9 +1651,8 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
static int StackMallocSizeClass(uint64_t LocalStackSize) {
assert(LocalStackSize <= kMaxStackMallocSize);
uint64_t MaxSize = kMinStackMallocSize;
- for (int i = 0; ; i++, MaxSize *= 2)
- if (LocalStackSize <= MaxSize)
- return i;
+ for (int i = 0;; i++, MaxSize *= 2)
+ if (LocalStackSize <= MaxSize) return i;
llvm_unreachable("impossible LocalStackSize");
}
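The same size-class search as a runnable snippet, with kMinStackMallocSize assumed to be 64; class i then covers frames of up to kMinStackMallocSize << i bytes:

#include <cstdint>
#include <cstdio>

int stackMallocSizeClass(uint64_t LocalStackSize) {
  uint64_t MaxSize = 64;                        // assumed kMinStackMallocSize
  for (int i = 0;; i++, MaxSize *= 2)
    if (LocalStackSize <= MaxSize) return i;    // first class that fits
}

int main() {
  std::printf("%d %d %d\n", stackMallocSizeClass(64),    // 0
              stackMallocSizeClass(65),                  // 1
              stackMallocSizeClass(1000));               // 4 (64,128,256,512,1024)
}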
@@ -1592,18 +1664,21 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) {
void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
IRBuilder<> &IRB, Value *ShadowBase, int Size) {
assert(!(Size % 8));
- assert(kAsanStackAfterReturnMagic == 0xf5);
+
+ // kAsanStackAfterReturnMagic is 0xf5.
+ const uint64_t kAsanStackAfterReturnMagic64 = 0xf5f5f5f5f5f5f5f5ULL;
+
for (int i = 0; i < Size; i += 8) {
Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
- IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL),
- IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
+ IRB.CreateStore(
+ ConstantInt::get(IRB.getInt64Ty(), kAsanStackAfterReturnMagic64),
+ IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
}
}
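A plain-C++ restatement of the inlined fill above: Size must be a multiple of 8 (as asserted), and each iteration performs one 64-bit store of the 0xf5 after-return magic:

#include <cstdint>
#include <cstring>

void setShadowToAfterReturn(uint8_t *ShadowBase, int Size) {
  const uint64_t kMagic64 = 0xf5f5f5f5f5f5f5f5ULL; // kAsanStackAfterReturnMagic x 8
  for (int i = 0; i < Size; i += 8)
    std::memcpy(ShadowBase + i, &kMagic64, 8);     // one 64-bit store per step
}

int main() {
  uint8_t Shadow[16] = {};
  setShadowToAfterReturn(Shadow, sizeof(Shadow));
  return Shadow[15] == 0xf5 ? 0 : 1;
}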
static DebugLoc getFunctionEntryDebugLocation(Function &F) {
for (const auto &Inst : F.getEntryBlock())
- if (!isa<AllocaInst>(Inst))
- return Inst.getDebugLoc();
+ if (!isa<AllocaInst>(Inst)) return Inst.getDebugLoc();
return DebugLoc();
}
@@ -1640,10 +1715,13 @@ Value *FunctionStackPoisoner::createAllocaForLayout(
void FunctionStackPoisoner::poisonStack() {
assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0);
- if (ClInstrumentAllocas)
+ if (ClInstrumentAllocas) {
// Handle dynamic allocas.
- for (auto &AllocaCall : DynamicAllocaVec)
+ for (auto &AllocaCall : DynamicAllocaVec) {
handleDynamicAllocaCall(AllocaCall);
+ unpoisonDynamicAlloca(AllocaCall);
+ }
+ }
if (AllocaVec.size() == 0) return;
@@ -1657,9 +1735,9 @@ void FunctionStackPoisoner::poisonStack() {
SmallVector<ASanStackVariableDescription, 16> SVD;
SVD.reserve(AllocaVec.size());
for (AllocaInst *AI : AllocaVec) {
- ASanStackVariableDescription D = { AI->getName().data(),
- getAllocaSizeInBytes(AI),
- AI->getAlignment(), AI, 0};
+ ASanStackVariableDescription D = {AI->getName().data(),
+ ASan.getAllocaSizeInBytes(AI),
+ AI->getAlignment(), AI, 0};
SVD.push_back(D);
}
// Minimal header size (left redzone) is 4 pointers,
@@ -1671,9 +1749,11 @@ void FunctionStackPoisoner::poisonStack() {
uint64_t LocalStackSize = L.FrameSize;
bool DoStackMalloc =
ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize;
- // Don't do dynamic alloca in presence of inline asm: too often it
- // makes assumptions on which registers are available.
+  // Don't do dynamic alloca in the presence of inline asm: too often it makes
+ // assumptions on which registers are available. Don't do stack malloc in the
+ // presence of inline asm on 32-bit platforms for the same reason.
bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm;
+ DoStackMalloc &= !HasNonEmptyInlineAsm || ASan.LongSize != 32;
Value *StaticAlloca =
DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
@@ -1739,7 +1819,7 @@ void FunctionStackPoisoner::poisonStack() {
Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
AI->getType());
- replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB);
+ replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true);
AI->replaceAllUsesWith(NewAllocaPtr);
}
@@ -1750,19 +1830,19 @@ void FunctionStackPoisoner::poisonStack() {
BasePlus0);
// Write the frame description constant to redzone[1].
Value *BasePlus1 = IRB.CreateIntToPtr(
- IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
- IntptrPtrTy);
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, ASan.LongSize / 8)),
+ IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), L.DescriptionString,
- /*AllowMerging*/true);
- Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
- IntptrTy);
+ /*AllowMerging*/ true);
+ Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
IRB.CreateStore(Description, BasePlus1);
// Write the PC to redzone[2].
Value *BasePlus2 = IRB.CreateIntToPtr(
- IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy,
- 2 * ASan.LongSize/8)),
- IntptrPtrTy);
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)),
+ IntptrPtrTy);
IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
// Poison the stack redzones at the entry.
@@ -1807,8 +1887,9 @@ void FunctionStackPoisoner::poisonStack() {
IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
} else {
// For larger frames call __asan_stack_free_*.
- IRBPoison.CreateCall2(AsanStackFreeFunc[StackMallocIdx], FakeStack,
- ConstantInt::get(IntptrTy, LocalStackSize));
+ IRBPoison.CreateCall(
+ AsanStackFreeFunc[StackMallocIdx],
+ {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)});
}
IRBuilder<> IRBElse(ElseTerm);
@@ -1822,14 +1903,8 @@ void FunctionStackPoisoner::poisonStack() {
}
}
- if (ClInstrumentAllocas)
- // Unpoison dynamic allocas.
- for (auto &AllocaCall : DynamicAllocaVec)
- unpoisonDynamicAlloca(AllocaCall);
-
// We are done. Remove the old unused alloca instructions.
- for (auto AI : AllocaVec)
- AI->eraseFromParent();
+ for (auto AI : AllocaVec) AI->eraseFromParent();
}
void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
@@ -1837,9 +1912,9 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
// For now just insert the call to ASan runtime.
Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
Value *SizeArg = ConstantInt::get(IntptrTy, Size);
- IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc
- : AsanUnpoisonStackMemoryFunc,
- AddrArg, SizeArg);
+ IRB.CreateCall(DoPoison ? AsanPoisonStackMemoryFunc
+ : AsanUnpoisonStackMemoryFunc,
+ {AddrArg, SizeArg});
}
// Handling llvm.lifetime intrinsics for a given %alloca:
@@ -1854,12 +1929,11 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
    // We're interested only in allocas we can handle.
- return isInterestingAlloca(*AI) ? AI : nullptr;
+ return ASan.isInterestingAlloca(*AI) ? AI : nullptr;
// See if we've already calculated (or started to calculate) alloca for a
// given value.
AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
- if (I != AllocaForValue.end())
- return I->second;
+ if (I != AllocaForValue.end()) return I->second;
// Store 0 while we're calculating alloca for value V to avoid
// infinite recursion if the value references itself.
AllocaForValue[V] = nullptr;
@@ -1867,8 +1941,7 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (CastInst *CI = dyn_cast<CastInst>(V))
Res = findAllocaForValue(CI->getOperand(0));
else if (PHINode *PN = dyn_cast<PHINode>(V)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *IncValue = PN->getIncomingValue(i);
+ for (Value *IncValue : PN->incoming_values()) {
// Allow self-referencing phi-nodes.
if (IncValue == PN) continue;
AllocaInst *IncValueAI = findAllocaForValue(IncValue);
@@ -1878,8 +1951,7 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
Res = IncValueAI;
}
}
- if (Res)
- AllocaForValue[V] = Res;
+ if (Res) AllocaForValue[V] = Res;
return Res;
}
@@ -1910,14 +1982,14 @@ Value *FunctionStackPoisoner::computePartialRzMagic(Value *PartialSize,
Value *Shift = IRB.CreateAnd(PartialSize, IRB.getInt32(~7));
unsigned Val1Int = kAsanAllocaPartialVal1;
unsigned Val2Int = kAsanAllocaPartialVal2;
- if (!ASan.DL->isLittleEndian()) {
+ if (!F.getParent()->getDataLayout().isLittleEndian()) {
Val1Int = sys::getSwappedBytes(Val1Int);
Val2Int = sys::getSwappedBytes(Val2Int);
}
Value *Val1 = shiftAllocaMagic(IRB.getInt32(Val1Int), IRB, Shift);
Value *PartialBits = IRB.CreateAnd(PartialSize, IRB.getInt32(7));
// For BigEndian get 0x000000YZ -> 0xYZ000000.
- if (ASan.DL->isBigEndian())
+ if (F.getParent()->getDataLayout().isBigEndian())
PartialBits = IRB.CreateShl(PartialBits, IRB.getInt32(24));
Value *Val2 = IRB.getInt32(Val2Int);
Value *Cond =
@@ -1951,7 +2023,8 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(
// redzones, and OldSize is number of allocated blocks with
// ElementSize size, get allocated memory size in bytes by
// OldSize * ElementSize.
- unsigned ElementSize = ASan.DL->getTypeAllocSize(AI->getAllocatedType());
+ unsigned ElementSize =
+ F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType());
Value *OldSize = IRB.CreateMul(AI->getArraySize(),
ConstantInt::get(IntptrTy, ElementSize));
@@ -2019,3 +2092,20 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(
AI->eraseFromParent();
NumInstrumentedDynamicAllocas++;
}
+
+// isSafeAccess returns true if Addr is always inbounds with respect to its
+// base object. For example, it is a field access or an array access with
+// constant inbounds index.
+bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis,
+ Value *Addr, uint64_t TypeSize) const {
+ SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr);
+ if (!ObjSizeVis.bothKnown(SizeOffset)) return false;
+ uint64_t Size = SizeOffset.first.getZExtValue();
+ int64_t Offset = SizeOffset.second.getSExtValue();
+ // Three checks are required to ensure safety:
+ // . Offset >= 0 (since the offset is given from the base ptr)
+ // . Size >= Offset (unsigned)
+ // . Size - Offset >= NeededSize (unsigned)
+ return Offset >= 0 && Size >= uint64_t(Offset) &&
+ Size - uint64_t(Offset) >= TypeSize / 8;
+}
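A numeric illustration of the three checks: a 4-byte load at offset 12 inside a 16-byte object is statically safe, while the same load at offset 14 is not:

#include <cstdint>
#include <cstdio>

bool isSafeAccessSketch(uint64_t Size, int64_t Offset, uint64_t NeededSize) {
  return Offset >= 0 &&                          // offset is from the base pointer
         Size >= uint64_t(Offset) &&             // offset stays inside the object
         Size - uint64_t(Offset) >= NeededSize;  // enough bytes remain for the access
}

int main() {
  std::printf("%d\n", isSafeAccessSketch(16, 12, 4)); // 1: fits exactly
  std::printf("%d\n", isSafeAccessSketch(16, 14, 4)); // 0: runs past the object
}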
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 9a5cea8..f685803 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -24,7 +25,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
#define DEBUG_TYPE "bounds-checking"
@@ -49,12 +49,10 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
private:
- const DataLayout *DL;
const TargetLibraryInfo *TLI;
ObjectSizeOffsetEvaluator *ObjSizeEval;
BuilderTy *Builder;
@@ -63,7 +61,7 @@ namespace {
BasicBlock *getTrapBB();
void emitBranchToTrap(Value *Cmp = nullptr);
- bool instrument(Value *Ptr, Value *Val);
+ bool instrument(Value *Ptr, Value *Val, const DataLayout &DL);
};
}
@@ -84,7 +82,7 @@ BasicBlock *BoundsChecking::getTrapBB() {
Builder->SetInsertPoint(TrapBB);
llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
- CallInst *TrapCall = Builder->CreateCall(F);
+ CallInst *TrapCall = Builder->CreateCall(F, {});
TrapCall->setDoesNotReturn();
TrapCall->setDoesNotThrow();
TrapCall->setDebugLoc(Inst->getDebugLoc());
@@ -125,8 +123,9 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
/// result from the load or the value being stored. It is used to determine the
/// size of the memory block that is touched.
/// Returns true if any change was made to the IR, false otherwise.
-bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
- uint64_t NeededSize = DL->getTypeStoreSize(InstVal->getType());
+bool BoundsChecking::instrument(Value *Ptr, Value *InstVal,
+ const DataLayout &DL) {
+ uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
<< " bytes\n");
@@ -141,7 +140,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
Value *Offset = SizeOffset.second;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
- Type *IntTy = DL->getIntPtrType(Ptr->getType());
+ Type *IntTy = DL.getIntPtrType(Ptr->getType());
Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
// three checks are required to ensure safety:
@@ -165,8 +164,8 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
}
bool BoundsChecking::runOnFunction(Function &F) {
- DL = &getAnalysis<DataLayoutPass>().getDataLayout();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TrapBB = nullptr;
BuilderTy TheBuilder(F.getContext(), TargetFolder(DL));
@@ -192,13 +191,16 @@ bool BoundsChecking::runOnFunction(Function &F) {
Builder->SetInsertPoint(Inst);
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- MadeChange |= instrument(LI->getPointerOperand(), LI);
+ MadeChange |= instrument(LI->getPointerOperand(), LI, DL);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand());
+ MadeChange |=
+ instrument(SI->getPointerOperand(), SI->getValueOperand(), DL);
} else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
- MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand());
+ MadeChange |=
+ instrument(AI->getPointerOperand(), AI->getCompareOperand(), DL);
} else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
- MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand());
+ MadeChange |=
+ instrument(AI->getPointerOperand(), AI->getValOperand(), DL);
} else {
llvm_unreachable("unknown Instruction type");
}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 8f24476..2de6e1a 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -83,14 +83,14 @@ static cl::opt<bool> ClPreserveAlignment(
cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
cl::init(false));
-// The ABI list file controls how shadow parameters are passed. The pass treats
+// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations are "functional" and "discard",
// which are described below under DataFlowSanitizer::WrapperKind.
-static cl::opt<std::string> ClABIListFile(
+static cl::list<std::string> ClABIListFiles(
"dfsan-abilist",
cl::desc("File listing native ABI functions and how the pass treats them"),
cl::Hidden);
@@ -141,7 +141,9 @@ class DFSanABIList {
std::unique_ptr<SpecialCaseList> SCL;
public:
- DFSanABIList(std::unique_ptr<SpecialCaseList> SCL) : SCL(std::move(SCL)) {}
+ DFSanABIList() {}
+
+ void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
/// Returns whether either this function or its source file are listed in the
/// given category.
@@ -215,7 +217,6 @@ class DataFlowSanitizer : public ModulePass {
WK_Custom
};
- const DataLayout *DL;
Module *Mod;
LLVMContext *Ctx;
IntegerType *ShadowTy;
@@ -247,7 +248,7 @@ class DataFlowSanitizer : public ModulePass {
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeSet ReadOnlyNoneAttrs;
- DenseMap<const Function *, DISubprogram> FunctionDIs;
+ DenseMap<const Function *, DISubprogram *> FunctionDIs;
Value *getShadowAddress(Value *Addr, Instruction *Pos);
bool isInstrumented(const Function *F);
@@ -264,9 +265,9 @@ class DataFlowSanitizer : public ModulePass {
Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
public:
- DataFlowSanitizer(StringRef ABIListFile = StringRef(),
- void *(*getArgTLS)() = nullptr,
- void *(*getRetValTLS)() = nullptr);
+ DataFlowSanitizer(
+ const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
+ void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
static char ID;
bool doInitialization(Module &M) override;
bool runOnModule(Module &M) override;
@@ -351,25 +352,26 @@ char DataFlowSanitizer::ID;
INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
"DataFlowSanitizer: dynamic data flow analysis.", false, false)
-ModulePass *llvm::createDataFlowSanitizerPass(StringRef ABIListFile,
- void *(*getArgTLS)(),
- void *(*getRetValTLS)()) {
- return new DataFlowSanitizer(ABIListFile, getArgTLS, getRetValTLS);
+ModulePass *
+llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)()) {
+ return new DataFlowSanitizer(ABIListFiles, getArgTLS, getRetValTLS);
}
-DataFlowSanitizer::DataFlowSanitizer(StringRef ABIListFile,
- void *(*getArgTLS)(),
- void *(*getRetValTLS)())
- : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS),
- ABIList(SpecialCaseList::createOrDie(ABIListFile.empty() ? ClABIListFile
- : ABIListFile)) {
+DataFlowSanitizer::DataFlowSanitizer(
+ const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),
+ void *(*getRetValTLS)())
+ : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {
+ std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
+ AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
+ ClABIListFiles.end());
+ ABIList.set(SpecialCaseList::createOrDie(AllABIListFiles));
}
FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
- llvm::SmallVector<Type *, 4> ArgTypes;
- std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
- for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
- ArgTypes.push_back(ShadowTy);
+ llvm::SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
+ ArgTypes.append(T->getNumParams(), ShadowTy);
if (T->isVarArg())
ArgTypes.push_back(ShadowPtrTy);
Type *RetType = T->getReturnType();
@@ -382,9 +384,8 @@ FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
assert(!T->isVarArg());
llvm::SmallVector<Type *, 4> ArgTypes;
ArgTypes.push_back(T->getPointerTo());
- std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
- for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
- ArgTypes.push_back(ShadowTy);
+ ArgTypes.append(T->param_begin(), T->param_end());
+ ArgTypes.append(T->getNumParams(), ShadowTy);
Type *RetType = T->getReturnType();
if (!RetType->isVoidTy())
ArgTypes.push_back(ShadowPtrTy);
@@ -420,16 +421,13 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ const DataLayout &DL = M.getDataLayout();
Mod = &M;
Ctx = &M.getContext();
ShadowTy = IntegerType::get(*Ctx, ShadowWidth);
ShadowPtrTy = PointerType::getUnqual(ShadowTy);
- IntptrTy = DL->getIntPtrType(*Ctx);
+ IntptrTy = DL.getIntPtrType(*Ctx);
ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8);
if (IsX86_64)
@@ -528,9 +526,9 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
F->getParent());
NewF->copyAttributesFrom(F);
NewF->removeAttributes(
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
- AttributeSet::ReturnIndex));
+ AttributeSet::ReturnIndex,
+ AttributeSet::get(F->getContext(), AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
if (F->isVarArg()) {
@@ -591,9 +589,6 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
}
bool DataFlowSanitizer::runOnModule(Module &M) {
- if (!DL)
- return false;
-
if (ABIList.isIn(M, "skip"))
return false;
@@ -708,9 +703,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
NewF->copyAttributesFrom(&F);
NewF->removeAttributes(
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
- AttributeSet::ReturnIndex));
+ AttributeSet::ReturnIndex,
+ AttributeSet::get(NewF->getContext(), AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
for (Function::arg_iterator FArg = F.arg_begin(),
NewFArg = NewF->arg_begin(),
FArgEnd = F.arg_end();
@@ -758,7 +753,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(&F);
if (DI != FunctionDIs.end())
- DI->second.replaceFunction(&F);
+ DI->second->replaceFunction(&F);
UnwrappedFnMap[WrappedFnCst] = &F;
*i = NewF;
@@ -855,7 +850,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
IRBuilder<> ThenIRB(BI);
- ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn);
+ ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
}
}
}
@@ -870,7 +865,7 @@ Value *DFSanFunction::getArgTLSPtr() {
return ArgTLSPtr = DFS.ArgTLS;
IRBuilder<> IRB(F->getEntryBlock().begin());
- return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS);
+ return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {});
}
Value *DFSanFunction::getRetvalTLS() {
@@ -880,7 +875,7 @@ Value *DFSanFunction::getRetvalTLS() {
return RetvalTLSPtr = DFS.RetvalTLS;
IRBuilder<> IRB(F->getEntryBlock().begin());
- return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS);
+ return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {});
}
Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
@@ -977,7 +972,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
IRBuilder<> IRB(Pos);
if (AvoidNewBlocks) {
- CallInst *Call = IRB.CreateCall2(DFS.DFSanCheckedUnionFn, V1, V2);
+ CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2});
Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
Call->addAttribute(1, Attribute::ZExt);
Call->addAttribute(2, Attribute::ZExt);
@@ -990,7 +985,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
IRBuilder<> ThenIRB(BI);
- CallInst *Call = ThenIRB.CreateCall2(DFS.DFSanUnionFn, V1, V2);
+ CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2});
Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
Call->addAttribute(1, Attribute::ZExt);
Call->addAttribute(2, Attribute::ZExt);
@@ -1054,7 +1049,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
SmallVector<Value *, 2> Objs;
- GetUnderlyingObjects(Addr, Objs, DFS.DL);
+ GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
bool AllConstants = true;
for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end();
i != e; ++i) {
@@ -1080,8 +1075,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
case 2: {
IRBuilder<> IRB(Pos);
- Value *ShadowAddr1 =
- IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos);
}
@@ -1092,8 +1087,9 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
// shadow is non-equal.
BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
IRBuilder<> FallbackIRB(FallbackBB);
- CallInst *FallbackCall = FallbackIRB.CreateCall2(
- DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ CallInst *FallbackCall = FallbackIRB.CreateCall(
+ DFS.DFSanUnionLoadFn,
+ {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
// Compare each of the shadows stored in the loaded 64 bits to each other,
@@ -1132,7 +1128,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
DT.addNewBlock(NextBB, LastBr->getParent());
IRBuilder<> NextIRB(NextBB);
- WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
LastBr->setSuccessor(0, NextBB);
@@ -1148,14 +1145,15 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
IRBuilder<> IRB(Pos);
- CallInst *FallbackCall = IRB.CreateCall2(
- DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ CallInst *FallbackCall = IRB.CreateCall(
+ DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
return FallbackCall;
}
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
- uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType());
+ auto &DL = LI.getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
if (Size == 0) {
DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
return;
@@ -1165,7 +1163,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
if (ClPreserveAlignment) {
Align = LI.getAlignment();
if (Align == 0)
- Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType());
+ Align = DL.getABITypeAlignment(LI.getType());
} else {
Align = 1;
}
@@ -1217,7 +1215,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
Value *ShadowVecAddr =
IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
do {
- Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset);
+ Value *CurShadowVecAddr =
+ IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
Size -= ShadowVecSize;
++Offset;
@@ -1225,7 +1224,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
Offset *= ShadowVecSize;
}
while (Size > 0) {
- Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset);
+ Value *CurShadowAddr =
+ IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);
IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
--Size;
++Offset;
@@ -1233,8 +1233,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
void DFSanVisitor::visitStoreInst(StoreInst &SI) {
- uint64_t Size =
- DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType());
+ auto &DL = SI.getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
if (Size == 0)
return;
@@ -1242,7 +1242,7 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
if (ClPreserveAlignment) {
Align = SI.getAlignment();
if (Align == 0)
- Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType());
+ Align = DL.getABITypeAlignment(SI.getValueOperand()->getType());
} else {
Align = 1;
}
@@ -1333,10 +1333,10 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) {
void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
IRBuilder<> IRB(&I);
Value *ValShadow = DFSF.getShadow(I.getValue());
- IRB.CreateCall3(
- DFSF.DFS.DFSanSetLabelFn, ValShadow,
- IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
- IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy));
+ IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
+ {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(
+ *DFSF.DFS.Ctx)),
+ IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
@@ -1358,8 +1358,8 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
- IRB.CreateCall5(I.getCalledValue(), DestShadow, SrcShadow, LenShadow,
- AlignShadow, I.getVolatileCst());
+ IRB.CreateCall(I.getCalledValue(), {DestShadow, SrcShadow, LenShadow,
+ AlignShadow, I.getVolatileCst()});
}
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
@@ -1473,17 +1473,17 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
Args.push_back(DFSF.getShadow(*i));
if (FT->isVarArg()) {
- auto LabelVAAlloca =
- new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy,
- CS.arg_size() - FT->getNumParams()),
- "labelva", DFSF.F->getEntryBlock().begin());
+ auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
+ CS.arg_size() - FT->getNumParams());
+ auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva",
+ DFSF.F->getEntryBlock().begin());
for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
- auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n);
+ auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
}
- Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0));
+ Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
}
if (!FT->getReturnType()->isVoidTy()) {
@@ -1532,7 +1532,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
Next = II->getNormalDest()->begin();
} else {
BasicBlock *NewBB =
- SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DFS);
+ SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
Next = NewBB->begin();
}
} else {
@@ -1569,10 +1569,11 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
AllocaInst *VarArgShadow =
new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin());
- Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0));
+ Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
for (unsigned n = 0; i != e; ++i, ++n) {
- IRB.CreateStore(DFSF.getShadow(*i),
- IRB.CreateConstGEP2_32(VarArgShadow, 0, n));
+ IRB.CreateStore(
+ DFSF.getShadow(*i),
+ IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));
Args.push_back(*i);
}
}
@@ -1587,8 +1588,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
NewCS.setCallingConv(CS.getCallingConv());
NewCS.setAttributes(CS.getAttributes().removeAttributes(
*DFSF.DFS.Ctx, AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType(),
- AttributeSet::ReturnIndex)));
+ AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType())));
if (Next) {
ExtractValueInst *ExVal =
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 60b541f..9a3ed5c 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -57,6 +57,7 @@ GCOVOptions GCOVOptions::getDefault() {
Options.UseCfgChecksum = false;
Options.NoRedZone = false;
Options.FunctionNamesInData = true;
+ Options.ExitBlockBeforeBody = DefaultExitBlockBeforeBody;
if (DefaultGCOVVersion.size() != 4) {
llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
@@ -72,20 +73,10 @@ namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
- init();
- }
- GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
+ GCOVProfiler(const GCOVOptions &Opts) : ModulePass(ID), Options(Opts) {
assert((Options.EmitNotes || Options.EmitData) &&
"GCOVProfiler asked to do nothing?");
- init();
- }
- const char *getPassName() const override {
- return "GCOV Profiler";
- }
-
- private:
- void init() {
ReversedVersion[0] = Options.Version[3];
ReversedVersion[1] = Options.Version[2];
ReversedVersion[2] = Options.Version[1];
@@ -93,6 +84,11 @@ namespace {
ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
+ const char *getPassName() const override {
+ return "GCOV Profiler";
+ }
+
+ private:
bool runOnModule(Module &M) override;
// Create the .gcno files for the Module based on DebugInfo.
@@ -130,7 +126,7 @@ namespace {
Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- std::string mangleName(DICompileUnit CU, const char *NewStem);
+ std::string mangleName(const DICompileUnit *CU, const char *NewStem);
GCOVOptions Options;
@@ -153,10 +149,10 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
return new GCOVProfiler(Options);
}
-static StringRef getFunctionName(DISubprogram SP) {
- if (!SP.getLinkageName().empty())
- return SP.getLinkageName();
- return SP.getName();
+static StringRef getFunctionName(const DISubprogram *SP) {
+ if (!SP->getLinkageName().empty())
+ return SP->getLinkageName();
+ return SP->getName();
}
namespace {
@@ -167,7 +163,7 @@ namespace {
static const char *const BlockTag;
static const char *const EdgeTag;
- GCOVRecord() {}
+ GCOVRecord() = default;
void writeBytes(const char *Bytes, int Size) {
os->write(Bytes, Size);
@@ -313,13 +309,13 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
+ GCOVFunction(const DISubprogram *SP, raw_ostream *os, uint32_t Ident,
bool UseCfgChecksum, bool ExitBlockBeforeBody)
: SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
ReturnBlock(1, os) {
this->os = os;
- Function *F = SP.getFunction();
+ Function *F = SP->getFunction();
DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
uint32_t i = 0;
@@ -334,7 +330,7 @@ namespace {
std::string FunctionNameAndLine;
raw_string_ostream FNLOS(FunctionNameAndLine);
- FNLOS << getFunctionName(SP) << SP.getLineNumber();
+ FNLOS << getFunctionName(SP) << SP->getLine();
FNLOS.flush();
FuncChecksum = hash_value(FunctionNameAndLine);
}
@@ -370,7 +366,7 @@ namespace {
void writeOut() {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
- 1 + lengthOfGCOVString(SP.getFilename()) + 1;
+ 1 + lengthOfGCOVString(SP->getFilename()) + 1;
if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
@@ -379,8 +375,8 @@ namespace {
if (UseCfgChecksum)
write(CfgChecksum);
writeGCOVString(getFunctionName(SP));
- writeGCOVString(SP.getFilename());
- write(SP.getLineNumber());
+ writeGCOVString(SP->getFilename());
+ write(SP->getLine());
// Emit count of blocks.
writeBytes(BlockTag, 4);
@@ -415,7 +411,7 @@ namespace {
}
private:
- DISubprogram SP;
+ const DISubprogram *SP;
uint32_t Ident;
uint32_t FuncChecksum;
bool UseCfgChecksum;
@@ -425,7 +421,8 @@ namespace {
};
}
-std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
+std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
+ const char *NewStem) {
if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
MDNode *N = GCov->getOperand(i);
@@ -441,12 +438,12 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
}
}
- SmallString<128> Filename = CU.getFilename();
+ SmallString<128> Filename = CU->getFilename();
sys::path::replace_extension(Filename, NewStem);
StringRef FName = sys::path::filename(Filename);
SmallString<128> CurPath;
if (sys::fs::current_path(CurPath)) return FName;
- sys::path::append(CurPath, FName.str());
+ sys::path::append(CurPath, FName);
return CurPath.str();
}
@@ -470,7 +467,8 @@ static bool functionHasLines(Function *F) {
if (isa<DbgInfoIntrinsic>(I)) continue;
const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
+ if (!Loc)
+ continue;
// Artificial lines such as calls to the global constructors.
if (Loc.getLine() == 0) continue;
@@ -490,21 +488,14 @@ void GCOVProfiler::emitProfileNotes() {
// this pass over the original .o's as they're produced, or run it after
// LTO, we'll generate the same .gcno files.
- DICompileUnit CU(CU_Nodes->getOperand(i));
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
std::error_code EC;
raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None);
std::string EdgeDestinations;
- DIArray SPs = CU.getSubprograms();
unsigned FunctionIdent = 0;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- assert((!SP || SP.isSubprogram()) &&
- "A MDNode in subprograms of a CU should be null or a DISubprogram.");
- if (!SP)
- continue;
-
- Function *F = SP.getFunction();
+ for (auto *SP : CU->getSubprograms()) {
+ Function *F = SP->getFunction();
if (!F) continue;
if (!functionHasLines(F)) continue;
@@ -518,7 +509,7 @@ void GCOVProfiler::emitProfileNotes() {
Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
Options.UseCfgChecksum,
- DefaultExitBlockBeforeBody));
+ Options.ExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
@@ -540,16 +531,18 @@ void GCOVProfiler::emitProfileNotes() {
if (isa<DbgInfoIntrinsic>(I)) continue;
const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
+ if (!Loc)
+ continue;
// Artificial lines such as calls to the global constructors.
if (Loc.getLine() == 0) continue;
if (Line == Loc.getLine()) continue;
Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+ if (SP != getDISubprogram(Loc.getScope()))
+ continue;
- GCOVLines &Lines = Block.getFile(SP.getFilename());
+ GCOVLines &Lines = Block.getFile(SP->getFilename());
Lines.addLine(Loc.getLine());
}
}
@@ -578,16 +571,10 @@ bool GCOVProfiler::emitProfileArcs() {
bool Result = false;
bool InsertIndCounterIncrCode = false;
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
- DIArray SPs = CU.getSubprograms();
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- assert((!SP || SP.isSubprogram()) &&
- "A MDNode in subprograms of a CU should be null or a DISubprogram.");
- if (!SP)
- continue;
- Function *F = SP.getFunction();
+ for (auto *SP : CU->getSubprograms()) {
+ Function *F = SP->getFunction();
if (!F) continue;
if (!functionHasLines(F)) continue;
if (!Result) Result = true;
@@ -607,7 +594,7 @@ bool GCOVProfiler::emitProfileArcs() {
GlobalValue::InternalLinkage,
Constant::getNullValue(CounterTy),
"__llvm_gcov_ctr");
- CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+ CountersBySP.push_back(std::make_pair(Counters, SP));
UniqueVector<BasicBlock *> ComplexEdgePreds;
UniqueVector<BasicBlock *> ComplexEdgeSuccs;
@@ -632,7 +619,8 @@ bool GCOVProfiler::emitProfileArcs() {
SmallVector<Value *, 2> Idx;
Idx.push_back(Builder.getInt64(0));
Idx.push_back(Sel);
- Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
+ Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(),
+ Counters, Idx);
Value *Count = Builder.CreateLoad(Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
@@ -666,8 +654,8 @@ bool GCOVProfiler::emitProfileArcs() {
// Build code to increment the counter.
InsertIndCounterIncrCode = true;
- Builder.CreateCall2(getIncrementIndirectCounterFunc(),
- EdgeState, CounterPtrArray);
+ Builder.CreateCall(getIncrementIndirectCounterFunc(),
+ {EdgeState, CounterPtrArray});
}
}
}
@@ -700,7 +688,7 @@ bool GCOVProfiler::emitProfileArcs() {
// Initialize the environment and register the local writeout and flush
// functions.
Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
- Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
+ Builder.CreateCall(GCOVInit, {WriteoutF, FlushF});
Builder.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
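The CreateCall2/CreateCall3/CreateCall5 helpers are gone in this LLVM version; every call is built with a single CreateCall that takes an ArrayRef of arguments, usually spelled as a braced list as in the hunks above. A minimal sketch under the same LLVM 3.7-era assumption (the callee and caller names are illustrative):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("call_demo", Ctx);
      IRBuilder<> B(Ctx);
      Type *I32 = B.getInt32Ty();
      // declare void @callee(i32, i32)
      Constant *Callee = M.getOrInsertFunction(
          "callee", FunctionType::get(B.getVoidTy(), {I32, I32}, false));
      Function *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                                     GlobalValue::ExternalLinkage, "caller", &M);
      B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
      // Was: B.CreateCall2(Callee, X, Y); now a single braced argument list.
      B.CreateCall(Callee, {B.getInt32(1), B.getInt32(2)});
      B.CreateRetVoid();
      M.print(errs(), nullptr);
      return 0;
    }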
@@ -859,34 +847,34 @@ Function *GCOVProfiler::insertCounterWriteout(
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (CU_Nodes) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
std::string FilenameGcda = mangleName(CU, "gcda");
uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
- Builder.CreateCall3(StartFile,
- Builder.CreateGlobalStringPtr(FilenameGcda),
+ Builder.CreateCall(StartFile,
+ {Builder.CreateGlobalStringPtr(FilenameGcda),
Builder.CreateGlobalStringPtr(ReversedVersion),
- Builder.getInt32(CfgChecksum));
+ Builder.getInt32(CfgChecksum)});
for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
- DISubprogram SP(CountersBySP[j].second);
+ auto *SP = cast_or_null<DISubprogram>(CountersBySP[j].second);
uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
- Builder.CreateCall5(
- EmitFunction, Builder.getInt32(j),
- Options.FunctionNamesInData ?
- Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
- Constant::getNullValue(Builder.getInt8PtrTy()),
- Builder.getInt32(FuncChecksum),
- Builder.getInt8(Options.UseCfgChecksum),
- Builder.getInt32(CfgChecksum));
+ Builder.CreateCall(
+ EmitFunction,
+ {Builder.getInt32(j),
+ Options.FunctionNamesInData
+ ? Builder.CreateGlobalStringPtr(getFunctionName(SP))
+ : Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt32(FuncChecksum),
+ Builder.getInt8(Options.UseCfgChecksum),
+ Builder.getInt32(CfgChecksum)});
GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
- Builder.CreateCall2(EmitArcs,
- Builder.getInt32(Arcs),
- Builder.CreateConstGEP2_64(GV, 0, 0));
+ Builder.CreateCall(EmitArcs, {Builder.getInt32(Arcs),
+ Builder.CreateConstGEP2_64(GV, 0, 0)});
}
- Builder.CreateCall(SummaryInfo);
- Builder.CreateCall(EndFile);
+ Builder.CreateCall(SummaryInfo, {});
+ Builder.CreateCall(EndFile, {});
}
}
@@ -926,7 +914,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
Arg = std::next(Fn->arg_begin());
Arg->setName("counters");
- Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
+ Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
Cond = Builder.CreateICmpEQ(Counter,
Constant::getNullValue(
@@ -966,7 +954,7 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
assert(WriteoutF && "Need to create the writeout function first!");
IRBuilder<> Builder(Entry);
- Builder.CreateCall(WriteoutF);
+ Builder.CreateCall(WriteoutF, {});
// Zero out the counters.
for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 2a3d154..610ff52 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -97,7 +97,8 @@ private:
/// Add uses of our data variables and runtime hook.
void emitUses();
- /// Create a static initializer for our data, on platforms that need it.
+ /// Create a static initializer for our data, on platforms that need it,
+ /// and for any profile output file that was specified.
void emitInitialization();
};
@@ -202,6 +203,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
+ Function *Fn = Inc->getParent()->getParent();
// Create the counters variable.
auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(),
@@ -210,6 +212,10 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
Counters->setVisibility(Name->getVisibility());
Counters->setSection(getCountersSection());
Counters->setAlignment(8);
+ // Place the counters in the same comdat section as their parent function.
+ // Otherwise, we may get multiple counters for the same function in certain
+ // cases.
+ Counters->setComdat(Fn->getComdat());
RegionCounters[Inc->getName()] = Counters;
@@ -234,6 +240,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
Data->setVisibility(Name->getVisibility());
Data->setSection(getDataSection());
Data->setAlignment(8);
+ Data->setComdat(Fn->getComdat());
// Mark the data variable as used so that it isn't stripped out.
UsedVars.push_back(Data);
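Both the counter and data globals are now placed in the parent function's comdat, so when the linker deduplicates the function it also keeps a single copy of its profiling globals. A sketch of the pattern, again assuming the 3.7-era C++ API; "foo" and the variable names are invented:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("comdat_demo", Ctx);
      // A linkonce_odr function placed in its own comdat group "foo".
      Function *Fn = Function::Create(
          FunctionType::get(Type::getVoidTy(Ctx), false),
          GlobalValue::LinkOnceODRLinkage, "foo", &M);
      Comdat *C = M.getOrInsertComdat("foo");
      C->setSelectionKind(Comdat::Any);
      Fn->setComdat(C);
      IRBuilder<> B(BasicBlock::Create(Ctx, "entry", Fn));
      B.CreateRetVoid();
      // The counter variable joins the same comdat group as its function.
      ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), 2);
      auto *Counters = new GlobalVariable(M, CounterTy, false, Fn->getLinkage(),
                                          Constant::getNullValue(CounterTy),
                                          "__demo_counters_foo");
      Counters->setComdat(Fn->getComdat());
      M.print(errs(), nullptr);
      return 0;
    }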
@@ -288,6 +295,7 @@ void InstrProfiling::emitRuntimeHook() {
User->addFnAttr(Attribute::NoInline);
if (Options.NoRedZone)
User->addFnAttr(Attribute::NoRedZone);
+ User->setVisibility(GlobalValue::HiddenVisibility);
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
auto *Load = IRB.CreateLoad(Var);
@@ -327,8 +335,10 @@ void InstrProfiling::emitUses() {
}
void InstrProfiling::emitInitialization() {
+ std::string InstrProfileOutput = Options.InstrProfileOutput;
+
Constant *RegisterF = M->getFunction("__llvm_profile_register_functions");
- if (!RegisterF)
+ if (!RegisterF && InstrProfileOutput.empty())
return;
// Create the initialization function.
@@ -343,7 +353,24 @@ void InstrProfiling::emitInitialization() {
// Add the basic block and the necessary calls.
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
- IRB.CreateCall(RegisterF);
+ if (RegisterF)
+ IRB.CreateCall(RegisterF, {});
+ if (!InstrProfileOutput.empty()) {
+ auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
+ auto *SetNameTy = FunctionType::get(VoidTy, Int8PtrTy, false);
+ auto *SetNameF =
+ Function::Create(SetNameTy, GlobalValue::ExternalLinkage,
+ "__llvm_profile_override_default_filename", M);
+
+ // Create variable for profile name
+ Constant *ProfileNameConst =
+ ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
+ GlobalVariable *ProfileName =
+ new GlobalVariable(*M, ProfileNameConst->getType(), true,
+ GlobalValue::PrivateLinkage, ProfileNameConst);
+
+ IRB.CreateCall(SetNameF, IRB.CreatePointerCast(ProfileName, Int8PtrTy));
+ }
IRB.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
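When a profile output file is specified, emitInitialization now also emits a private string constant and passes it to __llvm_profile_override_default_filename from the generated constructor. The essential IR-building pattern is sketched below (LLVM 3.7-era API assumed; the module name, constructor name, and file name are placeholders, and this is not the pass's exact code):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("initfile_demo", Ctx);
      IRBuilder<> B(Ctx);
      Type *VoidTy = B.getVoidTy();
      Type *Int8PtrTy = B.getInt8PtrTy();
      // declare void @__llvm_profile_override_default_filename(i8*)
      Constant *SetNameF = M.getOrInsertFunction(
          "__llvm_profile_override_default_filename",
          FunctionType::get(VoidTy, Int8PtrTy, false));
      Function *Init = Function::Create(FunctionType::get(VoidTy, false),
                                        GlobalValue::InternalLinkage,
                                        "__demo_profile_init", &M);
      B.SetInsertPoint(BasicBlock::Create(Ctx, "", Init));
      // Private constant holding the requested file name.
      Constant *NameConst =
          ConstantDataArray::getString(Ctx, "default.profraw", true);
      auto *NameGV = new GlobalVariable(M, NameConst->getType(), true,
                                        GlobalValue::PrivateLinkage, NameConst);
      B.CreateCall(SetNameF, B.CreatePointerCast(NameGV, Int8PtrTy));
      B.CreateRetVoid();
      M.print(errs(), nullptr);
      return 0;
    }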
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index d7d752f..100824e 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -120,6 +120,7 @@ using namespace llvm;
#define DEBUG_TYPE "msan"
+static const unsigned kOriginSize = 4;
static const unsigned kMinOriginAlignment = 4;
static const unsigned kShadowTLSAlignment = 8;
@@ -190,6 +191,9 @@ static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
cl::desc("Insert checks for constant shadow values"),
cl::Hidden, cl::init(false));
+static const char *const kMsanModuleCtorName = "msan.module_ctor";
+static const char *const kMsanInitName = "__msan_init";
+
namespace {
// Memory map parameters used in application-to-shadow address calculation.
@@ -209,7 +213,7 @@ struct PlatformMemoryMapParams {
};
// i386 Linux
-static const MemoryMapParams LinuxMemoryMapParams32 = {
+static const MemoryMapParams Linux_I386_MemoryMapParams = {
0x000080000000, // AndMask
0, // XorMask (not used)
0, // ShadowBase (not used)
@@ -217,15 +221,23 @@ static const MemoryMapParams LinuxMemoryMapParams32 = {
};
// x86_64 Linux
-static const MemoryMapParams LinuxMemoryMapParams64 = {
+static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
0x400000000000, // AndMask
0, // XorMask (not used)
0, // ShadowBase (not used)
0x200000000000, // OriginBase
};
+// mips64 Linux
+static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
+ 0x004000000000, // AndMask
+ 0, // XorMask (not used)
+ 0, // ShadowBase (not used)
+ 0x002000000000, // OriginBase
+};
+
// i386 FreeBSD
-static const MemoryMapParams FreeBSDMemoryMapParams32 = {
+static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
0x000180000000, // AndMask
0x000040000000, // XorMask
0x000020000000, // ShadowBase
@@ -233,21 +245,26 @@ static const MemoryMapParams FreeBSDMemoryMapParams32 = {
};
// x86_64 FreeBSD
-static const MemoryMapParams FreeBSDMemoryMapParams64 = {
+static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
0xc00000000000, // AndMask
0x200000000000, // XorMask
0x100000000000, // ShadowBase
0x380000000000, // OriginBase
};
-static const PlatformMemoryMapParams LinuxMemoryMapParams = {
- &LinuxMemoryMapParams32,
- &LinuxMemoryMapParams64,
+static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
+ &Linux_I386_MemoryMapParams,
+ &Linux_X86_64_MemoryMapParams,
+};
+
+static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
+ NULL,
+ &Linux_MIPS64_MemoryMapParams,
};
-static const PlatformMemoryMapParams FreeBSDMemoryMapParams = {
- &FreeBSDMemoryMapParams32,
- &FreeBSDMemoryMapParams64,
+static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
+ &FreeBSD_I386_MemoryMapParams,
+ &FreeBSD_X86_64_MemoryMapParams,
};
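These renamed per-platform, per-architecture parameter blocks feed MSan's application-to-shadow mapping. As far as I can tell from the surrounding pass (the authoritative computation lives in getShadowPtr/getOriginPtr, which this hunk does not touch), the fields combine roughly as offset = (addr & ~AndMask) ^ XorMask, shadow = offset + ShadowBase, origin = offset + OriginBase. A host-side arithmetic sketch of that assumed mapping, using the FreeBSD x86_64 constants above:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // FreeBSD_X86_64_MemoryMapParams from the hunk above.
      const uint64_t AndMask = 0xc00000000000, XorMask = 0x200000000000;
      const uint64_t ShadowBase = 0x100000000000, OriginBase = 0x380000000000;
      uint64_t Addr = 0x7fffdeadb000;             // example application address
      uint64_t Off = (Addr & ~AndMask) ^ XorMask; // assumed combination, see above
      std::printf("app    %#14llx\n", (unsigned long long)Addr);
      std::printf("shadow %#14llx\n", (unsigned long long)(Off + ShadowBase));
      std::printf("origin %#14llx\n", (unsigned long long)(Off + OriginBase));
      return 0;
    }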
/// \brief An instrumentation pass implementing detection of uninitialized
@@ -260,7 +277,6 @@ class MemorySanitizer : public FunctionPass {
MemorySanitizer(int TrackOrigins = 0)
: FunctionPass(ID),
TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
- DL(nullptr),
WarningFn(nullptr) {}
const char *getPassName() const override { return "MemorySanitizer"; }
bool runOnFunction(Function &F) override;
@@ -273,7 +289,6 @@ class MemorySanitizer : public FunctionPass {
/// \brief Track origins (allocation points) of uninitialized values.
int TrackOrigins;
- const DataLayout *DL;
LLVMContext *C;
Type *IntptrTy;
Type *OriginTy;
@@ -320,9 +335,11 @@ class MemorySanitizer : public FunctionPass {
MDNode *OriginStoreWeights;
/// \brief An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+ Function *MsanCtorFunction;
friend struct MemorySanitizerVisitor;
friend struct VarArgAMD64Helper;
+ friend struct VarArgMIPS64Helper;
};
} // namespace
@@ -434,32 +451,43 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
///
/// inserts a call to __msan_init to the module's constructor list.
bool MemorySanitizer::doInitialization(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ auto &DL = M.getDataLayout();
Triple TargetTriple(M.getTargetTriple());
- const PlatformMemoryMapParams *PlatformMapParams;
- if (TargetTriple.getOS() == Triple::FreeBSD)
- PlatformMapParams = &FreeBSDMemoryMapParams;
- else
- PlatformMapParams = &LinuxMemoryMapParams;
-
- C = &(M.getContext());
- unsigned PtrSize = DL->getPointerSizeInBits(/* AddressSpace */0);
- switch (PtrSize) {
- case 64:
- MapParams = PlatformMapParams->bits64;
+ switch (TargetTriple.getOS()) {
+ case Triple::FreeBSD:
+ switch (TargetTriple.getArch()) {
+ case Triple::x86_64:
+ MapParams = FreeBSD_X86_MemoryMapParams.bits64;
+ break;
+ case Triple::x86:
+ MapParams = FreeBSD_X86_MemoryMapParams.bits32;
+ break;
+ default:
+ report_fatal_error("unsupported architecture");
+ }
break;
- case 32:
- MapParams = PlatformMapParams->bits32;
+ case Triple::Linux:
+ switch (TargetTriple.getArch()) {
+ case Triple::x86_64:
+ MapParams = Linux_X86_MemoryMapParams.bits64;
+ break;
+ case Triple::x86:
+ MapParams = Linux_X86_MemoryMapParams.bits32;
+ break;
+ case Triple::mips64:
+ case Triple::mips64el:
+ MapParams = Linux_MIPS_MemoryMapParams.bits64;
+ break;
+ default:
+ report_fatal_error("unsupported architecture");
+ }
break;
default:
- report_fatal_error("unsupported pointer size");
- break;
+ report_fatal_error("unsupported operating system");
}
+ C = &(M.getContext());
IRBuilder<> IRB(*C);
IntptrTy = IRB.getIntPtrTy(DL);
OriginTy = IRB.getInt32Ty();
@@ -467,9 +495,12 @@ bool MemorySanitizer::doInitialization(Module &M) {
ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
- // Insert a call to __msan_init/__msan_track_origins into the module's CTORs.
- appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
- "__msan_init", IRB.getVoidTy(), nullptr)), 0);
+ std::tie(MsanCtorFunction, std::ignore) =
+ createSanitizerCtorAndInitFunctions(M, kMsanModuleCtorName, kMsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{});
+
+ appendToGlobalCtors(M, MsanCtorFunction, 0);
if (TrackOrigins)
new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
@@ -555,8 +586,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
: F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
- bool SanitizeFunction = F.getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::SanitizeMemory);
+ bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
InsertChecks = SanitizeFunction;
PropagateShadow = SanitizeFunction;
PoisonStack = SanitizeFunction && ClPoisonStack;
@@ -575,39 +605,86 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return IRB.CreateCall(MS.MsanChainOriginFn, V);
}
+ Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
+ if (IntptrSize == kOriginSize) return Origin;
+ assert(IntptrSize == kOriginSize * 2);
+ Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
+ return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
+ }
+
+ /// \brief Fill memory range with the given origin value.
+ void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
+ unsigned Size, unsigned Alignment) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy);
+ unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
+ assert(IntptrAlignment >= kMinOriginAlignment);
+ assert(IntptrSize >= kOriginSize);
+
+ unsigned Ofs = 0;
+ unsigned CurrentAlignment = Alignment;
+ if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
+ Value *IntptrOrigin = originToIntptr(IRB, Origin);
+ Value *IntptrOriginPtr =
+ IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
+ for (unsigned i = 0; i < Size / IntptrSize; ++i) {
+ Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
+ : IntptrOriginPtr;
+ IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
+ Ofs += IntptrSize / kOriginSize;
+ CurrentAlignment = IntptrAlignment;
+ }
+ }
+
+ for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
+ Value *GEP =
+ i ? IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;
+ IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
+ CurrentAlignment = kMinOriginAlignment;
+ }
+ }
+
void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
unsigned Alignment, bool AsCall) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+ unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
if (isa<StructType>(Shadow->getType())) {
- IRB.CreateAlignedStore(updateOrigin(Origin, IRB),
- getOriginPtr(Addr, IRB, Alignment),
- OriginAlignment);
+ paintOrigin(IRB, updateOrigin(Origin, IRB),
+ getOriginPtr(Addr, IRB, Alignment), StoreSize,
+ OriginAlignment);
} else {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- // TODO(eugenis): handle non-zero constant shadow by inserting an
- // unconditional check (can not simply fail compilation as this could
- // be in the dead code).
- if (!ClCheckConstantShadow)
- if (isa<Constant>(ConvertedShadow)) return;
+ Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
+ if (ConstantShadow) {
+ if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
+ paintOrigin(IRB, updateOrigin(Origin, IRB),
+ getOriginPtr(Addr, IRB, Alignment), StoreSize,
+ OriginAlignment);
+ return;
+ }
+
unsigned TypeSizeInBits =
- MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+ DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes) {
Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
Value *ConvertedShadow2 = IRB.CreateZExt(
ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
- IRB.CreateCall3(Fn, ConvertedShadow2,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- Origin);
+ IRB.CreateCall(Fn, {ConvertedShadow2,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ Origin});
} else {
Value *Cmp = IRB.CreateICmpNE(
ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
IRBuilder<> IRBNew(CheckTerm);
- IRBNew.CreateAlignedStore(updateOrigin(Origin, IRBNew),
- getOriginPtr(Addr, IRBNew, Alignment),
- OriginAlignment);
+ paintOrigin(IRBNew, updateOrigin(Origin, IRBNew),
+ getOriginPtr(Addr, IRBNew, Alignment), StoreSize,
+ OriginAlignment);
}
}
}
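paintOrigin replaces the single origin store with a small fill loop: when the origin pointer is sufficiently aligned, originToIntptr replicates the 4-byte origin id into a pointer-sized word and the range is written 8 bytes at a time, with any remaining 4-byte slots written individually. The host-side C++ analogue below mirrors that arithmetic; it is not the instrumentation itself, which emits IR stores rather than direct memory writes:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main() {
      const unsigned kOriginSize = 4;
      const unsigned IntptrSize = 8;               // 64-bit target assumed
      uint32_t Origin = 0xdeadbeef;
      // originToIntptr: replicate the 4-byte id across an 8-byte word.
      uint64_t Wide = (uint64_t)Origin | ((uint64_t)Origin << (kOriginSize * 8));

      std::vector<uint32_t> OriginArea(5, 0);      // 20 bytes of origin shadow
      unsigned Size = OriginArea.size() * kOriginSize;
      unsigned Ofs = 0;
      for (unsigned i = 0; i < Size / IntptrSize; ++i, Ofs += 2)
        std::memcpy(&OriginArea[Ofs], &Wide, IntptrSize);   // wide stores
      for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i)
        OriginArea[i] = Origin;                              // 4-byte tail
      for (uint32_t V : OriginArea)
        std::printf("%08x\n", (unsigned)V);
      return 0;
    }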
@@ -643,19 +720,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
- // See the comment in storeOrigin().
- if (!ClCheckConstantShadow)
- if (isa<Constant>(ConvertedShadow)) return;
- unsigned TypeSizeInBits =
- MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+
+ Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
+ if (ConstantShadow) {
+ if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
+ if (MS.TrackOrigins) {
+ IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0),
+ MS.OriginTLS);
+ }
+ IRB.CreateCall(MS.WarningFn, {});
+ IRB.CreateCall(MS.EmptyAsm, {});
+ // FIXME: Insert UnreachableInst if !ClKeepGoing?
+ // This may invalidate some of the following checks and needs to be done
+ // at the very end.
+ }
+ return;
+ }
+
+ const DataLayout &DL = OrigIns->getModule()->getDataLayout();
+
+ unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes) {
Value *Fn = MS.MaybeWarningFn[SizeIndex];
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
- IRB.CreateCall2(Fn, ConvertedShadow2, MS.TrackOrigins && Origin
+ IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
? Origin
- : (Value *)IRB.getInt32(0));
+ : (Value *)IRB.getInt32(0)});
} else {
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
@@ -668,8 +760,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0),
MS.OriginTLS);
}
- IRB.CreateCall(MS.WarningFn);
- IRB.CreateCall(MS.EmptyAsm);
+ IRB.CreateCall(MS.WarningFn, {});
+ IRB.CreateCall(MS.EmptyAsm, {});
DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
}
}
@@ -687,7 +779,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
MS.initializeCallbacks(*F.getParent());
- if (!MS.DL) return false;
// In the presence of unreachable blocks, we may see Phi nodes with
// incoming nodes from such blocks. Since InstVisitor skips unreachable
@@ -743,8 +834,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// This may return weird-sized types like i1.
if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
return IT;
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
- uint32_t EltSize = MS.DL->getTypeSizeInBits(VT->getElementType());
+ uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
return VectorType::get(IntegerType::get(*MS.C, EltSize),
VT->getNumElements());
}
@@ -760,7 +852,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
return Res;
}
- uint32_t TypeSize = MS.DL->getTypeSizeInBits(OrigTy);
+ uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
return IntegerType::get(*MS.C, TypeSize);
}
@@ -953,14 +1045,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Function *F = A->getParent();
IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
unsigned ArgOffset = 0;
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (auto &FArg : F->args()) {
if (!FArg.getType()->isSized()) {
DEBUG(dbgs() << "Arg is not sized\n");
continue;
}
- unsigned Size = FArg.hasByValAttr()
- ? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType())
- : MS.DL->getTypeAllocSize(FArg.getType());
+ unsigned Size =
+ FArg.hasByValAttr()
+ ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType())
+ : DL.getTypeAllocSize(FArg.getType());
if (A == &FArg) {
bool Overflow = ArgOffset + Size > kParamTLSSize;
Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
@@ -971,7 +1065,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned ArgAlign = FArg.getParamAlignment();
if (ArgAlign == 0) {
Type *EltType = A->getType()->getPointerElementType();
- ArgAlign = MS.DL->getABITypeAlignment(EltType);
+ ArgAlign = DL.getABITypeAlignment(EltType);
}
if (Overflow) {
// ParamTLS overflow.
@@ -1708,11 +1802,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Similar situation exists for memcpy and memset.
void visitMemMoveInst(MemMoveInst &I) {
IRBuilder<> IRB(&I);
- IRB.CreateCall3(
- MS.MemmoveFn,
- IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ IRB.CreateCall(
+ MS.MemmoveFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
}
@@ -1722,22 +1816,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// alignment.
void visitMemCpyInst(MemCpyInst &I) {
IRBuilder<> IRB(&I);
- IRB.CreateCall3(
- MS.MemcpyFn,
- IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ IRB.CreateCall(
+ MS.MemcpyFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
}
// Same as memcpy.
void visitMemSetInst(MemSetInst &I) {
IRBuilder<> IRB(&I);
- IRB.CreateCall3(
- MS.MemsetFn,
- IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ IRB.CreateCall(
+ MS.MemsetFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
I.eraseFromParent();
}
@@ -2018,8 +2112,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
: Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
Value *V1 = I.getOperand(0);
Value *V2 = I.getOperand(1);
- Value *Shift = IRB.CreateCall2(I.getCalledValue(),
- IRB.CreateBitCast(S1, V1->getType()), V2);
+ Value *Shift = IRB.CreateCall(I.getCalledValue(),
+ {IRB.CreateBitCast(S1, V1->getType()), V2});
Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
setShadow(&I, IRB.CreateOr(Shift, S2Conv));
setOriginForNaryOp(I);
@@ -2099,7 +2193,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Function *ShadowFn = Intrinsic::getDeclaration(
F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
- Value *S = IRB.CreateCall2(ShadowFn, S1_ext, S2_ext, "_msprop_vector_pack");
+ Value *S =
+ IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
@@ -2178,15 +2273,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_sse_cvttps2pi:
handleVectorConvertIntrinsic(I, 2);
break;
- case llvm::Intrinsic::x86_avx512_psll_dq:
- case llvm::Intrinsic::x86_avx512_psrl_dq:
case llvm::Intrinsic::x86_avx2_psll_w:
case llvm::Intrinsic::x86_avx2_psll_d:
case llvm::Intrinsic::x86_avx2_psll_q:
case llvm::Intrinsic::x86_avx2_pslli_w:
case llvm::Intrinsic::x86_avx2_pslli_d:
case llvm::Intrinsic::x86_avx2_pslli_q:
- case llvm::Intrinsic::x86_avx2_psll_dq:
case llvm::Intrinsic::x86_avx2_psrl_w:
case llvm::Intrinsic::x86_avx2_psrl_d:
case llvm::Intrinsic::x86_avx2_psrl_q:
@@ -2197,14 +2289,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_avx2_psrli_q:
case llvm::Intrinsic::x86_avx2_psrai_w:
case llvm::Intrinsic::x86_avx2_psrai_d:
- case llvm::Intrinsic::x86_avx2_psrl_dq:
case llvm::Intrinsic::x86_sse2_psll_w:
case llvm::Intrinsic::x86_sse2_psll_d:
case llvm::Intrinsic::x86_sse2_psll_q:
case llvm::Intrinsic::x86_sse2_pslli_w:
case llvm::Intrinsic::x86_sse2_pslli_d:
case llvm::Intrinsic::x86_sse2_pslli_q:
- case llvm::Intrinsic::x86_sse2_psll_dq:
case llvm::Intrinsic::x86_sse2_psrl_w:
case llvm::Intrinsic::x86_sse2_psrl_d:
case llvm::Intrinsic::x86_sse2_psrl_q:
@@ -2215,7 +2305,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_sse2_psrli_q:
case llvm::Intrinsic::x86_sse2_psrai_w:
case llvm::Intrinsic::x86_sse2_psrai_d:
- case llvm::Intrinsic::x86_sse2_psrl_dq:
case llvm::Intrinsic::x86_mmx_psll_w:
case llvm::Intrinsic::x86_mmx_psll_d:
case llvm::Intrinsic::x86_mmx_psll_q:
@@ -2247,14 +2336,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorShiftIntrinsic(I, /* Variable */ true);
break;
- // Byte shifts are not implemented.
- // case llvm::Intrinsic::x86_avx512_psll_dq_bs:
- // case llvm::Intrinsic::x86_avx512_psrl_dq_bs:
- // case llvm::Intrinsic::x86_avx2_psll_dq_bs:
- // case llvm::Intrinsic::x86_avx2_psrl_dq_bs:
- // case llvm::Intrinsic::x86_sse2_psll_dq_bs:
- // case llvm::Intrinsic::x86_sse2_psrl_dq_bs:
-
case llvm::Intrinsic::x86_sse2_packsswb_128:
case llvm::Intrinsic::x86_sse2_packssdw_128:
case llvm::Intrinsic::x86_sse2_packuswb_128:
@@ -2356,10 +2437,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " Arg#" << i << ": " << *A <<
" Shadow: " << *ArgShadow << "\n");
bool ArgIsInitialized = false;
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (CS.paramHasAttr(i + 1, Attribute::ByVal)) {
assert(A->getType()->isPointerTy() &&
"ByVal argument is not a pointer!");
- Size = MS.DL->getTypeAllocSize(A->getType()->getPointerElementType());
+ Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
if (ArgOffset + Size > kParamTLSSize) break;
unsigned ParamAlignment = CS.getParamAlignment(i + 1);
unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
@@ -2367,7 +2449,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
Size, Alignment);
} else {
- Size = MS.DL->getTypeAllocSize(A->getType());
+ Size = DL.getTypeAllocSize(A->getType());
if (ArgOffset + Size > kParamTLSSize) break;
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
kShadowTLSAlignment);
@@ -2460,11 +2542,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setShadow(&I, getCleanShadow(&I));
setOrigin(&I, getCleanOrigin());
IRBuilder<> IRB(I.getNextNode());
- uint64_t Size = MS.DL->getTypeAllocSize(I.getAllocatedType());
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(I.getAllocatedType());
if (PoisonStack && ClPoisonStackWithCall) {
- IRB.CreateCall2(MS.MsanPoisonStackFn,
- IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
- ConstantInt::get(MS.IntptrTy, Size));
+ IRB.CreateCall(MS.MsanPoisonStackFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+ ConstantInt::get(MS.IntptrTy, Size)});
} else {
Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
@@ -2484,11 +2567,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
createPrivateNonConstGlobalForString(*F.getParent(),
StackDescription.str());
- IRB.CreateCall4(MS.MsanSetAllocaOrigin4Fn,
- IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+ IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
ConstantInt::get(MS.IntptrTy, Size),
IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(&F, MS.IntptrTy));
+ IRB.CreatePointerCast(&F, MS.IntptrTy)});
}
}
@@ -2652,6 +2735,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
unsigned GpOffset = 0;
unsigned FpOffset = AMD64GpEndOffset;
unsigned OverflowOffset = AMD64FpEndOffset;
+ const DataLayout &DL = F.getParent()->getDataLayout();
for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
ArgIt != End; ++ArgIt) {
Value *A = *ArgIt;
@@ -2661,7 +2745,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
// ByVal arguments always go to the overflow area.
assert(A->getType()->isPointerTy());
Type *RealTy = A->getType()->getPointerElementType();
- uint64_t ArgSize = MS.DL->getTypeAllocSize(RealTy);
+ uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
OverflowOffset += RoundUpToAlignment(ArgSize, 8);
IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB),
@@ -2683,7 +2767,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
FpOffset += 16;
break;
case AK_Memory:
- uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType());
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
OverflowOffset += RoundUpToAlignment(ArgSize, 8);
}
@@ -2768,12 +2852,114 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr =
MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
- Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset);
+ Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
+ AMD64FpEndOffset);
IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
}
}
};
+/// \brief MIPS64-specific implementation of VarArgHelper.
+struct VarArgMIPS64Helper : public VarArgHelper {
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy;
+ Value *VAArgSize;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+ VAArgSize(nullptr) {}
+
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+ unsigned VAArgOffset = 0;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ Value *Base;
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+#if defined(__MIPSEB__) || defined(MIPSEB)
+ // Adjust the shadow for arguments with size < 8 to match the placement
+ // of bits in a big-endian system
+ if (ArgSize < 8)
+ VAArgOffset += (8 - ArgSize);
+#endif
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset);
+ VAArgOffset += ArgSize;
+ VAArgOffset = RoundUpToAlignment(VAArgOffset, 8);
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+
+ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
+ // Here we reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
+ // new class member; it holds the total size of all variadic arguments.
+ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// \brief Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */8, /* alignment */8, false);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */8, /* alignment */8, false);
+ }
+
+ void finalizeInstrumentation() override {
+ assert(!VAArgSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+ VAArgSize);
+
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+ }
+
+ // Instrument va_start.
+ // Copy va_list shadow from the backup copy of the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+ Value *VAListTag = OrigInst->getArgOperand(0);
+ Value *RegSaveAreaPtrPtr =
+ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ Type::getInt64PtrTy(*MS.C));
+ Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ Value *RegSaveAreaShadowPtr =
+ MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+ IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8);
+ }
+ }
+};
+
/// \brief A no-op implementation of VarArgHelper.
struct VarArgNoOpHelper : public VarArgHelper {
VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -2795,6 +2981,9 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
llvm::Triple TargetTriple(Func.getParent()->getTargetTriple());
if (TargetTriple.getArch() == llvm::Triple::x86_64)
return new VarArgAMD64Helper(Func, Msan, Visitor);
+ else if (TargetTriple.getArch() == llvm::Triple::mips64 ||
+ TargetTriple.getArch() == llvm::Triple::mips64el)
+ return new VarArgMIPS64Helper(Func, Msan, Visitor);
else
return new VarArgNoOpHelper(Func, Msan, Visitor);
}
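The new VarArgMIPS64Helper records every variadic argument's shadow in an 8-byte slot; on big-endian MIPS the shadow of an argument smaller than 8 bytes is shifted to the high end of its slot, matching where the value's bits actually sit. The offset bookkeeping from visitCallSite, replayed as plain arithmetic (a sketch with example argument sizes, not the pass itself):

    #include <cstdio>
    #include <initializer_list>

    static unsigned RoundUpTo8(unsigned X) { return (X + 7) / 8 * 8; }

    int main() {
      const bool BigEndian = true;   // as if __MIPSEB__ were defined
      unsigned VAArgOffset = 0;
      for (unsigned ArgSize : {4u, 8u, 2u}) {       // example argument sizes
        unsigned ShadowOffset = VAArgOffset;
        if (BigEndian && ArgSize < 8)
          ShadowOffset += 8 - ArgSize;              // shadow at the high end
        std::printf("%u-byte arg -> shadow at offset %u\n", ArgSize, ShadowOffset);
        VAArgOffset = RoundUpTo8(ShadowOffset + ArgSize);
      }
      return 0;
    }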
@@ -2802,6 +2991,8 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
} // namespace
bool MemorySanitizer::runOnFunction(Function &F) {
+ if (&F == MsanCtorFunction)
+ return false;
MemorySanitizerVisitor Visitor(F, *this);
// Clear out readonly/readnone attributes.
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index c048a99..f6ae0c2 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -11,19 +11,17 @@
// and potentially with other Sanitizers.
//
// We create a Guard variable with the same linkage
-// as the function and inject this code into the entry block (CoverageLevel=1)
-// or all blocks (CoverageLevel>=2):
+// as the function and inject this code into the entry block (SCK_Function)
+// or all blocks (SCK_BB):
// if (Guard < 0) {
// __sanitizer_cov(&Guard);
// }
// The accesses to Guard are atomic. The rest of the logic is
// in __sanitizer_cov (it's fine to call it more than once).
//
-// With CoverageLevel>=3 we also split critical edges this effectively
+// With SCK_Edge we also split critical edges, thus effectively
// instrumenting all edges.
//
-// CoverageLevel>=4 add indirect call profiling implented as a function call.
-//
// This coverage implementation provides very limited data:
// it only tells if a given function (block) was ever executed. No counters.
// But for many use cases this is what we need and the added slowdown small.
@@ -55,11 +53,13 @@ using namespace llvm;
static const char *const kSanCovModuleInitName = "__sanitizer_cov_module_init";
static const char *const kSanCovName = "__sanitizer_cov";
+static const char *const kSanCovWithCheckName = "__sanitizer_cov_with_check";
static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16";
static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter";
static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block";
+static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp";
static const char *const kSanCovModuleCtorName = "sancov.module_ctor";
-static const uint64_t kSanCtorAndDtorPriority = 1;
+static const uint64_t kSanCtorAndDtorPriority = 2;
static cl::opt<int> ClCoverageLevel("sanitizer-coverage-level",
cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
@@ -67,11 +67,11 @@ static cl::opt<int> ClCoverageLevel("sanitizer-coverage-level",
"4: above plus indirect calls"),
cl::Hidden, cl::init(0));
-static cl::opt<int> ClCoverageBlockThreshold(
+static cl::opt<unsigned> ClCoverageBlockThreshold(
"sanitizer-coverage-block-threshold",
- cl::desc("Add coverage instrumentation only to the entry block if there "
- "are more than this number of blocks."),
- cl::Hidden, cl::init(1500));
+ cl::desc("Use a callback with a guard check inside it if there are"
+ " more than this number of blocks."),
+ cl::Hidden, cl::init(500));
static cl::opt<bool>
ClExperimentalTracing("sanitizer-coverage-experimental-tracing",
@@ -79,13 +79,63 @@ static cl::opt<bool>
"callbacks at every basic block"),
cl::Hidden, cl::init(false));
+static cl::opt<bool>
+ ClExperimentalCMPTracing("sanitizer-coverage-experimental-trace-compares",
+ cl::desc("Experimental tracing of CMP and similar "
+ "instructions"),
+ cl::Hidden, cl::init(false));
+
+// Experimental 8-bit counters used as an additional search heuristic during
+// coverage-guided fuzzing.
+// The counters are not thread-friendly:
+// - contention on these counters may cause significant slowdown;
+// - the counter updates are racy and the results may be inaccurate.
+// They are also inaccurate due to 8-bit integer overflow.
+static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters",
+ cl::desc("Experimental 8-bit counters"),
+ cl::Hidden, cl::init(false));
+
namespace {
+SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
+ SanitizerCoverageOptions Res;
+ switch (LegacyCoverageLevel) {
+ case 0:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_None;
+ break;
+ case 1:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Function;
+ break;
+ case 2:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_BB;
+ break;
+ case 3:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+ break;
+ case 4:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+ Res.IndirectCalls = true;
+ break;
+ }
+ return Res;
+}
+
+SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
+ // Sets CoverageType and IndirectCalls.
+ SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel);
+ Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType);
+ Options.IndirectCalls |= CLOpts.IndirectCalls;
+ Options.TraceBB |= ClExperimentalTracing;
+ Options.TraceCmp |= ClExperimentalCMPTracing;
+ Options.Use8bitCounters |= ClUse8bitCounters;
+ return Options;
+}
+
class SanitizerCoverageModule : public ModulePass {
public:
- SanitizerCoverageModule(int CoverageLevel = 0)
- : ModulePass(ID),
- CoverageLevel(std::max(CoverageLevel, (int)ClCoverageLevel)) {}
+ SanitizerCoverageModule(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
+ : ModulePass(ID), Options(OverrideFromCL(Options)) {}
bool runOnModule(Module &M) override;
bool runOnFunction(Function &F);
static char ID; // Pass identification, replacement for typeid
@@ -93,104 +143,135 @@ class SanitizerCoverageModule : public ModulePass {
return "SanitizerCoverageModule";
}
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- }
-
private:
void InjectCoverageForIndirectCalls(Function &F,
ArrayRef<Instruction *> IndirCalls);
- bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
- ArrayRef<Instruction *> IndirCalls);
- void InjectCoverageAtBlock(Function &F, BasicBlock &BB);
+ void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+ bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void SetNoSanitizeMetadata(Instruction *I);
+ void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls);
+ unsigned NumberOfInstrumentedBlocks() {
+ return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses();
+ }
Function *SanCovFunction;
+ Function *SanCovWithCheckFunction;
Function *SanCovIndirCallFunction;
- Function *SanCovModuleInit;
Function *SanCovTraceEnter, *SanCovTraceBB;
+ Function *SanCovTraceCmpFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy;
+ Type *IntptrTy, *Int64Ty;
LLVMContext *C;
+ const DataLayout *DL;
GlobalVariable *GuardArray;
+ GlobalVariable *EightBitCounterArray;
- int CoverageLevel;
+ SanitizerCoverageOptions Options;
};
} // namespace
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (Function *F = dyn_cast<Function>(FuncOrBitcast))
- return F;
- std::string Err;
- raw_string_ostream Stream(Err);
- Stream << "SanitizerCoverage interface function redefined: "
- << *FuncOrBitcast;
- report_fatal_error(Err);
-}
-
bool SanitizerCoverageModule::runOnModule(Module &M) {
- if (!CoverageLevel) return false;
+ if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
+ return false;
C = &(M.getContext());
- DataLayoutPass *DLP = &getAnalysis<DataLayoutPass>();
- IntptrTy = Type::getIntNTy(*C, DLP->getDataLayout().getPointerSizeInBits());
+ DL = &M.getDataLayout();
+ IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
+ Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int64Ty = IRB.getInt64Ty();
- Function *CtorFunc =
- Function::Create(FunctionType::get(VoidTy, false),
- GlobalValue::InternalLinkage, kSanCovModuleCtorName, &M);
- ReturnInst::Create(*C, BasicBlock::Create(*C, "", CtorFunc));
- appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority);
-
- SanCovFunction = checkInterfaceFunction(
+ SanCovFunction = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr));
- SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction(
- kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
- SanCovModuleInit = checkInterfaceFunction(
- M.getOrInsertFunction(kSanCovModuleInitName, Type::getVoidTy(*C),
- Int32PtrTy, IntptrTy, nullptr));
- SanCovModuleInit->setLinkage(Function::ExternalLinkage);
+ SanCovWithCheckFunction = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr));
+ SanCovIndirCallFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
+ SanCovTraceCmpFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr));
+
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
- if (ClExperimentalTracing) {
- SanCovTraceEnter = checkInterfaceFunction(
+ if (Options.TraceBB) {
+ SanCovTraceEnter = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr));
- SanCovTraceBB = checkInterfaceFunction(
+ SanCovTraceBB = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr));
}
// At this point we create a dummy array of guards because we don't
// know how many elements we will need.
Type *Int32Ty = IRB.getInt32Ty();
+ Type *Int8Ty = IRB.getInt8Ty();
+
GuardArray =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, "__sancov_gen_cov_tmp");
+ if (Options.Use8bitCounters)
+ EightBitCounterArray =
+ new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, "__sancov_gen_cov_tmp");
for (auto &F : M)
runOnFunction(F);
+ auto N = NumberOfInstrumentedBlocks();
+
// Now we know how many elements we need. Create an array of guards
// with one extra element at the beginning for the size.
- Type *Int32ArrayNTy =
- ArrayType::get(Int32Ty, SanCovFunction->getNumUses() + 1);
+ Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1);
GlobalVariable *RealGuardArray = new GlobalVariable(
M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov");
+
// Replace the dummy array with the real one.
GuardArray->replaceAllUsesWith(
IRB.CreatePointerCast(RealGuardArray, Int32PtrTy));
GuardArray->eraseFromParent();
- // Call __sanitizer_cov_module_init
- IRB.SetInsertPoint(CtorFunc->getEntryBlock().getTerminator());
- IRB.CreateCall2(SanCovModuleInit,
- IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
- ConstantInt::get(IntptrTy, SanCovFunction->getNumUses()));
+ GlobalVariable *RealEightBitCounterArray;
+ if (Options.Use8bitCounters) {
+ // Make sure the array is 16-aligned.
+ static const int kCounterAlignment = 16;
+ Type *Int8ArrayNTy =
+ ArrayType::get(Int8Ty, RoundUpToAlignment(N, kCounterAlignment));
+ RealEightBitCounterArray = new GlobalVariable(
+ M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter");
+ RealEightBitCounterArray->setAlignment(kCounterAlignment);
+ EightBitCounterArray->replaceAllUsesWith(
+ IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy));
+ EightBitCounterArray->eraseFromParent();
+ }
+
+ // Create variable for module (compilation unit) name
+ Constant *ModNameStrConst =
+ ConstantDataArray::getString(M.getContext(), M.getName(), true);
+ GlobalVariable *ModuleName =
+ new GlobalVariable(M, ModNameStrConst->getType(), true,
+ GlobalValue::PrivateLinkage, ModNameStrConst);
+
+ Function *CtorFunc;
+ std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, kSanCovModuleCtorName, kSanCovModuleInitName,
+ {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy},
+ {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
+ ConstantInt::get(IntptrTy, N),
+ Options.Use8bitCounters
+ ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)
+ : Constant::getNullValue(Int8PtrTy),
+ IRB.CreatePointerCast(ModuleName, Int8PtrTy)});
+
+ appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority);
+
return true;
}
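runOnModule first instruments against a dummy guard array, counts how many guards were used (direct __sanitizer_cov uses plus __sanitizer_cov_with_check uses), and only then materializes the real array with one extra leading element reserved for the size; as I read the InjectCoverageAtBlock hunk below, block k therefore uses the int32 at index 1 + k. A tiny host-side illustration of that layout arithmetic:

    #include <cstdio>

    int main() {
      unsigned N = 3;                 // NumberOfInstrumentedBlocks()
      unsigned ArrayElems = N + 1;    // element 0 is reserved for the size
      std::printf("guard array: %u x i32 (%u bytes)\n",
                  ArrayElems, ArrayElems * 4);
      for (unsigned k = 0; k < N; ++k)
        std::printf("block %u -> guard at byte offset %u\n", k, (1 + k) * 4);
      return 0;
    }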
@@ -198,38 +279,44 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (F.empty()) return false;
if (F.getName().find(".module_ctor") != std::string::npos)
return false; // Should not instrument sanitizer init functions.
- if (CoverageLevel >= 3)
- SplitAllCriticalEdges(F, this);
+ if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
+ SplitAllCriticalEdges(F);
SmallVector<Instruction*, 8> IndirCalls;
SmallVector<BasicBlock*, 16> AllBlocks;
+ SmallVector<Instruction*, 8> CmpTraceTargets;
for (auto &BB : F) {
AllBlocks.push_back(&BB);
- if (CoverageLevel >= 4)
- for (auto &Inst : BB) {
+ for (auto &Inst : BB) {
+ if (Options.IndirectCalls) {
CallSite CS(&Inst);
if (CS && !CS.getCalledFunction())
IndirCalls.push_back(&Inst);
}
+ if (Options.TraceCmp && isa<ICmpInst>(&Inst))
+ CmpTraceTargets.push_back(&Inst);
+ }
}
- InjectCoverage(F, AllBlocks, IndirCalls);
+ InjectCoverage(F, AllBlocks);
+ InjectCoverageForIndirectCalls(F, IndirCalls);
+ InjectTraceForCmp(F, CmpTraceTargets);
return true;
}
-bool
-SanitizerCoverageModule::InjectCoverage(Function &F,
- ArrayRef<BasicBlock *> AllBlocks,
- ArrayRef<Instruction *> IndirCalls) {
- if (!CoverageLevel) return false;
-
- if (CoverageLevel == 1 ||
- (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) {
- InjectCoverageAtBlock(F, F.getEntryBlock());
- } else {
+bool SanitizerCoverageModule::InjectCoverage(Function &F,
+ ArrayRef<BasicBlock *> AllBlocks) {
+ switch (Options.CoverageType) {
+ case SanitizerCoverageOptions::SCK_None:
+ return false;
+ case SanitizerCoverageOptions::SCK_Function:
+ InjectCoverageAtBlock(F, F.getEntryBlock(), false);
+ return true;
+ default: {
+ bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size();
for (auto BB : AllBlocks)
- InjectCoverageAtBlock(F, *BB);
+ InjectCoverageAtBlock(F, *BB, UseCalls);
+ return true;
+ }
}
- InjectCoverageForIndirectCalls(F, IndirCalls);
- return true;
}
// On every indirect call we call a run-time function
@@ -249,19 +336,44 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
IRBuilder<> IRB(I);
CallSite CS(I);
Value *Callee = CS.getCalledValue();
- if (dyn_cast<InlineAsm>(Callee)) continue;
+ if (isa<InlineAsm>(Callee)) continue;
GlobalVariable *CalleeCache = new GlobalVariable(
*F.getParent(), Ty, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(Ty), "__sancov_gen_callee_cache");
CalleeCache->setAlignment(kCacheAlignment);
- IRB.CreateCall2(SanCovIndirCallFunction,
- IRB.CreatePointerCast(Callee, IntptrTy),
- IRB.CreatePointerCast(CalleeCache, IntptrTy));
+ IRB.CreateCall(SanCovIndirCallFunction,
+ {IRB.CreatePointerCast(Callee, IntptrTy),
+ IRB.CreatePointerCast(CalleeCache, IntptrTy)});
+ }
+}
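
As a reading aid, the callee cache allocated above can be thought of as a small per-call-site table that the runtime fills with observed targets. The sketch below is an assumption about the runtime's behaviour, not its actual implementation; only the (Callee, CalleeCache) argument pair comes from the code above.

#include <cstddef>
#include <cstdint>
// Hypothetical runtime-side handling of one instrumented indirect call site.
static void CoverIndirectCallSketch(uintptr_t Callee, uintptr_t *Cache,
                                    size_t CacheSize) {
  for (size_t I = 0; I < CacheSize; ++I) {
    if (Cache[I] == Callee)
      return;                      // target already recorded for this site
    if (Cache[I] == 0) {
      Cache[I] = Callee;           // remember a newly observed target
      return;
    }
  }
  // Cache full: further targets at this site are simply not recorded.
}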
+
+void SanitizerCoverageModule::InjectTraceForCmp(
+ Function &F, ArrayRef<Instruction *> CmpTraceTargets) {
+ for (auto I : CmpTraceTargets) {
+ if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
+ IRBuilder<> IRB(ICMP);
+ Value *A0 = ICMP->getOperand(0);
+ Value *A1 = ICMP->getOperand(1);
+ if (!A0->getType()->isIntegerTy()) continue;
+ uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType());
+ // __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1);
+ IRB.CreateCall(
+ SanCovTraceCmpFunction,
+ {ConstantInt::get(Int64Ty, (TypeSize << 32) | ICMP->getPredicate()),
+ IRB.CreateIntCast(A0, Int64Ty, true),
+ IRB.CreateIntCast(A1, Int64Ty, true)});
+ }
}
}
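
A worked example of the packed first argument described in the comment above, assuming the usual LLVM predicate numbering (ICmpInst::ICMP_EQ is 32):

#include <cstdint>
// For a 64-bit 'icmp eq': TypeSize == 64 and the predicate value is 32, so
uint64_t ExamplePacked = (uint64_t(64) << 32) | 32;  // == 0x4000000020
// Both compare operands are then sign-extended to i64 before the call.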
-void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F,
- BasicBlock &BB) {
+void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
+ I->setMetadata(
+ I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"),
+ MDNode::get(*C, None));
+}
+
+void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
+ bool UseCalls) {
BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
// Skip static allocas at the top of the entry block so they don't become
// dynamic when we split the block. If we used our optimized stack layout,
@@ -273,31 +385,48 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F,
}
bool IsEntryBB = &BB == &F.getEntryBlock();
- DebugLoc EntryLoc =
- IsEntryBB ? IP->getDebugLoc().getFnDebugLoc(*C) : IP->getDebugLoc();
+ DebugLoc EntryLoc = IsEntryBB && IP->getDebugLoc()
+ ? IP->getDebugLoc().getFnDebugLoc()
+ : IP->getDebugLoc();
IRBuilder<> IRB(IP);
IRB.SetCurrentDebugLocation(EntryLoc);
SmallVector<Value *, 1> Indices;
Value *GuardP = IRB.CreateAdd(
IRB.CreatePointerCast(GuardArray, IntptrTy),
- ConstantInt::get(IntptrTy, (1 + SanCovFunction->getNumUses()) * 4));
+ ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
- LoadInst *Load = IRB.CreateLoad(GuardP);
- Load->setAtomic(Monotonic);
- Load->setAlignment(4);
- Load->setMetadata(F.getParent()->getMDKindID("nosanitize"),
- MDNode::get(*C, None));
- Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
- Instruction *Ins = SplitBlockAndInsertIfThen(
- Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
- IRB.SetInsertPoint(Ins);
- IRB.SetCurrentDebugLocation(EntryLoc);
- // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
- IRB.CreateCall(SanCovFunction, GuardP);
- IRB.CreateCall(EmptyAsm); // Avoids callback merge.
+ if (UseCalls) {
+ IRB.CreateCall(SanCovWithCheckFunction, GuardP);
+ } else {
+ LoadInst *Load = IRB.CreateLoad(GuardP);
+ Load->setAtomic(Monotonic);
+ Load->setAlignment(4);
+ SetNoSanitizeMetadata(Load);
+ Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
+ Instruction *Ins = SplitBlockAndInsertIfThen(
+ Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ IRB.SetInsertPoint(Ins);
+ IRB.SetCurrentDebugLocation(EntryLoc);
+ // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
+ IRB.CreateCall(SanCovFunction, GuardP);
+ IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
+ }
+
+ if (Options.Use8bitCounters) {
+ IRB.SetInsertPoint(IP);
+ Value *P = IRB.CreateAdd(
+ IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
+ ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
+ P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
+ LoadInst *LI = IRB.CreateLoad(P);
+ Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
+ StoreInst *SI = IRB.CreateStore(Inc, P);
+ SetNoSanitizeMetadata(LI);
+ SetNoSanitizeMetadata(SI);
+ }
- if (ClExperimentalTracing) {
+ if (Options.TraceBB) {
// Experimental support for tracing.
// Insert a callback with the same guard variable as used for coverage.
IRB.SetInsertPoint(IP);
@@ -309,6 +438,7 @@ char SanitizerCoverageModule::ID = 0;
INITIALIZE_PASS(SanitizerCoverageModule, "sancov",
"SanitizerCoverage: TODO."
"ModulePass", false, false)
-ModulePass *llvm::createSanitizerCoverageModulePass(int CoverageLevel) {
- return new SanitizerCoverageModule(CoverageLevel);
+ModulePass *llvm::createSanitizerCoverageModulePass(
+ const SanitizerCoverageOptions &Options) {
+ return new SanitizerCoverageModule(Options);
}
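
For orientation, the per-block instrumentation emitted by InjectCoverageAtBlock behaves roughly like the C++ sketch below. It is a simplification under stated assumptions: Guards and Counters stand for the __sancov_gen_* globals, Slot and CounterSlot for the positions assigned to the block, and the runtime declaration's exact signature is assumed; the real IR uses an atomic monotonic load and nosanitize metadata rather than plain accesses.

#include <cstddef>
#include <cstdint>
extern "C" void __sanitizer_cov(int32_t *Guard);  // assumed runtime signature
// Hypothetical per-block helper equivalent to the emitted IR (non-UseCalls path).
static void CoverBlockSketch(int32_t *Guards, int8_t *Counters, size_t Slot,
                             size_t CounterSlot, bool Use8bitCounters) {
  if (Guards[Slot] <= 0)              // ICmpSGE(0, guard) in the real IR
    __sanitizer_cov(&Guards[Slot]);   // runtime derives the PC via GET_CALLER_PC
  if (Use8bitCounters)
    ++Counters[CounterSlot];          // plain i8 load/add/store, nosanitize
}
// With UseCalls (block count above ClCoverageBlockThreshold), the whole check
// collapses into a single call to SanCovWithCheckFunction with the guard pointer.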
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 1b86ae5..1a46bbb 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -25,6 +25,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -68,12 +70,16 @@ STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
STATISTIC(NumOmittedReadsFromConstantGlobals,
"Number of reads from constant globals");
STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
+STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing");
+
+static const char *const kTsanModuleCtorName = "tsan.module_ctor";
+static const char *const kTsanInitName = "__tsan_init";
namespace {
/// ThreadSanitizer: instrument the code in module to find races.
struct ThreadSanitizer : public FunctionPass {
- ThreadSanitizer() : FunctionPass(ID), DL(nullptr) {}
+ ThreadSanitizer() : FunctionPass(ID) {}
const char *getPassName() const override;
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;
@@ -81,15 +87,15 @@ struct ThreadSanitizer : public FunctionPass {
private:
void initializeCallbacks(Module &M);
- bool instrumentLoadOrStore(Instruction *I);
- bool instrumentAtomic(Instruction *I);
+ bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
+ bool instrumentAtomic(Instruction *I, const DataLayout &DL);
bool instrumentMemIntrinsic(Instruction *I);
- void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
- SmallVectorImpl<Instruction*> &All);
+ void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local,
+ SmallVectorImpl<Instruction *> &All,
+ const DataLayout &DL);
bool addrPointsToConstantData(Value *Addr);
- int getMemoryAccessFuncIndex(Value *Addr);
+ int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
- const DataLayout *DL;
Type *IntptrTy;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
@@ -99,6 +105,8 @@ struct ThreadSanitizer : public FunctionPass {
static const size_t kNumberOfAccessSizes = 5;
Function *TsanRead[kNumberOfAccessSizes];
Function *TsanWrite[kNumberOfAccessSizes];
+ Function *TsanUnalignedRead[kNumberOfAccessSizes];
+ Function *TsanUnalignedWrite[kNumberOfAccessSizes];
Function *TsanAtomicLoad[kNumberOfAccessSizes];
Function *TsanAtomicStore[kNumberOfAccessSizes];
Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes];
@@ -108,6 +116,7 @@ struct ThreadSanitizer : public FunctionPass {
Function *TsanVptrUpdate;
Function *TsanVptrLoad;
Function *MemmoveFn, *MemcpyFn, *MemsetFn;
+ Function *TsanCtorFunction;
};
} // namespace
@@ -124,44 +133,48 @@ FunctionPass *llvm::createThreadSanitizerPass() {
return new ThreadSanitizer();
}
-static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
- if (Function *F = dyn_cast<Function>(FuncOrBitcast))
- return F;
- FuncOrBitcast->dump();
- report_fatal_error("ThreadSanitizer interface function redefined");
-}
-
void ThreadSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
// Initialize the callbacks.
- TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_exit", IRB.getVoidTy(), nullptr));
+ TsanFuncExit = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr));
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const size_t ByteSize = 1 << i;
const size_t BitSize = ByteSize * 8;
SmallString<32> ReadName("__tsan_read" + itostr(ByteSize));
- TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<32> WriteName("__tsan_write" + itostr(ByteSize));
- TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ SmallString<64> UnalignedReadName("__tsan_unaligned_read" +
+ itostr(ByteSize));
+ TsanUnalignedRead[i] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+
+ SmallString<64> UnalignedWriteName("__tsan_unaligned_write" +
+ itostr(ByteSize));
+ TsanUnalignedWrite[i] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +
"_load");
- TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
+ TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +
"_store");
- TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy,
- nullptr));
+ TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -184,48 +197,44 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
else
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
- TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction(
- RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
+ TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
}
SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
"_compare_exchange_val");
- TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
}
- TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), nullptr));
- TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanVptrUpdate = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr));
+ TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
- TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr));
- TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
+ TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr));
- MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
- MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IntptrTy, nullptr));
- MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
- "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
- IntptrTy, nullptr));
+ MemmoveFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ MemcpyFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ MemsetFn = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy, nullptr));
}
bool ThreadSanitizer::doInitialization(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ const DataLayout &DL = M.getDataLayout();
+ IntptrTy = DL.getIntPtrType(M.getContext());
+ std::tie(TsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{});
- // Always insert a call to __tsan_init into the module's CTORs.
- IRBuilder<> IRB(M.getContext());
- IntptrTy = IRB.getIntPtrTy(DL);
- Value *TsanInit = M.getOrInsertFunction("__tsan_init",
- IRB.getVoidTy(), nullptr);
- appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
+ appendToGlobalCtors(M, TsanCtorFunction, 0);
return true;
}
@@ -260,6 +269,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
// Instrumenting some of the accesses may be proven redundant.
// Currently handled:
// - read-before-write (within same BB, no calls between)
+// - variables that are not captured
//
// We do not handle some of the patterns that should not survive
// after the classic compiler optimizations.
@@ -269,8 +279,8 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
// 'Local' is a vector of insns within the same BB (no calls between).
// 'All' is a vector of insns that will be instrumented.
void ThreadSanitizer::chooseInstructionsToInstrument(
- SmallVectorImpl<Instruction*> &Local,
- SmallVectorImpl<Instruction*> &All) {
+ SmallVectorImpl<Instruction *> &Local, SmallVectorImpl<Instruction *> &All,
+ const DataLayout &DL) {
SmallSet<Value*, 8> WriteTargets;
// Iterate from the end.
for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
@@ -291,6 +301,17 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
continue;
}
}
+ Value *Addr = isa<StoreInst>(*I)
+ ? cast<StoreInst>(I)->getPointerOperand()
+ : cast<LoadInst>(I)->getPointerOperand();
+ if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+ !PointerMayBeCaptured(Addr, true, true)) {
+ // The variable is addressable but not captured, so it cannot be
+ // referenced from a different thread and participate in a data race
+ // (see llvm/Analysis/CaptureTracking.h for details).
+ NumOmittedNonCaptured++;
+ continue;
+ }
All.push_back(I);
}
Local.clear();
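
To make the new capture-based filter concrete, here is a minimal example of what it distinguishes; the function names are invented for illustration:

// Hypothetical translation unit illustrating the filter above.
void Escape(int *P);               // may publish P to another thread
void CaptureExample() {
  int NotCaptured = 0;
  int Captured = 0;
  Escape(&Captured);
  NotCaptured = 1;                 // alloca whose address never escapes: the
                                   // store is skipped, NumOmittedNonCaptured++
  Captured = 2;                    // address escaped above: still instrumented
}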
@@ -311,7 +332,10 @@ static bool isAtomic(Instruction *I) {
}
bool ThreadSanitizer::runOnFunction(Function &F) {
- if (!DL) return false;
+  // This is required to prevent instrumenting the call to __tsan_init from
+  // within the module constructor.
+ if (&F == TsanCtorFunction)
+ return false;
initializeCallbacks(*F.getParent());
SmallVector<Instruction*, 8> RetVec;
SmallVector<Instruction*, 8> AllLoadsAndStores;
@@ -321,6 +345,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
bool Res = false;
bool HasCalls = false;
bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
+ const DataLayout &DL = F.getParent()->getDataLayout();
// Traverse all instructions, collect loads/stores/returns, check for calls.
for (auto &BB : F) {
@@ -335,10 +360,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
if (isa<MemIntrinsic>(Inst))
MemIntrinCalls.push_back(&Inst);
HasCalls = true;
- chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores,
+ DL);
}
}
- chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL);
}
// We have collected all loads and stores.
@@ -348,14 +374,14 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
// Instrument memory accesses only if we want to report bugs in the function.
if (ClInstrumentMemoryAccesses && SanitizeFunction)
for (auto Inst : AllLoadsAndStores) {
- Res |= instrumentLoadOrStore(Inst);
+ Res |= instrumentLoadOrStore(Inst, DL);
}
// Instrument atomic memory accesses in any case (they can be used to
// implement synchronization).
if (ClInstrumentAtomics)
for (auto Inst : AtomicAccesses) {
- Res |= instrumentAtomic(Inst);
+ Res |= instrumentAtomic(Inst, DL);
}
if (ClInstrumentMemIntrinsics && SanitizeFunction)
@@ -372,20 +398,21 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
IRB.CreateCall(TsanFuncEntry, ReturnAddress);
for (auto RetInst : RetVec) {
IRBuilder<> IRBRet(RetInst);
- IRBRet.CreateCall(TsanFuncExit);
+ IRBRet.CreateCall(TsanFuncExit, {});
}
Res = true;
}
return Res;
}
-bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
+bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
+ const DataLayout &DL) {
IRBuilder<> IRB(I);
bool IsWrite = isa<StoreInst>(*I);
Value *Addr = IsWrite
? cast<StoreInst>(I)->getPointerOperand()
: cast<LoadInst>(I)->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
if (IsWrite && isVtableAccess(I)) {
@@ -400,9 +427,9 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
if (StoredValue->getType()->isIntegerTy())
StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
// Call TsanVptrUpdate.
- IRB.CreateCall2(TsanVptrUpdate,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy()));
+ IRB.CreateCall(TsanVptrUpdate,
+ {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())});
NumInstrumentedVtableWrites++;
return true;
}
@@ -412,7 +439,16 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
NumInstrumentedVtableReads++;
return true;
}
- Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+ const unsigned Alignment = IsWrite
+ ? cast<StoreInst>(I)->getAlignment()
+ : cast<LoadInst>(I)->getAlignment();
+ Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+ const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+ Value *OnAccessFunc = nullptr;
+ if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
+ OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+ else
+ OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx];
IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
if (IsWrite) NumInstrumentedWrites++;
else NumInstrumentedReads++;
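
A quick worked example of the alignment test introduced above; the helper simply restates the condition from the patch for a 32-bit access:

// Replicates the aligned-vs-unaligned decision for illustration.
constexpr bool UsesAlignedCallback(unsigned Alignment, unsigned TypeSizeBits) {
  return Alignment == 0 || Alignment >= 8 ||
         (Alignment % (TypeSizeBits / 8)) == 0;
}
static_assert(UsesAlignedCallback(4, 32), "align 4, i32 -> __tsan_write4");
static_assert(!UsesAlignedCallback(2, 32), "align 2, i32 -> __tsan_unaligned_write4");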
@@ -445,16 +481,18 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
IRBuilder<> IRB(I);
if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
- IRB.CreateCall3(MemsetFn,
- IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ IRB.CreateCall(
+ MemsetFn,
+ {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
I->eraseFromParent();
} else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
- IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
- IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ IRB.CreateCall(
+ isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+ {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
I->eraseFromParent();
}
return false;
@@ -468,11 +506,11 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
// The following page contains more background information:
// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
-bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
+bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
IRBuilder<> IRB(I);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Value *Addr = LI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -486,7 +524,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Addr = SI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -500,7 +538,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
ReplaceInstWithInst(I, C);
} else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
Value *Addr = RMWI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
@@ -517,7 +555,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
ReplaceInstWithInst(I, C);
} else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
Value *Addr = CASI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -547,11 +585,12 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
return true;
}
-int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
+int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr,
+ const DataLayout &DL) {
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
assert(OrigTy->isSized());
- uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
+ uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
if (TypeSize != 8 && TypeSize != 16 &&
TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
NumAccessesWithBadSize++;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp
new file mode 100644
index 0000000..afb873a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.cpp
@@ -0,0 +1,673 @@
+//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines several utility functions used by various ARC
+/// optimizations that are too large to belong in a header file.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
+ const ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ return OS << "ARCInstKind::Retain";
+ case ARCInstKind::RetainRV:
+ return OS << "ARCInstKind::RetainRV";
+ case ARCInstKind::RetainBlock:
+ return OS << "ARCInstKind::RetainBlock";
+ case ARCInstKind::Release:
+ return OS << "ARCInstKind::Release";
+ case ARCInstKind::Autorelease:
+ return OS << "ARCInstKind::Autorelease";
+ case ARCInstKind::AutoreleaseRV:
+ return OS << "ARCInstKind::AutoreleaseRV";
+ case ARCInstKind::AutoreleasepoolPush:
+ return OS << "ARCInstKind::AutoreleasepoolPush";
+ case ARCInstKind::AutoreleasepoolPop:
+ return OS << "ARCInstKind::AutoreleasepoolPop";
+ case ARCInstKind::NoopCast:
+ return OS << "ARCInstKind::NoopCast";
+ case ARCInstKind::FusedRetainAutorelease:
+ return OS << "ARCInstKind::FusedRetainAutorelease";
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return OS << "ARCInstKind::FusedRetainAutoreleaseRV";
+ case ARCInstKind::LoadWeakRetained:
+ return OS << "ARCInstKind::LoadWeakRetained";
+ case ARCInstKind::StoreWeak:
+ return OS << "ARCInstKind::StoreWeak";
+ case ARCInstKind::InitWeak:
+ return OS << "ARCInstKind::InitWeak";
+ case ARCInstKind::LoadWeak:
+ return OS << "ARCInstKind::LoadWeak";
+ case ARCInstKind::MoveWeak:
+ return OS << "ARCInstKind::MoveWeak";
+ case ARCInstKind::CopyWeak:
+ return OS << "ARCInstKind::CopyWeak";
+ case ARCInstKind::DestroyWeak:
+ return OS << "ARCInstKind::DestroyWeak";
+ case ARCInstKind::StoreStrong:
+ return OS << "ARCInstKind::StoreStrong";
+ case ARCInstKind::CallOrUser:
+ return OS << "ARCInstKind::CallOrUser";
+ case ARCInstKind::Call:
+ return OS << "ARCInstKind::Call";
+ case ARCInstKind::User:
+ return OS << "ARCInstKind::User";
+ case ARCInstKind::IntrinsicUser:
+ return OS << "ARCInstKind::IntrinsicUser";
+ case ARCInstKind::None:
+ return OS << "ARCInstKind::None";
+ }
+ llvm_unreachable("Unknown instruction class!");
+}
+
+ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No (mandatory) arguments.
+ if (AI == AE)
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush)
+ .Case("clang.arc.use", ARCInstKind::IntrinsicUser)
+ .Default(ARCInstKind::CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_retain", ARCInstKind::Retain)
+ .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
+ .Case("objc_retainBlock", ARCInstKind::RetainBlock)
+ .Case("objc_release", ARCInstKind::Release)
+ .Case("objc_autorelease", ARCInstKind::Autorelease)
+ .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
+ .Case("objc_retainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
+ .Case("objc_retain_autorelease",
+ ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",
+ ARCInstKind::FusedRetainAutoreleaseRV)
+ .Case("objc_sync_enter", ARCInstKind::User)
+ .Case("objc_sync_exit", ARCInstKind::User)
+ .Default(ARCInstKind::CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
+ .Case("objc_loadWeak", ARCInstKind::LoadWeak)
+ .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
+ .Default(ARCInstKind::CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_storeWeak", ARCInstKind::StoreWeak)
+ .Case("objc_initWeak", ARCInstKind::InitWeak)
+ .Case("objc_storeStrong", ARCInstKind::StoreStrong)
+ .Default(ARCInstKind::CallOrUser);
+ // Second argument is i8**.
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_moveWeak", ARCInstKind::MoveWeak)
+ .Case("objc_copyWeak", ARCInstKind::CopyWeak)
+                       // Ignore annotation calls. This is important to stop
+                       // the optimizer from treating annotations as uses,
+                       // which would corrupt the state of the pointers they
+                       // are attempting to elucidate.
+ .Case("llvm.arc.annotation.topdown.bbstart",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.topdown.bbend",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.bottomup.bbstart",
+ ARCInstKind::None)
+ .Case("llvm.arc.annotation.bottomup.bbend",
+ ARCInstKind::None)
+ .Default(ARCInstKind::CallOrUser);
+ }
+
+ // Anything else.
+ return ARCInstKind::CallOrUser;
+}
+
+// A whitelist of intrinsics that we know do not use objc pointers or decrement
+// ref counts.
+static bool isInertIntrinsic(unsigned ID) {
+ // TODO: Make this into a covered switch.
+ switch (ID) {
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::objectsize:
+ case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for:
+ case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda:
+ case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return true;
+ default:
+ return false;
+ }
+}
+
+// A whitelist of intrinsics that we know only "use" objc pointers and do not
+// decrement ref counts.
+static bool isUseOnlyIntrinsic(unsigned ID) {
+  // We are conservative: even though these intrinsics are unlikely to touch
+  // reference counts, we whitelist them for safety.
+ //
+ // TODO: Expand this into a covered switch. There is a lot more here.
+ switch (ID) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Determine what kind of construct V is.
+ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    // Any instruction other than bitcast and gep with a pointer operand has a
+ // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
+ // to a subsequent use, rather than using it themselves, in this sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // See if we have a function that we know something about.
+ if (const Function *F = CI->getCalledFunction()) {
+ ARCInstKind Class = GetFunctionClass(F);
+ if (Class != ARCInstKind::CallOrUser)
+ return Class;
+ Intrinsic::ID ID = F->getIntrinsicID();
+ if (isInertIntrinsic(ID))
+ return ARCInstKind::None;
+ if (isUseOnlyIntrinsic(ID))
+ return ARCInstKind::User;
+ }
+
+ // Otherwise, be conservative.
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ // Otherwise, be conservative.
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ case Instruction::PHI:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::Switch:
+ case Instruction::IndirectBr:
+ case Instruction::Alloca:
+ case Instruction::VAArg:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::IntToPtr:
+ case Instruction::FCmp:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::InsertElement:
+ case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialRetainableObjPtr(I->getOperand(1)))
+ return ARCInstKind::User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialRetainableObjPtr(*OI))
+ return ARCInstKind::User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return ARCInstKind::None;
+}
+
+/// \brief Test if the given class is a kind of user.
+bool llvm::objcarc::IsUser(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::User:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::IntrinsicUser:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::Call:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class is objc_retain or equivalent.
+bool llvm::objcarc::IsRetain(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ return true;
+  // retainBlock is treated as not being a retain, since it can copy its
+  // block.
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ return true;
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::RetainBlock:
+ return true;
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
+ // ARCInstKind::RetainBlock may be given a stack argument.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ return true;
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
+  /// It is never safe to tail call objc_autorelease: tail calling it enables
+  /// fast autoreleasing, which can cause our object to be reclaimed from the
+  /// autorelease pool, violating the semantics of __autoreleasing types in
+  /// ARC.
+ switch (Class) {
+ case ARCInstKind::Autorelease:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ return true;
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+///
+/// This means that it *could* interrupt the RV optimization.
+bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return true;
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
+bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
+ switch (Kind) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ return false;
+
+ // The cases below are conservative.
+
+  // RetainBlock can result in user-defined copy constructors being called,
+  // which in turn may perform releases.
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::Release:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ return true;
+ }
+
+ llvm_unreachable("covered switch isn't covered?");
+}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h
new file mode 100644
index 0000000..636c65c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCInstKind.h
@@ -0,0 +1,123 @@
+//===--- ARCInstKind.h - ARC instruction equivalence classes -*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+namespace objcarc {
+
+/// \enum ARCInstKind
+///
+/// \brief Equivalence classes of instructions in the ARC Model.
+///
+/// Since we do not have "instructions" to represent ARC concepts in LLVM IR,
+/// we instead operate on equivalence classes of instructions.
+///
+/// TODO: This should be split into two enums: a runtime entry point enum
+/// (possibly united with the ARCRuntimeEntrypoint class) and an enum that deals
+/// with effects of instructions in the ARC model (which would handle the notion
+/// of a User or CallOrUser).
+enum class ARCInstKind {
+ Retain, ///< objc_retain
+ RetainRV, ///< objc_retainAutoreleasedReturnValue
+ RetainBlock, ///< objc_retainBlock
+ Release, ///< objc_release
+ Autorelease, ///< objc_autorelease
+ AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ NoopCast, ///< objc_retainedObject, etc.
+ FusedRetainAutorelease, ///< objc_retainAutorelease
+ FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ StoreWeak, ///< objc_storeWeak (primitive)
+ InitWeak, ///< objc_initWeak (derived)
+ LoadWeak, ///< objc_loadWeak (derived)
+ MoveWeak, ///< objc_moveWeak (derived)
+ CopyWeak, ///< objc_copyWeak (derived)
+ DestroyWeak, ///< objc_destroyWeak (derived)
+ StoreStrong, ///< objc_storeStrong (derived)
+ IntrinsicUser, ///< clang.arc.use
+ CallOrUser, ///< could call objc_release and/or "use" pointers
+ Call, ///< could call objc_release
+ User, ///< could "use" a pointer
+ None ///< anything that is inert from an ARC perspective.
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ARCInstKind Class);
+
+/// \brief Test if the given class is a kind of user.
+bool IsUser(ARCInstKind Class);
+
+/// \brief Test if the given class is objc_retain or equivalent.
+bool IsRetain(ARCInstKind Class);
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+bool IsAutorelease(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+bool IsForwarding(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+bool IsNoopOnNull(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+bool IsAlwaysTail(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+bool IsNeverTail(ARCInstKind Class);
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+bool IsNoThrow(ARCInstKind Class);
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+bool CanInterruptRV(ARCInstKind Class);
+
+/// \brief Determine if F is one of the special known Functions. If it isn't,
+/// return ARCInstKind::CallOrUser.
+ARCInstKind GetFunctionClass(const Function *F);
+
+/// \brief Determine which objc runtime call instruction class V belongs to.
+///
+/// This is similar to GetARCInstKind except that it only detects objc
+/// runtime calls. This allows it to be faster.
+///
+static inline ARCInstKind GetBasicARCInstKind(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return ARCInstKind::CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return isa<InvokeInst>(V) ? ARCInstKind::CallOrUser : ARCInstKind::User;
+}
+
+/// Map V to its ARCInstKind equivalence class.
+ARCInstKind GetARCInstKind(const Value *V);
+
+/// Returns false if we can conservatively prove that no instruction mapped to
+/// this kind can decrement ref counts; returns true otherwise.
+bool CanDecrementRefCount(ARCInstKind Kind);
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index e286dbc..d4fef10 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -27,22 +27,22 @@
namespace llvm {
namespace objcarc {
+enum class ARCRuntimeEntryPointKind {
+ AutoreleaseRV,
+ Release,
+ Retain,
+ RetainBlock,
+ Autorelease,
+ StoreStrong,
+ RetainRV,
+ RetainAutorelease,
+ RetainAutoreleaseRV,
+};
+
/// Declarations for ObjC runtime functions and constants. These are initialized
/// lazily to avoid cluttering up the Module with unused declarations.
class ARCRuntimeEntryPoints {
public:
- enum EntryPointType {
- EPT_AutoreleaseRV,
- EPT_Release,
- EPT_Retain,
- EPT_RetainBlock,
- EPT_Autorelease,
- EPT_StoreStrong,
- EPT_RetainRV,
- EPT_RetainAutorelease,
- EPT_RetainAutoreleaseRV
- };
-
ARCRuntimeEntryPoints() : TheModule(nullptr),
AutoreleaseRV(nullptr),
Release(nullptr),
@@ -54,9 +54,7 @@ public:
RetainAutorelease(nullptr),
RetainAutoreleaseRV(nullptr) { }
- ~ARCRuntimeEntryPoints() { }
-
- void Initialize(Module *M) {
+ void init(Module *M) {
TheModule = M;
AutoreleaseRV = nullptr;
Release = nullptr;
@@ -69,30 +67,30 @@ public:
RetainAutoreleaseRV = nullptr;
}
- Constant *get(const EntryPointType entry) {
+ Constant *get(ARCRuntimeEntryPointKind kind) {
assert(TheModule != nullptr && "Not initialized.");
- switch (entry) {
- case EPT_AutoreleaseRV:
+ switch (kind) {
+ case ARCRuntimeEntryPointKind::AutoreleaseRV:
return getI8XRetI8XEntryPoint(AutoreleaseRV,
"objc_autoreleaseReturnValue", true);
- case EPT_Release:
+ case ARCRuntimeEntryPointKind::Release:
return getVoidRetI8XEntryPoint(Release, "objc_release");
- case EPT_Retain:
+ case ARCRuntimeEntryPointKind::Retain:
return getI8XRetI8XEntryPoint(Retain, "objc_retain", true);
- case EPT_RetainBlock:
+ case ARCRuntimeEntryPointKind::RetainBlock:
return getI8XRetI8XEntryPoint(RetainBlock, "objc_retainBlock", false);
- case EPT_Autorelease:
+ case ARCRuntimeEntryPointKind::Autorelease:
return getI8XRetI8XEntryPoint(Autorelease, "objc_autorelease", true);
- case EPT_StoreStrong:
+ case ARCRuntimeEntryPointKind::StoreStrong:
return getI8XRetI8XXI8XEntryPoint(StoreStrong, "objc_storeStrong");
- case EPT_RetainRV:
+ case ARCRuntimeEntryPointKind::RetainRV:
return getI8XRetI8XEntryPoint(RetainRV,
"objc_retainAutoreleasedReturnValue", true);
- case EPT_RetainAutorelease:
+ case ARCRuntimeEntryPointKind::RetainAutorelease:
return getI8XRetI8XEntryPoint(RetainAutorelease, "objc_retainAutorelease",
true);
- case EPT_RetainAutoreleaseRV:
+ case ARCRuntimeEntryPointKind::RetainAutoreleaseRV:
return getI8XRetI8XEntryPoint(RetainAutoreleaseRV,
"objc_retainAutoreleaseReturnValue", true);
}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h b/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h
new file mode 100644
index 0000000..d6439b6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -0,0 +1,108 @@
+//===- BlotMapVector.h - A MapVector with the blot operation -*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+#include <algorithm>
+
+namespace llvm {
+/// \brief An associative container with fast insertion-order (deterministic)
+/// iteration over its elements, plus the special blot operation.
+template <class KeyT, class ValueT> class BlotMapVector {
+ /// Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ typedef std::vector<std::pair<KeyT, ValueT>> VectorTy;
+ /// Keys and values.
+ VectorTy Vector;
+
+public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~BlotMapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); I != E;
+ ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(), E = Vector.end();
+ I != E; ++I)
+ assert(!I->first || (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](const KeyT &Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector[Num].second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(InsertPair);
+ return std::make_pair(Vector.begin() + Num, true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ iterator find(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end())
+ return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ const_iterator find(const KeyT &Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end())
+ return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// This is similar to erase, but instead of removing the element from the
+ /// vector, it just zeros out the key in the vector. This leaves iterators
+ /// intact, but clients must be prepared for zeroed-out keys when iterating.
+ void blot(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end())
+ return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+
+ bool empty() const {
+ assert(Map.empty() == Vector.empty());
+ return Map.empty();
+ }
+};
+} // end namespace llvm
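
The blot operation documented above is the one non-standard piece of BlotMapVector. As a rough, standalone illustration (not part of this patch; it uses std::unordered_map and std::string instead of LLVM's DenseMap and arbitrary key types), the sketch below shows the intended behaviour: blotting blanks the key in place, so insertion order and existing indices survive, and iteration simply has to skip the tombstones.

#include <cassert>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Minimal stand-in for BlotMapVector<std::string, int>: a map from keys to
// indices into an insertion-ordered vector of (key, value) pairs.
struct MiniBlotMap {
  std::unordered_map<std::string, size_t> Map;      // key -> index in Vector
  std::vector<std::pair<std::string, int>> Vector;  // insertion order

  int &operator[](const std::string &Key) {
    auto It = Map.find(Key);
    if (It != Map.end())
      return Vector[It->second].second;
    Map.emplace(Key, Vector.size());
    Vector.emplace_back(Key, 0);
    return Vector.back().second;
  }

  // blot: zero out the key in the vector instead of erasing the element, so
  // existing indices into Vector (and iterators) stay valid.
  void blot(const std::string &Key) {
    auto It = Map.find(Key);
    if (It == Map.end())
      return;
    Vector[It->second].first.clear();  // tombstone: empty key
    Map.erase(It);
  }
};

int main() {
  MiniBlotMap M;
  M["retain"] = 1;
  M["release"] = 2;
  M["autorelease"] = 3;

  M.blot("release");  // leaves a ("", 2) tombstone at index 1

  // Iteration still sees three slots, in insertion order; clients must skip
  // blotted (empty-key) entries, exactly as the XDEBUG destructor allows for.
  int Live = 0;
  for (const auto &KV : M.Vector)
    if (!KV.first.empty())
      ++Live;
  assert(Live == 2 && M.Vector.size() == 3);
  return 0;
}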
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index f6c236c..4edd029 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -32,21 +32,20 @@ using namespace llvm::objcarc;
/// Test whether the given instruction can result in a reference count
/// modification (positive or negative) for the pointer's object.
-bool
-llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
- ProvenanceAnalysis &PA,
- InstructionClass Class) {
+bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
switch (Class) {
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_IntrinsicUser:
- case IC_User:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
// These operations never directly modify a reference count.
return false;
default: break;
}
- ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ ImmutableCallSite CS(Inst);
assert(CS && "Only calls can alter reference counts!");
// See if AliasAnalysis can help us with the call.
@@ -54,10 +53,12 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
if (AliasAnalysis::onlyReadsMemory(MRB))
return false;
if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ const DataLayout &DL = Inst->getModule()->getDataLayout();
for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I) {
const Value *Op = *I;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Ptr, Op, DL))
return true;
}
return false;
@@ -67,15 +68,29 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
return true;
}
+bool llvm::objcarc::CanDecrementRefCount(const Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+  // First perform a quick check: if Class cannot touch ref counts, we are done.
+ if (!CanDecrementRefCount(Class))
+ return false;
+
+ // Otherwise, just use CanAlterRefCount for now.
+ return CanAlterRefCount(Inst, Ptr, PA, Class);
+}
+
/// Test whether the given instruction can "use" the given pointer's object in a
/// way that requires the reference count to be positive.
-bool
-llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
- ProvenanceAnalysis &PA, InstructionClass Class) {
- // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
- if (Class == IC_Call)
+bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class) {
+ // ARCInstKind::Call operations (as opposed to
+ // ARCInstKind::CallOrUser) never "use" objc pointers.
+ if (Class == ARCInstKind::Call)
return false;
+ const DataLayout &DL = Inst->getModule()->getDataLayout();
+
// Consider various instructions which may have pointer arguments which are
// not "uses".
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
@@ -84,29 +99,31 @@ llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
// of any other dynamic reference-counted pointers.
if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA()))
return false;
- } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ } else if (auto CS = ImmutableCallSite(Inst)) {
// For calls, just check the arguments (and not the callee operand).
for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
OE = CS.arg_end(); OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Ptr, Op, DL))
return true;
}
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Special-case stores, because we don't care about the stored value, just
// the store address.
- const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand(), DL);
// If we can't tell what the underlying object was, assume there is a
// dependence.
- return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr);
+ return IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Op, Ptr, DL);
}
// Check each operand for a match.
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op, DL))
return true;
}
return false;
@@ -123,11 +140,11 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
switch (Flavor) {
case NeedsPositiveRetainCount: {
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
- case IC_None:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
return false;
default:
return CanUse(Inst, Arg, PA, Class);
@@ -135,10 +152,10 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
}
case AutoreleasePoolBoundary: {
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPush:
// These mark the end and begin of an autorelease pool scope.
return true;
default:
@@ -148,13 +165,13 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
}
case CanChangeRetainCount: {
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
switch (Class) {
- case IC_AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPop:
// Conservatively assume this can decrement any count.
return true;
- case IC_AutoreleasepoolPush:
- case IC_None:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
return false;
default:
return CanAlterRefCount(Inst, Arg, PA, Class);
@@ -162,28 +179,28 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
}
case RetainAutoreleaseDep:
- switch (GetBasicInstructionClass(Inst)) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
+ switch (GetBasicARCInstKind(Inst)) {
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPush:
// Don't merge an objc_autorelease with an objc_retain inside a different
// autoreleasepool scope.
return true;
- case IC_Retain:
- case IC_RetainRV:
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
// Check for a retain of the same pointer for merging.
- return GetObjCArg(Inst) == Arg;
+ return GetArgRCIdentityRoot(Inst) == Arg;
default:
// Nothing else matters for objc_retainAutorelease formation.
return false;
}
case RetainAutoreleaseRVDep: {
- InstructionClass Class = GetBasicInstructionClass(Inst);
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
switch (Class) {
- case IC_Retain:
- case IC_RetainRV:
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
// Check for a retain of the same pointer for merging.
- return GetObjCArg(Inst) == Arg;
+ return GetArgRCIdentityRoot(Inst) == Arg;
default:
// Anything that can autorelease interrupts
// retainAutoreleaseReturnValue formation.
@@ -192,7 +209,7 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
}
case RetainRVDep:
- return CanInterruptRV(GetBasicInstructionClass(Inst));
+ return CanInterruptRV(GetBasicARCInstKind(Inst));
}
llvm_unreachable("Invalid dependence flavor");
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
index 7b5601a..8e042d4 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -63,15 +63,24 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
/// Test whether the given instruction can "use" the given pointer's object in a
/// way that requires the reference count to be positive.
-bool
-CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
- InstructionClass Class);
+bool CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+ ARCInstKind Class);
/// Test whether the given instruction can result in a reference count
/// modification (positive or negative) for the pointer's object.
-bool
-CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
- ProvenanceAnalysis &PA, InstructionClass Class);
+bool CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+
+/// Returns true unless we can conservatively prove that Inst cannot decrement
+/// the reference count of Ptr.
+bool CanDecrementRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+
+static inline bool CanDecrementRefCount(const Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA) {
+ return CanDecrementRefCount(Inst, Ptr, PA, GetARCInstKind(Inst));
+}
} // namespace objcarc
} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 7a7eae8..7595e2d 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -24,6 +24,7 @@
#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -33,6 +34,7 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "ARCInstKind.h"
namespace llvm {
class raw_ostream;
@@ -68,160 +70,14 @@ static inline bool ModuleHasARC(const Module &M) {
M.getNamedValue("clang.arc.use");
}
-/// \enum InstructionClass
-/// \brief A simple classification for instructions.
-enum InstructionClass {
- IC_Retain, ///< objc_retain
- IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
- IC_RetainBlock, ///< objc_retainBlock
- IC_Release, ///< objc_release
- IC_Autorelease, ///< objc_autorelease
- IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
- IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
- IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
- IC_NoopCast, ///< objc_retainedObject, etc.
- IC_FusedRetainAutorelease, ///< objc_retainAutorelease
- IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
- IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
- IC_StoreWeak, ///< objc_storeWeak (primitive)
- IC_InitWeak, ///< objc_initWeak (derived)
- IC_LoadWeak, ///< objc_loadWeak (derived)
- IC_MoveWeak, ///< objc_moveWeak (derived)
- IC_CopyWeak, ///< objc_copyWeak (derived)
- IC_DestroyWeak, ///< objc_destroyWeak (derived)
- IC_StoreStrong, ///< objc_storeStrong (derived)
- IC_IntrinsicUser, ///< clang.arc.use
- IC_CallOrUser, ///< could call objc_release and/or "use" pointers
- IC_Call, ///< could call objc_release
- IC_User, ///< could "use" a pointer
- IC_None ///< anything else
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class);
-
-/// \brief Test if the given class is a kind of user.
-inline static bool IsUser(InstructionClass Class) {
- return Class == IC_User ||
- Class == IC_CallOrUser ||
- Class == IC_IntrinsicUser;
-}
-
-/// \brief Test if the given class is objc_retain or equivalent.
-static inline bool IsRetain(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV;
-}
-
-/// \brief Test if the given class is objc_autorelease or equivalent.
-static inline bool IsAutorelease(InstructionClass Class) {
- return Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV;
-}
-
-/// \brief Test if the given class represents instructions which return their
-/// argument verbatim.
-static inline bool IsForwarding(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_NoopCast;
-}
-
-/// \brief Test if the given class represents instructions which do nothing if
-/// passed a null pointer.
-static inline bool IsNoopOnNull(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Release ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_RetainBlock;
-}
-
-/// \brief Test if the given class represents instructions which are always safe
-/// to mark with the "tail" keyword.
-static inline bool IsAlwaysTail(InstructionClass Class) {
- // IC_RetainBlock may be given a stack argument.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_AutoreleaseRV;
-}
-
-/// \brief Test if the given class represents instructions which are never safe
-/// to mark with the "tail" keyword.
-static inline bool IsNeverTail(InstructionClass Class) {
- /// It is never safe to tail call objc_autorelease since by tail calling
- /// objc_autorelease, we also tail call -[NSObject autorelease] which supports
- /// fast autoreleasing causing our object to be potentially reclaimed from the
- /// autorelease pool which violates the semantics of __autoreleasing types in
- /// ARC.
- return Class == IC_Autorelease;
-}
-
-/// \brief Test if the given class represents instructions which are always safe
-/// to mark with the nounwind attribute.
-static inline bool IsNoThrow(InstructionClass Class) {
- // objc_retainBlock is not nounwind because it calls user copy constructors
- // which could theoretically throw.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Release ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_AutoreleasepoolPush ||
- Class == IC_AutoreleasepoolPop;
-}
-
-/// Test whether the given instruction can autorelease any pointer or cause an
-/// autoreleasepool pop.
-static inline bool
-CanInterruptRV(InstructionClass Class) {
- switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_CallOrUser:
- case IC_Call:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- return true;
- default:
- return false;
- }
-}
-
-/// \brief Determine if F is one of the special known Functions. If it isn't,
-/// return IC_CallOrUser.
-InstructionClass GetFunctionClass(const Function *F);
-
-/// \brief Determine which objc runtime call instruction class V belongs to.
-///
-/// This is similar to GetInstructionClass except that it only detects objc
-/// runtime calls. This allows it to be faster.
-///
-static inline InstructionClass GetBasicInstructionClass(const Value *V) {
- if (const CallInst *CI = dyn_cast<CallInst>(V)) {
- if (const Function *F = CI->getCalledFunction())
- return GetFunctionClass(F);
- // Otherwise, be conservative.
- return IC_CallOrUser;
- }
-
- // Otherwise, be conservative.
- return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
-}
-
-/// \brief Determine what kind of construct V is.
-InstructionClass GetInstructionClass(const Value *V);
-
/// \brief This is a wrapper around getUnderlyingObject which also knows how to
/// look through objc_retain and objc_autorelease calls, which we know to return
/// their argument verbatim.
-static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
+static inline const Value *GetUnderlyingObjCPtr(const Value *V,
+ const DataLayout &DL) {
for (;;) {
- V = GetUnderlyingObject(V);
- if (!IsForwarding(GetBasicInstructionClass(V)))
+ V = GetUnderlyingObject(V, DL);
+ if (!IsForwarding(GetBasicARCInstKind(V)))
break;
V = cast<CallInst>(V)->getArgOperand(0);
}
@@ -229,37 +85,44 @@ static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
return V;
}
-/// \brief This is a wrapper around Value::stripPointerCasts which also knows
-/// how to look through objc_retain and objc_autorelease calls, which we know to
-/// return their argument verbatim.
-static inline const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+/// The RCIdentity root of a value \p V is a dominating value U for which
+/// retaining or releasing U is equivalent to retaining or releasing V. In other
+/// words, ARC operations on \p V are equivalent to ARC operations on \p U.
+///
+/// We use this in the ARC optimizer to make it easier to match up ARC
+/// operations by always mapping ARC operations to RCIdentityRoots instead of
+/// pointers themselves.
+///
+/// The two ways that we see RCIdentical values in ObjC are via:
+///
+/// 1. PointerCasts
+/// 2. Forwarding Calls that return their argument verbatim.
+///
+/// Thus this function strips off pointer casts and forwarding calls. *NOTE*
+/// This implies that two RCIdentical values must alias.
+static inline const Value *GetRCIdentityRoot(const Value *V) {
for (;;) {
V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicInstructionClass(V)))
+ if (!IsForwarding(GetBasicARCInstKind(V)))
break;
V = cast<CallInst>(V)->getArgOperand(0);
}
return V;
}
-/// \brief This is a wrapper around Value::stripPointerCasts which also knows
-/// how to look through objc_retain and objc_autorelease calls, which we know to
-/// return their argument verbatim.
-static inline Value *StripPointerCastsAndObjCCalls(Value *V) {
- for (;;) {
- V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicInstructionClass(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
- return V;
+/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just
+/// casts away the const of the result. For documentation about what an
+/// RCIdentityRoot is (and, by extension, what GetRCIdentityRoot does), see
+/// that function.
+static inline Value *GetRCIdentityRoot(Value *V) {
+ return const_cast<Value *>(GetRCIdentityRoot((const Value *)V));
}
/// \brief Assuming the given instruction is one of the special calls such as
-/// objc_retain or objc_release, return the argument value, stripped of no-op
-/// casts and forwarding calls.
-static inline Value *GetObjCArg(Value *Inst) {
- return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+/// objc_retain or objc_release, return the RCIdentity root of the argument of
+/// the call.
+static inline Value *GetArgRCIdentityRoot(Value *Inst) {
+ return GetRCIdentityRoot(cast<CallInst>(Inst)->getArgOperand(0));
}
static inline bool IsNullOrUndef(const Value *V) {
@@ -286,8 +149,8 @@ static inline void EraseInstruction(Instruction *CI) {
if (!Unused) {
// Replace the return value with the argument.
- assert((IsForwarding(GetBasicInstructionClass(CI)) ||
- (IsNoopOnNull(GetBasicInstructionClass(CI)) &&
+ assert((IsForwarding(GetBasicARCInstKind(CI)) ||
+ (IsNoopOnNull(GetBasicARCInstKind(CI)) &&
isa<ConstantPointerNull>(OldArg))) &&
"Can't delete non-forwarding instruction with users!");
CI->replaceAllUsesWith(OldArg);
@@ -344,15 +207,15 @@ static inline bool IsPotentialRetainableObjPtr(const Value *Op,
return true;
}
-/// \brief Helper for GetInstructionClass. Determines what kind of construct CS
+/// \brief Helper for GetARCInstKind. Determines what kind of construct CS
/// is.
-static inline InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+static inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) {
for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I)
if (IsPotentialRetainableObjPtr(*I))
- return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+ return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser;
- return CS.onlyReadsMemory() ? IC_None : IC_Call;
+ return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call;
}
/// \brief Return true if this value refers to a distinct and identifiable
@@ -371,7 +234,7 @@ static inline bool IsObjCIdentifiedObject(const Value *V) {
if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
const Value *Pointer =
- StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ GetRCIdentityRoot(LI->getPointerOperand());
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
// A constant pointer can't be pointing to an object on the heap. It may
// be reference-counted, but it won't be deleted.
@@ -396,6 +259,55 @@ static inline bool IsObjCIdentifiedObject(const Value *V) {
return false;
}
+enum class ARCMDKindID {
+ ImpreciseRelease,
+ CopyOnEscape,
+ NoObjCARCExceptions,
+};
+
+/// A cache of MDKinds used by various ARC optimizations.
+class ARCMDKindCache {
+ Module *M;
+
+ /// The Metadata Kind for clang.imprecise_release metadata.
+ llvm::Optional<unsigned> ImpreciseReleaseMDKind;
+
+ /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+ llvm::Optional<unsigned> CopyOnEscapeMDKind;
+
+ /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
+ llvm::Optional<unsigned> NoObjCARCExceptionsMDKind;
+
+public:
+ void init(Module *Mod) {
+ M = Mod;
+ ImpreciseReleaseMDKind = NoneType::None;
+ CopyOnEscapeMDKind = NoneType::None;
+ NoObjCARCExceptionsMDKind = NoneType::None;
+ }
+
+ unsigned get(ARCMDKindID ID) {
+ switch (ID) {
+ case ARCMDKindID::ImpreciseRelease:
+ if (!ImpreciseReleaseMDKind)
+ ImpreciseReleaseMDKind =
+ M->getContext().getMDKindID("clang.imprecise_release");
+ return *ImpreciseReleaseMDKind;
+ case ARCMDKindID::CopyOnEscape:
+ if (!CopyOnEscapeMDKind)
+ CopyOnEscapeMDKind =
+ M->getContext().getMDKindID("clang.arc.copy_on_escape");
+ return *CopyOnEscapeMDKind;
+ case ARCMDKindID::NoObjCARCExceptions:
+ if (!NoObjCARCExceptionsMDKind)
+ NoObjCARCExceptionsMDKind =
+ M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+ return *NoObjCARCExceptionsMDKind;
+ }
+ llvm_unreachable("Covered switch isn't covered?!");
+ }
+};
+
} // end namespace objcarc
} // end namespace llvm
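
The RCIdentityRoot comment above describes a fixpoint loop: strip pointer casts, then forwarding calls, and repeat until neither applies, so that ARC operations on RCIdentical values all map to one root. The toy model below (a hypothetical Node type, not LLVM's Value hierarchy) sketches that loop and the resulting grouping.

#include <cassert>

// Toy "value" node: it may be a no-op cast of another value and/or the result
// of a forwarding call (objc_retain/objc_autorelease return their argument).
struct Node {
  const Node *CastOf = nullptr;        // non-null: this is a cast of CastOf
  const Node *ForwardedArg = nullptr;  // non-null: forwarding call on this arg
};

// Mirrors GetRCIdentityRoot: strip casts, then forwarding calls, repeatedly.
static const Node *getRCIdentityRoot(const Node *V) {
  for (;;) {
    while (V->CastOf)           // analogue of stripPointerCasts()
      V = V->CastOf;
    if (!V->ForwardedArg)       // analogue of !IsForwarding(...)
      break;
    V = V->ForwardedArg;        // analogue of getArgOperand(0)
  }
  return V;
}

int main() {
  Node Root;                                       // the underlying object
  Node Retained; Retained.ForwardedArg = &Root;    // objc_retain(%root)
  Node Cast;     Cast.CastOf = &Retained;          // bitcast of the retain
  Node Auto;     Auto.ForwardedArg = &Cast;        // objc_autorelease(%cast)

  // All four values are RCIdentical: ARC operations on any of them are
  // operations on Root, so they all map to the same root (and must alias).
  assert(getRCIdentityRoot(&Auto) == &Root);
  assert(getRCIdentityRoot(&Cast) == &Root);
  assert(getRCIdentityRoot(&Retained) == &Root);
  assert(getRCIdentityRoot(&Root) == &Root);
  return 0;
}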
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 1a25391..d318643 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -97,11 +97,11 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
Instruction *Push = nullptr;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = I++;
- switch (GetBasicInstructionClass(Inst)) {
- case IC_AutoreleasepoolPush:
+ switch (GetBasicARCInstKind(Inst)) {
+ case ARCInstKind::AutoreleasepoolPush:
Push = Inst;
break;
- case IC_AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPop:
// If this pop matches a push and nothing in between can autorelease,
// zap the pair.
if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
@@ -115,7 +115,7 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
}
Push = nullptr;
break;
- case IC_CallOrUser:
+ case ARCInstKind::CallOrUser:
if (MayAutorelease(ImmutableCallSite(Inst)))
Push = nullptr;
break;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index c61b6b0..b1515e3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -46,6 +46,11 @@ ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
return new ObjCARCAliasAnalysis();
}
+bool ObjCARCAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -59,8 +64,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
// First, strip off no-ops, including ObjC-specific no-ops, and try making a
// precise alias query.
- const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
- const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+ const Value *SA = GetRCIdentityRoot(LocA.Ptr);
+ const Value *SB = GetRCIdentityRoot(LocB.Ptr);
AliasResult Result =
AliasAnalysis::alias(Location(SA, LocA.Size, LocA.AATags),
Location(SB, LocB.Size, LocB.AATags));
@@ -69,8 +74,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *UA = GetUnderlyingObjCPtr(SA);
- const Value *UB = GetUnderlyingObjCPtr(SB);
+ const Value *UA = GetUnderlyingObjCPtr(SA, *DL);
+ const Value *UB = GetUnderlyingObjCPtr(SB, *DL);
if (UA != SA || UB != SB) {
Result = AliasAnalysis::alias(Location(UA), Location(UB));
// We can't use MustAlias or PartialAlias results here because
@@ -92,14 +97,14 @@ ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// First, strip off no-ops, including ObjC-specific no-ops, and try making
// a precise alias query.
- const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+ const Value *S = GetRCIdentityRoot(Loc.Ptr);
if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.AATags),
OrLocal))
return true;
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *U = GetUnderlyingObjCPtr(S);
+ const Value *U = GetUnderlyingObjCPtr(S, *DL);
if (U != S)
return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
@@ -120,7 +125,7 @@ ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
return AliasAnalysis::getModRefBehavior(F);
switch (GetFunctionClass(F)) {
- case IC_NoopCast:
+ case ARCInstKind::NoopCast:
return DoesNotAccessMemory;
default:
break;
@@ -134,15 +139,15 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
if (!EnableARCOpts)
return AliasAnalysis::getModRefInfo(CS, Loc);
- switch (GetBasicInstructionClass(CS.getInstruction())) {
- case IC_Retain:
- case IC_RetainRV:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_NoopCast:
- case IC_AutoreleasepoolPush:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
+ switch (GetBasicARCInstKind(CS.getInstruction())) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::NoopCast:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
// These functions don't access any memory visible to the compiler.
// Note that this doesn't include objc_retainBlock, because it updates
// pointers when it copies block data.
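
The updated ObjCARCAliasAnalysis::alias above first queries on the RCIdentity roots and, only when that is inconclusive, re-queries on the underlying objects, forwarding nothing stronger than NoAlias from the coarser query. A standalone sketch of that refinement pattern follows; refinedAlias and the lambdas are illustrative stand-ins, not the pass's real interfaces.

#include <cassert>

enum class AliasResult { NoAlias, MayAlias, MustAlias };

// PreciseQuery stands in for the query on the stripped (RCIdentity) pointers;
// CoarseQuery stands in for the query on their underlying objects.
template <typename Precise, typename Coarse>
AliasResult refinedAlias(Precise PreciseQuery, Coarse CoarseQuery) {
  AliasResult R = PreciseQuery();
  if (R != AliasResult::MayAlias)
    return R;                       // already a definite answer
  // The coarse query runs on different (underlying) pointers, so a Must or
  // Partial answer does not transfer back; only NoAlias is safe to forward.
  if (CoarseQuery() == AliasResult::NoAlias)
    return AliasResult::NoAlias;
  return AliasResult::MayAlias;
}

int main() {
  auto May  = [] { return AliasResult::MayAlias; };
  auto No   = [] { return AliasResult::NoAlias; };
  auto Must = [] { return AliasResult::MustAlias; };

  assert(refinedAlias(Must, No) == AliasResult::MustAlias);  // precise wins
  assert(refinedAlias(May, No) == AliasResult::NoAlias);     // refined result
  assert(refinedAlias(May, Must) == AliasResult::MayAlias);  // Must not forwarded
  return 0;
}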
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
index 3fcea4e..3c5a021 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -44,9 +44,7 @@ namespace objcarc {
}
private:
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
/// This method is used when a pass implements an analysis interface through
/// multiple inheritance. If needed, it should override this to adjust the
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index eb325eb..2a3139f 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::objcarc;
@@ -44,6 +45,10 @@ using namespace llvm::objcarc;
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+//===----------------------------------------------------------------------===//
+// Declarations
+//===----------------------------------------------------------------------===//
+
namespace {
/// \brief Late ARC optimizations
///
@@ -68,17 +73,23 @@ namespace {
/// "tail".
SmallPtrSet<CallInst *, 8> StoreStrongCalls;
- bool OptimizeRetainCall(Function &F, Instruction *Retain);
+ /// Returns true if we eliminated Inst.
+ bool tryToPeepholeInstruction(Function &F, Instruction *Inst,
+ inst_iterator &Iter,
+ SmallPtrSetImpl<Instruction *> &DepInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited,
+ bool &TailOkForStoreStrong);
- bool ContractAutorelease(Function &F, Instruction *Autorelease,
- InstructionClass Class,
- SmallPtrSetImpl<Instruction *>
- &DependingInstructions,
- SmallPtrSetImpl<const BasicBlock *>
- &Visited);
+ bool optimizeRetainCall(Function &F, Instruction *Retain);
- void ContractRelease(Instruction *Release,
- inst_iterator &Iter);
+ bool
+ contractAutorelease(Function &F, Instruction *Autorelease,
+ ARCInstKind Class,
+ SmallPtrSetImpl<Instruction *> &DependingInstructions,
+ SmallPtrSetImpl<const BasicBlock *> &Visited);
+
+ void tryToContractReleaseIntoStoreStrong(Instruction *Release,
+ inst_iterator &Iter);
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool doInitialization(Module &M) override;
@@ -92,30 +103,15 @@ namespace {
};
}
-char ObjCARCContract::ID = 0;
-INITIALIZE_PASS_BEGIN(ObjCARCContract,
- "objc-arc-contract", "ObjC ARC contraction", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ObjCARCContract,
- "objc-arc-contract", "ObjC ARC contraction", false, false)
-
-Pass *llvm::createObjCARCContractPass() {
- return new ObjCARCContract();
-}
-
-void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesCFG();
-}
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
/// return value. We do this late so we do not disrupt the dataflow analysis in
/// ObjCARCOpt.
-bool
-ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
- ImmutableCallSite CS(GetObjCArg(Retain));
+bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
+ ImmutableCallSite CS(GetArgRCIdentityRoot(Retain));
const Instruction *Call = CS.getInstruction();
if (!Call)
return false;
@@ -139,7 +135,7 @@ ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
// We do not have to worry about tail calls/does not throw since
// retain/retainRV have the same properties.
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_RetainRV);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
cast<CallInst>(Retain)->setCalledFunction(Decl);
DEBUG(dbgs() << "New: " << *Retain << "\n");
@@ -147,19 +143,16 @@ ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
}
/// Merge an autorelease with a retain into a fused call.
-bool
-ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
- InstructionClass Class,
- SmallPtrSetImpl<Instruction *>
- &DependingInstructions,
- SmallPtrSetImpl<const BasicBlock *>
- &Visited) {
- const Value *Arg = GetObjCArg(Autorelease);
+bool ObjCARCContract::contractAutorelease(
+ Function &F, Instruction *Autorelease, ARCInstKind Class,
+ SmallPtrSetImpl<Instruction *> &DependingInstructions,
+ SmallPtrSetImpl<const BasicBlock *> &Visited) {
+ const Value *Arg = GetArgRCIdentityRoot(Autorelease);
// Check that there are no instructions between the retain and the autorelease
// (such as an autorelease_pop) which may change the count.
CallInst *Retain = nullptr;
- if (Class == IC_AutoreleaseRV)
+ if (Class == ARCInstKind::AutoreleaseRV)
FindDependencies(RetainAutoreleaseRVDep, Arg,
Autorelease->getParent(), Autorelease,
DependingInstructions, Visited, PA);
@@ -177,94 +170,208 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
DependingInstructions.clear();
- if (!Retain ||
- GetBasicInstructionClass(Retain) != IC_Retain ||
- GetObjCArg(Retain) != Arg)
+ if (!Retain || GetBasicARCInstKind(Retain) != ARCInstKind::Retain ||
+ GetArgRCIdentityRoot(Retain) != Arg)
return false;
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing "
- "retain/autorelease. Erasing: " << *Autorelease << "\n"
- " Old Retain: "
- << *Retain << "\n");
+ DEBUG(dbgs() << " Fusing retain/autorelease!\n"
+ " Autorelease:" << *Autorelease << "\n"
+ " Retain: " << *Retain << "\n");
- Constant *Decl = EP.get(Class == IC_AutoreleaseRV ?
- ARCRuntimeEntryPoints::EPT_RetainAutoreleaseRV :
- ARCRuntimeEntryPoints::EPT_RetainAutorelease);
+ Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
+ ? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
+ : ARCRuntimeEntryPointKind::RetainAutorelease);
Retain->setCalledFunction(Decl);
- DEBUG(dbgs() << " New Retain: "
- << *Retain << "\n");
+ DEBUG(dbgs() << " New RetainAutorelease: " << *Retain << "\n");
EraseInstruction(Autorelease);
return true;
}
-/// Attempt to merge an objc_release with a store, load, and objc_retain to form
-/// an objc_storeStrong. This can be a little tricky because the instructions
-/// don't always appear in order, and there may be unrelated intervening
-/// instructions.
-void ObjCARCContract::ContractRelease(Instruction *Release,
- inst_iterator &Iter) {
- LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
- if (!Load || !Load->isSimple()) return;
+static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
+ Instruction *Release,
+ ProvenanceAnalysis &PA,
+ AliasAnalysis *AA) {
+ StoreInst *Store = nullptr;
+ bool SawRelease = false;
- // For now, require everything to be in one basic block.
- BasicBlock *BB = Release->getParent();
- if (Load->getParent() != BB) return;
+ // Get the location associated with Load.
+ AliasAnalysis::Location Loc = AA->getLocation(Load);
// Walk down to find the store and the release, which may be in either order.
- BasicBlock::iterator I = Load, End = BB->end();
- ++I;
- AliasAnalysis::Location Loc = AA->getLocation(Load);
- StoreInst *Store = nullptr;
- bool SawRelease = false;
- for (; !Store || !SawRelease; ++I) {
- if (I == End)
- return;
+ for (auto I = std::next(BasicBlock::iterator(Load)),
+ E = Load->getParent()->end();
+ I != E; ++I) {
+ // If we found the store we were looking for and saw the release,
+ // break. There is no more work to be done.
+ if (Store && SawRelease)
+ break;
- Instruction *Inst = I;
+    // Now we know that we have not seen either the store or the release. If I
+    // is the release, mark that we saw the release and continue.
+ Instruction *Inst = &*I;
if (Inst == Release) {
SawRelease = true;
continue;
}
- InstructionClass Class = GetBasicInstructionClass(Inst);
+ // Otherwise, we check if Inst is a "good" store. Grab the instruction class
+ // of Inst.
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
- // Unrelated retains are harmless.
+ // If Inst is an unrelated retain, we don't care about it.
+ //
+ // TODO: This is one area where the optimization could be made more
+ // aggressive.
if (IsRetain(Class))
continue;
+ // If we have seen the store, but not the release...
if (Store) {
- // The store is the point where we're going to put the objc_storeStrong,
- // so make sure there are no uses after it.
- if (CanUse(Inst, Load, PA, Class))
- return;
- } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
- // We are moving the load down to the store, so check for anything
- // else which writes to the memory between the load and the store.
- Store = dyn_cast<StoreInst>(Inst);
- if (!Store || !Store->isSimple()) return;
- if (Store->getPointerOperand() != Loc.Ptr) return;
+      // We need to make sure that it is safe to move the release from its
+      // current position to the store. This implies proving that any
+      // instruction in between Store and the Release conservatively cannot use
+      // the RCIdentityRoot of Release. If we can prove that Inst can be
+      // ignored, continue...
+ if (!CanUse(Inst, Load, PA, Class)) {
+ continue;
+ }
+
+ // Otherwise, be conservative and return nullptr.
+ return nullptr;
}
+
+    // Ok, now we know we have not seen a store yet. See if Inst can write to
+    // our load location; if it cannot, just ignore the instruction.
+ if (!(AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod))
+ continue;
+
+ Store = dyn_cast<StoreInst>(Inst);
+
+    // If Inst can, then check if Inst is a simple store. If Inst is not a
+    // store, or is a store that is not simple, then we have something we do not
+    // understand writing to this memory, implying we cannot move the load
+    // over the write to any subsequent store that we may find.
+ if (!Store || !Store->isSimple())
+ return nullptr;
+
+ // Then make sure that the pointer we are storing to is Ptr. If so, we
+ // found our Store!
+ if (Store->getPointerOperand() == Loc.Ptr)
+ continue;
+
+ // Otherwise, we have an unknown store to some other ptr that clobbers
+ // Loc.Ptr. Bail!
+ return nullptr;
}
- Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+ // If we did not find the store or did not see the release, fail.
+ if (!Store || !SawRelease)
+ return nullptr;
+
+ // We succeeded!
+ return Store;
+}
- // Walk up to find the retain.
- I = Store;
- BasicBlock::iterator Begin = BB->begin();
- while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+static Instruction *
+findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
+ Instruction *Release,
+ ProvenanceAnalysis &PA) {
+ // Walk up from the Store to find the retain.
+ BasicBlock::iterator I = Store;
+ BasicBlock::iterator Begin = Store->getParent()->begin();
+ while (I != Begin && GetBasicARCInstKind(I) != ARCInstKind::Retain) {
+ Instruction *Inst = &*I;
+
+ // It is only safe to move the retain to the store if we can prove
+ // conservatively that nothing besides the release can decrement reference
+ // counts in between the retain and the store.
+ if (CanDecrementRefCount(Inst, New, PA) && Inst != Release)
+ return nullptr;
--I;
+ }
Instruction *Retain = I;
- if (GetBasicInstructionClass(Retain) != IC_Retain) return;
- if (GetObjCArg(Retain) != New) return;
+ if (GetBasicARCInstKind(Retain) != ARCInstKind::Retain)
+ return nullptr;
+ if (GetArgRCIdentityRoot(Retain) != New)
+ return nullptr;
+ return Retain;
+}
+
+/// Attempt to merge an objc_release with a store, load, and objc_retain to form
+/// an objc_storeStrong. An objc_storeStrong:
+///
+/// objc_storeStrong(i8** %old_ptr, i8* new_value)
+///
+/// is equivalent to the following IR sequence:
+///
+/// ; Load old value.
+/// %old_value = load i8** %old_ptr (1)
+///
+/// ; Increment the new value and then release the old value. This must occur
+/// ; in order in case old_value releases new_value in its destructor causing
+/// ; us to potentially have a dangling ptr.
+/// tail call i8* @objc_retain(i8* %new_value) (2)
+/// tail call void @objc_release(i8* %old_value) (3)
+///
+/// ; Store the new_value into old_ptr
+/// store i8* %new_value, i8** %old_ptr (4)
+///
+/// The safety of this optimization is based around the following
+/// considerations:
+///
+/// 1. We are forming the store strong at the store. Thus to perform this
+/// optimization it must be safe to move the retain, load, and release to
+/// (4).
+/// 2. We need to make sure that any re-orderings of (1), (2), (3), (4) are
+/// safe.
+void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release,
+ inst_iterator &Iter) {
+ // See if we are releasing something that we just loaded.
+ auto *Load = dyn_cast<LoadInst>(GetArgRCIdentityRoot(Release));
+ if (!Load || !Load->isSimple())
+ return;
+
+ // For now, require everything to be in one basic block.
+ BasicBlock *BB = Release->getParent();
+ if (Load->getParent() != BB)
+ return;
+
+  // First scan down the BB from Load, looking for a store to the same location
+  // that Load loads from.
+ StoreInst *Store =
+ findSafeStoreForStoreStrongContraction(Load, Release, PA, AA);
+ // If we fail, bail.
+ if (!Store)
+ return;
+
+ // Then find what new_value's RCIdentity Root is.
+ Value *New = GetRCIdentityRoot(Store->getValueOperand());
+
+ // Then walk up the BB and look for a retain on New without any intervening
+ // instructions which conservatively might decrement ref counts.
+ Instruction *Retain =
+ findRetainForStoreStrongContraction(New, Store, Release, PA);
+
+ // If we fail, bail.
+ if (!Retain)
+ return;
Changed = true;
++NumStoreStrongs;
+ DEBUG(
+ llvm::dbgs() << " Contracting retain, release into objc_storeStrong.\n"
+ << " Old:\n"
+ << " Store: " << *Store << "\n"
+ << " Release: " << *Release << "\n"
+ << " Retain: " << *Retain << "\n"
+ << " Load: " << *Load << "\n");
+
LLVMContext &C = Release->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *I8XX = PointerType::getUnqual(I8X);
@@ -274,7 +381,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
if (Args[1]->getType() != I8X)
Args[1] = new BitCastInst(Args[1], I8X, "", Store);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_StoreStrong);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store);
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
@@ -284,6 +391,8 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
// we can set the tail flag once we know it's safe.
StoreStrongCalls.insert(StoreStrong);
+ DEBUG(llvm::dbgs() << " New Store Strong: " << *StoreStrong << "\n");
+
if (&*Iter == Store) ++Iter;
Store->eraseFromParent();
Release->eraseFromParent();
@@ -292,85 +401,34 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
Load->eraseFromParent();
}
-bool ObjCARCContract::doInitialization(Module &M) {
- // If nothing in the Module uses ARC, don't do anything.
- Run = ModuleHasARC(M);
- if (!Run)
- return false;
-
- EP.Initialize(&M);
-
- // Initialize RetainRVMarker.
- RetainRVMarker = nullptr;
- if (NamedMDNode *NMD =
- M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
- if (NMD->getNumOperands() == 1) {
- const MDNode *N = NMD->getOperand(0);
- if (N->getNumOperands() == 1)
- if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
- RetainRVMarker = S;
- }
-
- return false;
-}
-
-bool ObjCARCContract::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- Changed = false;
- AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- PA.setAA(&getAnalysis<AliasAnalysis>());
-
- // Track whether it's ok to mark objc_storeStrong calls with the "tail"
- // keyword. Be conservative if the function has variadic arguments.
- // It seems that functions which "return twice" are also unsafe for the
- // "tail" argument, because they are setjmp, which could need to
- // return to an earlier stack state.
- bool TailOkForStoreStrongs = !F.isVarArg() &&
- !F.callsFunctionThatReturnsTwice();
-
- // For ObjC library calls which return their argument, replace uses of the
- // argument with uses of the call return value, if it dominates the use. This
- // reduces register pressure.
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
-
- DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n");
-
+bool ObjCARCContract::tryToPeepholeInstruction(
+ Function &F, Instruction *Inst, inst_iterator &Iter,
+ SmallPtrSetImpl<Instruction *> &DependingInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited,
+ bool &TailOkForStoreStrongs) {
// Only these library routines return their argument. In particular,
// objc_retainBlock does not necessarily return its argument.
- InstructionClass Class = GetBasicInstructionClass(Inst);
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
switch (Class) {
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- break;
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
- continue;
- break;
- case IC_Retain:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return false;
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
+ case ARCInstKind::Retain:
// Attempt to convert retains to retainrvs if they are next to function
// calls.
- if (!OptimizeRetainCall(F, Inst))
- break;
+ if (!optimizeRetainCall(F, Inst))
+ return false;
// If we succeed in our optimization, fall through.
// FALLTHROUGH
- case IC_RetainRV: {
+ case ARCInstKind::RetainRV: {
// If we're compiling for a target which needs a special inline-asm
// marker to do the retainAutoreleasedReturnValue optimization,
// insert it now.
if (!RetainRVMarker)
- break;
+ return false;
BasicBlock::iterator BBI = Inst;
BasicBlock *InstParent = Inst->getParent();
@@ -388,8 +446,8 @@ bool ObjCARCContract::runOnFunction(Function &F) {
--BBI;
} while (IsNoopInstruction(BBI));
- if (&*BBI == GetObjCArg(Inst)) {
- DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
+ if (&*BBI == GetArgRCIdentityRoot(Inst)) {
+ DEBUG(dbgs() << "Adding inline asm marker for "
"retainAutoreleasedReturnValue optimization.\n");
Changed = true;
InlineAsm *IA =
@@ -400,9 +458,9 @@ bool ObjCARCContract::runOnFunction(Function &F) {
CallInst::Create(IA, "", Inst);
}
decline_rv_optimization:
- break;
+ return false;
}
- case IC_InitWeak: {
+ case ARCInstKind::InitWeak: {
// objc_initWeak(p, null) => *p = null
CallInst *CI = cast<CallInst>(Inst);
if (IsNullOrUndef(CI->getArgOperand(1))) {
@@ -417,31 +475,80 @@ bool ObjCARCContract::runOnFunction(Function &F) {
CI->replaceAllUsesWith(Null);
CI->eraseFromParent();
}
- continue;
+ return true;
}
- case IC_Release:
- ContractRelease(Inst, I);
- continue;
- case IC_User:
+ case ARCInstKind::Release:
+ // Try to form an objc store strong from our release. If we fail, there is
+ // nothing further to do below, so continue.
+ tryToContractReleaseIntoStoreStrong(Inst, Iter);
+ return true;
+ case ARCInstKind::User:
// Be conservative if the function has any alloca instructions.
// Technically we only care about escaping alloca instructions,
// but this is sufficient to handle some interesting cases.
if (isa<AllocaInst>(Inst))
TailOkForStoreStrongs = false;
- continue;
- case IC_IntrinsicUser:
+ return true;
+ case ARCInstKind::IntrinsicUser:
// Remove calls to @clang.arc.use(...).
Inst->eraseFromParent();
- continue;
+ return true;
default:
- continue;
+ return true;
}
+}
+
+//===----------------------------------------------------------------------===//
+// Top Level Driver
+//===----------------------------------------------------------------------===//
+
+bool ObjCARCContract::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n");
+
+ // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+ // keyword. Be conservative if the function has variadic arguments.
+ // It seems that functions which "return twice" are also unsafe for the
+ // "tail" argument, because they are setjmp, which could need to
+ // return to an earlier stack state.
+ bool TailOkForStoreStrongs =
+ !F.isVarArg() && !F.callsFunctionThatReturnsTwice();
+
+ // For ObjC library calls which return their argument, replace uses of the
+ // argument with uses of the call return value, if it dominates the use. This
+ // reduces register pressure.
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
+ Instruction *Inst = &*I++;
+
+ DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
+
+ // First try to peephole Inst. If there is nothing further we can do in
+ // terms of undoing objc-arc-expand, process the next inst.
+ if (tryToPeepholeInstruction(F, Inst, I, DependingInstructions, Visited,
+ TailOkForStoreStrongs))
+ continue;
- DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n");
+ // Otherwise, try to undo objc-arc-expand.
- // Don't use GetObjCArg because we don't want to look through bitcasts
+ // Don't use GetArgRCIdentityRoot because we don't want to look through bitcasts
// and such; to do the replacement, the argument must have type i8*.
Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+
+ // TODO: Change this to a do-while.
for (;;) {
// If we're compiling bugpointed code, don't get in trouble.
if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
@@ -458,7 +565,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// reachability here because an unreachable call is considered to
// trivially dominate itself, which would lead us to rewriting its
// argument in terms of its return value, which would lead to
- // infinite loops in GetObjCArg.
+ // infinite loops in GetArgRCIdentityRoot.
if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
Changed = true;
Instruction *Replacement = Inst;
@@ -514,3 +621,45 @@ bool ObjCARCContract::runOnFunction(Function &F) {
return Changed;
}
+
+//===----------------------------------------------------------------------===//
+// Misc Pass Manager
+//===----------------------------------------------------------------------===//
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract",
+ "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract",
+ "ObjC ARC contraction", false, false)
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+}
+
+Pass *llvm::createObjCARCContractPass() { return new ObjCARCContract(); }
+
+bool ObjCARCContract::doInitialization(Module &M) {
+ // If nothing in the Module uses ARC, don't do anything.
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ EP.init(&M);
+
+ // Initialize RetainRVMarker.
+ RetainRVMarker = nullptr;
+ if (NamedMDNode *NMD =
+ M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
+ if (NMD->getNumOperands() == 1) {
+ const MDNode *N = NMD->getOperand(0);
+ if (N->getNumOperands() == 1)
+ if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
+ RetainRVMarker = S;
+ }
+
+ return false;
+}
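
The doc comment added to tryToContractReleaseIntoStoreStrong stresses that objc_storeStrong must retain the new value before releasing the old one, since the old value's destructor might otherwise drop the last reference to the new value. The standalone toy below (an analogy with invented Obj/storeStrong names, not ObjC runtime code) reproduces the (1)-(4) ordering from that comment.

#include <cassert>

// Toy refcounted object; Peer models "old_value releases new_value in its
// destructor" from the comment above (e.g. a container holding the new value).
struct Obj {
  int RefCount = 1;
  Obj *Peer = nullptr;  // reference this object drops when it dies
};

static void retain(Obj *O) { if (O) ++O->RefCount; }
static void release(Obj *O) {
  if (!O || --O->RefCount != 0)
    return;
  release(O->Peer);  // destructor releases what it owns
  O->RefCount = -1;  // mark as destroyed for the assertions below
}

// storeStrong(slot, new): the safe order is load old, retain new, release old,
// store new -- matching steps (1)-(4) in the doc comment.
static void storeStrong(Obj *&Slot, Obj *New) {
  Obj *Old = Slot;  // (1) load old value
  retain(New);      // (2) retain new value first
  release(Old);     // (3) then release old value
  Slot = New;       // (4) store new value
}

int main() {
  Obj *New = new Obj();           // refcount 1: our temporary reference
  Obj *Old = new Obj();           // refcount 1: held by the slot below
  Old->Peer = New; retain(New);   // Old also owns a reference to New
  release(New);                   // drop our temporary; only Old owns New now

  Obj *Slot = Old;                // the strong slot currently holds Old
  storeStrong(Slot, New);         // overwrite Old with New

  // New survived: it was retained in (2) before Old's destructor released it
  // in (3). With the order reversed, New would have been destroyed first.
  assert(Slot == New && New->RefCount == 1);
  assert(Old->RefCount == -1);    // Old was destroyed by the release
  delete New; delete Old;         // toy cleanup; not refcount-driven
  return 0;
}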
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index bf9fcbb..53c19c3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -99,13 +99,13 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
- switch (GetBasicInstructionClass(Inst)) {
- case IC_Retain:
- case IC_RetainRV:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV: {
+ switch (GetBasicARCInstKind(Inst)) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV: {
// These calls return their argument verbatim, as a low-level
// optimization. However, this makes high-level optimizations
// harder. Undo any uses of this optimization that the front-end
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 76932e6..dca3f1b 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -26,9 +26,11 @@
#include "ObjCARC.h"
#include "ARCRuntimeEntryPoints.h"
+#include "BlotMapVector.h"
#include "DependencyAnalysis.h"
#include "ObjCARCAliasAnalysis.h"
#include "ProvenanceAnalysis.h"
+#include "PtrState.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -45,106 +47,10 @@ using namespace llvm::objcarc;
#define DEBUG_TYPE "objc-arc-opts"
-/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
-/// @{
-
-namespace {
- /// \brief An associative container with fast insertion-order (deterministic)
- /// iteration over its elements. Plus the special blot operation.
- template<class KeyT, class ValueT>
- class MapVector {
- /// Map keys to indices in Vector.
- typedef DenseMap<KeyT, size_t> MapTy;
- MapTy Map;
-
- typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
- /// Keys and values.
- VectorTy Vector;
-
- public:
- typedef typename VectorTy::iterator iterator;
- typedef typename VectorTy::const_iterator const_iterator;
- iterator begin() { return Vector.begin(); }
- iterator end() { return Vector.end(); }
- const_iterator begin() const { return Vector.begin(); }
- const_iterator end() const { return Vector.end(); }
-
-#ifdef XDEBUG
- ~MapVector() {
- assert(Vector.size() >= Map.size()); // May differ due to blotting.
- for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
- I != E; ++I) {
- assert(I->second < Vector.size());
- assert(Vector[I->second].first == I->first);
- }
- for (typename VectorTy::const_iterator I = Vector.begin(),
- E = Vector.end(); I != E; ++I)
- assert(!I->first ||
- (Map.count(I->first) &&
- Map[I->first] == size_t(I - Vector.begin())));
- }
-#endif
-
- ValueT &operator[](const KeyT &Arg) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(Arg, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(std::make_pair(Arg, ValueT()));
- return Vector[Num].second;
- }
- return Vector[Pair.first->second].second;
- }
-
- std::pair<iterator, bool>
- insert(const std::pair<KeyT, ValueT> &InsertPair) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(InsertPair.first, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(InsertPair);
- return std::make_pair(Vector.begin() + Num, true);
- }
- return std::make_pair(Vector.begin() + Pair.first->second, false);
- }
-
- iterator find(const KeyT &Key) {
- typename MapTy::iterator It = Map.find(Key);
- if (It == Map.end()) return Vector.end();
- return Vector.begin() + It->second;
- }
-
- const_iterator find(const KeyT &Key) const {
- typename MapTy::const_iterator It = Map.find(Key);
- if (It == Map.end()) return Vector.end();
- return Vector.begin() + It->second;
- }
-
- /// This is similar to erase, but instead of removing the element from the
- /// vector, it just zeros out the key in the vector. This leaves iterators
- /// intact, but clients must be prepared for zeroed-out keys when iterating.
- void blot(const KeyT &Key) {
- typename MapTy::iterator It = Map.find(Key);
- if (It == Map.end()) return;
- Vector[It->second].first = KeyT();
- Map.erase(It);
- }
-
- void clear() {
- Map.clear();
- Vector.clear();
- }
- };
-}
-
-/// @}
-///
/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
/// @{
-/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon
+/// \brief This is similar to GetRCIdentityRoot but it stops as soon
/// as it finds a value with multiple uses.
static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
if (Arg->hasOneUse()) {
@@ -153,7 +59,7 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
if (GEP->hasAllZeroIndices())
return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
- if (IsForwarding(GetBasicInstructionClass(Arg)))
+ if (IsForwarding(GetBasicARCInstKind(Arg)))
return FindSingleUseIdentifiedObject(
cast<CallInst>(Arg)->getArgOperand(0));
if (!IsObjCIdentifiedObject(Arg))
@@ -165,7 +71,7 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
// trivial uses, we can still consider this to be a single-use value.
if (IsObjCIdentifiedObject(Arg)) {
for (const User *U : Arg->users())
- if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+ if (!U->use_empty() || GetRCIdentityRoot(U) != Arg)
return nullptr;
return Arg;
@@ -177,13 +83,14 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
/// This is a wrapper around getUnderlyingObjCPtr along the lines of
/// GetUnderlyingObjects except that it returns early when it sees the first
/// alloca.
-static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V,
+ const DataLayout &DL) {
SmallPtrSet<const Value *, 4> Visited;
SmallVector<const Value *, 4> Worklist;
Worklist.push_back(V);
do {
const Value *P = Worklist.pop_back_val();
- P = GetUnderlyingObjCPtr(P);
+ P = GetUnderlyingObjCPtr(P, DL);
if (isa<AllocaInst>(P))
return true;
@@ -198,8 +105,8 @@ static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
}
if (const PHINode *PN = dyn_cast<const PHINode>(P)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
continue;
}
} while (!Worklist.empty());
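
The loop above is a depth-first worklist walk over a value's underlying objects that stops at the first alloca; the real code also carries a Visited set to avoid rescanning. A reduced sketch of the same shape, with a stand-in Val type instead of llvm::Value:

#include <cassert>
#include <vector>

struct Val {
  bool IsAlloca = false;
  std::vector<const Val *> Underlying; // stands in for select/phi operands
};

// Walks underlying values and stops as soon as the first alloca is seen.
static bool anyUnderlyingIsAlloca(const Val *V) {
  std::vector<const Val *> Work = {V};
  while (!Work.empty()) {
    const Val *P = Work.back();
    Work.pop_back();
    if (P->IsAlloca)
      return true;
    for (const Val *Op : P->Underlying)
      Work.push_back(Op);
  }
  return false;
}

int main() {
  Val Alloca, Global, Phi;
  Alloca.IsAlloca = true;
  Phi.Underlying = {&Global, &Alloca};
  assert(anyUnderlyingIsAlloca(&Phi));
  assert(!anyUnderlyingIsAlloca(&Global));
  return 0;
}
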
@@ -270,293 +177,6 @@ STATISTIC(NumReleasesAfterOpt,
#endif
namespace {
- /// \enum Sequence
- ///
- /// \brief A sequence of states that a pointer may go through in which an
- /// objc_retain and objc_release are actually needed.
- enum Sequence {
- S_None,
- S_Retain, ///< objc_retain(x).
- S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
- S_Use, ///< any use of x.
- S_Stop, ///< like S_Release, but code motion is stopped.
- S_Release, ///< objc_release(x).
- S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
- };
-
- raw_ostream &operator<<(raw_ostream &OS, const Sequence S)
- LLVM_ATTRIBUTE_UNUSED;
- raw_ostream &operator<<(raw_ostream &OS, const Sequence S) {
- switch (S) {
- case S_None:
- return OS << "S_None";
- case S_Retain:
- return OS << "S_Retain";
- case S_CanRelease:
- return OS << "S_CanRelease";
- case S_Use:
- return OS << "S_Use";
- case S_Release:
- return OS << "S_Release";
- case S_MovableRelease:
- return OS << "S_MovableRelease";
- case S_Stop:
- return OS << "S_Stop";
- }
- llvm_unreachable("Unknown sequence type.");
- }
-}
-
-static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
- // The easy cases.
- if (A == B)
- return A;
- if (A == S_None || B == S_None)
- return S_None;
-
- if (A > B) std::swap(A, B);
- if (TopDown) {
- // Choose the side which is further along in the sequence.
- if ((A == S_Retain || A == S_CanRelease) &&
- (B == S_CanRelease || B == S_Use))
- return B;
- } else {
- // Choose the side which is further along in the sequence.
- if ((A == S_Use || A == S_CanRelease) &&
- (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
- return A;
- // If both sides are releases, choose the more conservative one.
- if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
- return A;
- if (A == S_Release && B == S_MovableRelease)
- return A;
- }
-
- return S_None;
-}
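
To make the merge rules concrete, here are a few cases worked through in a self-contained sketch that mirrors the enum order and rules of the MergeSeqs being removed; all names are local to the sketch:

#include <cassert>
#include <utility>

enum Seq { S_None, S_Retain, S_CanRelease, S_Use, S_Stop, S_Release, S_MovableRelease };

static Seq mergeSeqs(Seq A, Seq B, bool TopDown) {
  if (A == B)
    return A;
  if (A == S_None || B == S_None)
    return S_None;
  if (A > B)
    std::swap(A, B);
  if (TopDown) {
    if ((A == S_Retain || A == S_CanRelease) && (B == S_CanRelease || B == S_Use))
      return B; // further along the top-down sequence
  } else {
    if ((A == S_Use || A == S_CanRelease) &&
        (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
      return A; // further along the bottom-up sequence
    if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
      return A; // more conservative of two releases
    if (A == S_Release && B == S_MovableRelease)
      return A;
  }
  return S_None;
}

int main() {
  assert(mergeSeqs(S_Retain, S_CanRelease, /*TopDown=*/true) == S_CanRelease);
  assert(mergeSeqs(S_Use, S_MovableRelease, /*TopDown=*/false) == S_Use);
  assert(mergeSeqs(S_Stop, S_MovableRelease, /*TopDown=*/false) == S_Stop);
  assert(mergeSeqs(S_Retain, S_Release, /*TopDown=*/false) == S_None);
  return 0;
}
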
-
-namespace {
- /// \brief Unidirectional information about either a
- /// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverse sequence.
- struct RRInfo {
- /// After an objc_retain, the reference count of the referenced
- /// object is known to be positive. Similarly, before an objc_release, the
- /// reference count of the referenced object is known to be positive. If
- /// there are retain-release pairs in code regions where the retain count
- /// is known to be positive, they can be eliminated, regardless of any side
- /// effects between them.
- ///
- /// Also, a retain+release pair nested within another retain+release
- /// pair all on the known same pointer value can be eliminated, regardless
- /// of any intervening side effects.
- ///
- /// KnownSafe is true when either of these conditions is satisfied.
- bool KnownSafe;
-
-    /// True if the objc_release calls are all marked with the "tail" keyword.
- bool IsTailCallRelease;
-
- /// If the Calls are objc_release calls and they all have a
- /// clang.imprecise_release tag, this is the metadata tag.
- MDNode *ReleaseMetadata;
-
- /// For a top-down sequence, the set of objc_retains or
- /// objc_retainBlocks. For bottom-up, the set of objc_releases.
- SmallPtrSet<Instruction *, 2> Calls;
-
- /// The set of optimal insert positions for moving calls in the opposite
- /// sequence.
- SmallPtrSet<Instruction *, 2> ReverseInsertPts;
-
- /// If this is true, we cannot perform code motion but can still remove
- /// retain/release pairs.
- bool CFGHazardAfflicted;
-
- RRInfo() :
- KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
- CFGHazardAfflicted(false) {}
-
- void clear();
-
- /// Conservatively merge the two RRInfo. Returns true if a partial merge has
- /// occurred, false otherwise.
- bool Merge(const RRInfo &Other);
-
- };
-}
-
-void RRInfo::clear() {
- KnownSafe = false;
- IsTailCallRelease = false;
- ReleaseMetadata = nullptr;
- Calls.clear();
- ReverseInsertPts.clear();
- CFGHazardAfflicted = false;
-}
-
-bool RRInfo::Merge(const RRInfo &Other) {
- // Conservatively merge the ReleaseMetadata information.
- if (ReleaseMetadata != Other.ReleaseMetadata)
- ReleaseMetadata = nullptr;
-
- // Conservatively merge the boolean state.
- KnownSafe &= Other.KnownSafe;
- IsTailCallRelease &= Other.IsTailCallRelease;
- CFGHazardAfflicted |= Other.CFGHazardAfflicted;
-
- // Merge the call sets.
- Calls.insert(Other.Calls.begin(), Other.Calls.end());
-
- // Merge the insert point sets. If there are any differences,
- // that makes this a partial merge.
- bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size();
- for (Instruction *Inst : Other.ReverseInsertPts)
- Partial |= ReverseInsertPts.insert(Inst).second;
- return Partial;
-}
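
A detail worth calling out in the merge above: the result is flagged partial whenever the two reverse-insert-point sets are not identical, not merely when their sizes differ. A stand-in sketch with plain ints in place of Instruction pointers:

#include <cassert>
#include <set>

static bool mergeInsertPts(std::set<int> &Mine, const std::set<int> &Other) {
  bool Partial = Mine.size() != Other.size();
  for (int P : Other)
    Partial |= Mine.insert(P).second; // true if P was new to Mine
  return Partial;
}

int main() {
  std::set<int> A{1, 2}, B{1, 2};
  assert(!mergeInsertPts(A, B)); // identical sets: full merge
  std::set<int> C{1}, D{2};
  assert(mergeInsertPts(C, D));  // same size, different points: still partial
  std::set<int> E{1}, F{1, 3};
  assert(mergeInsertPts(E, F));  // the other side adds a point: partial
  return 0;
}
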
-
-namespace {
- /// \brief This class summarizes several per-pointer runtime properties which
-  /// are propagated through the flow graph.
- class PtrState {
- /// True if the reference count is known to be incremented.
- bool KnownPositiveRefCount;
-
- /// True if we've seen an opportunity for partial RR elimination, such as
- /// pushing calls into a CFG triangle or into one side of a CFG diamond.
- bool Partial;
-
- /// The current position in the sequence.
- unsigned char Seq : 8;
-
- /// Unidirectional information about the current sequence.
- RRInfo RRI;
-
- public:
- PtrState() : KnownPositiveRefCount(false), Partial(false),
- Seq(S_None) {}
-
-
- bool IsKnownSafe() const {
- return RRI.KnownSafe;
- }
-
- void SetKnownSafe(const bool NewValue) {
- RRI.KnownSafe = NewValue;
- }
-
- bool IsTailCallRelease() const {
- return RRI.IsTailCallRelease;
- }
-
- void SetTailCallRelease(const bool NewValue) {
- RRI.IsTailCallRelease = NewValue;
- }
-
- bool IsTrackingImpreciseReleases() const {
- return RRI.ReleaseMetadata != nullptr;
- }
-
- const MDNode *GetReleaseMetadata() const {
- return RRI.ReleaseMetadata;
- }
-
- void SetReleaseMetadata(MDNode *NewValue) {
- RRI.ReleaseMetadata = NewValue;
- }
-
- bool IsCFGHazardAfflicted() const {
- return RRI.CFGHazardAfflicted;
- }
-
- void SetCFGHazardAfflicted(const bool NewValue) {
- RRI.CFGHazardAfflicted = NewValue;
- }
-
- void SetKnownPositiveRefCount() {
- DEBUG(dbgs() << "Setting Known Positive.\n");
- KnownPositiveRefCount = true;
- }
-
- void ClearKnownPositiveRefCount() {
- DEBUG(dbgs() << "Clearing Known Positive.\n");
- KnownPositiveRefCount = false;
- }
-
- bool HasKnownPositiveRefCount() const {
- return KnownPositiveRefCount;
- }
-
- void SetSeq(Sequence NewSeq) {
- DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n");
- Seq = NewSeq;
- }
-
- Sequence GetSeq() const {
- return static_cast<Sequence>(Seq);
- }
-
- void ClearSequenceProgress() {
- ResetSequenceProgress(S_None);
- }
-
- void ResetSequenceProgress(Sequence NewSeq) {
- DEBUG(dbgs() << "Resetting sequence progress.\n");
- SetSeq(NewSeq);
- Partial = false;
- RRI.clear();
- }
-
- void Merge(const PtrState &Other, bool TopDown);
-
- void InsertCall(Instruction *I) {
- RRI.Calls.insert(I);
- }
-
- void InsertReverseInsertPt(Instruction *I) {
- RRI.ReverseInsertPts.insert(I);
- }
-
- void ClearReverseInsertPts() {
- RRI.ReverseInsertPts.clear();
- }
-
- bool HasReverseInsertPts() const {
- return !RRI.ReverseInsertPts.empty();
- }
-
- const RRInfo &GetRRInfo() const {
- return RRI;
- }
- };
-}
-
-void
-PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown);
- KnownPositiveRefCount &= Other.KnownPositiveRefCount;
-
- // If we're not in a sequence (anymore), drop all associated state.
- if (Seq == S_None) {
- Partial = false;
- RRI.clear();
- } else if (Partial || Other.Partial) {
- // If we're doing a merge on a path that's previously seen a partial
- // merge, conservatively drop the sequence, to avoid doing partial
-    // RR elimination. If the branch predicates for the two merges differ,
- // mixing them is unsafe.
- ClearSequenceProgress();
- } else {
- // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this
- // point, we know that currently we are not partial. Stash whether or not
- // the merge operation caused us to undergo a partial merging of reverse
- // insertion points.
- Partial = RRI.Merge(Other.RRI);
- }
-}
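
The policy above reduces to: take the lattice merge of the sequences, intersect the known facts, and drop everything if either side has previously seen a partial merge. A deliberately simplified sketch of that control flow; MiniPtrState is illustrative, not the real PtrState, and it omits the RRInfo merge of the remaining case:

#include <cassert>

struct MiniPtrState {
  bool KnownPositive = false;
  bool Partial = false;
  int Seq = 0; // 0 stands in for S_None

  // MergedSeq is assumed to come from a lattice merge like MergeSeqs above.
  void merge(const MiniPtrState &Other, int MergedSeq) {
    Seq = MergedSeq;
    KnownPositive &= Other.KnownPositive;
    if (Seq == 0 || Partial || Other.Partial) {
      // Not in a sequence any more, or a previous partial merge: drop state.
      Seq = 0;
      Partial = false;
    }
    // The real code otherwise merges the RRInfo and records whether that
    // merge itself was partial.
  }
};

int main() {
  MiniPtrState A, B;
  A.KnownPositive = true;
  A.Partial = true; // a previous merge on this path was partial
  B.KnownPositive = true;
  A.merge(B, /*MergedSeq=*/3);
  assert(A.Seq == 0); // sequence progress dropped conservatively
  return 0;
}
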
-
-namespace {
/// \brief Per-BasicBlock state.
class BBState {
/// The number of unique control paths from the entry which can reach this
@@ -566,20 +186,18 @@ namespace {
/// The number of unique control paths to exits from this block.
unsigned BottomUpPathCount;
- /// A type for PerPtrTopDown and PerPtrBottomUp.
- typedef MapVector<const Value *, PtrState> MapTy;
-
/// The top-down traversal uses this to record information known about a
/// pointer at the bottom of each block.
- MapTy PerPtrTopDown;
+ BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown;
/// The bottom-up traversal uses this to record information known about a
/// pointer at the top of each block.
- MapTy PerPtrBottomUp;
+ BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp;
/// Effective predecessors of the current block ignoring ignorable edges and
/// ignored backedges.
SmallVector<BasicBlock *, 2> Preds;
+
/// Effective successors of the current block ignoring ignorable edges and
/// ignored backedges.
SmallVector<BasicBlock *, 2> Succs;
@@ -589,26 +207,38 @@ namespace {
BBState() : TopDownPathCount(0), BottomUpPathCount(0) { }
- typedef MapTy::iterator ptr_iterator;
- typedef MapTy::const_iterator ptr_const_iterator;
+ typedef decltype(PerPtrTopDown)::iterator top_down_ptr_iterator;
+ typedef decltype(PerPtrTopDown)::const_iterator const_top_down_ptr_iterator;
- ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
- ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
- ptr_const_iterator top_down_ptr_begin() const {
+ top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ const_top_down_ptr_iterator top_down_ptr_begin() const {
return PerPtrTopDown.begin();
}
- ptr_const_iterator top_down_ptr_end() const {
+ const_top_down_ptr_iterator top_down_ptr_end() const {
return PerPtrTopDown.end();
}
+ bool hasTopDownPtrs() const {
+ return !PerPtrTopDown.empty();
+ }
- ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
- ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
- ptr_const_iterator bottom_up_ptr_begin() const {
+ typedef decltype(PerPtrBottomUp)::iterator bottom_up_ptr_iterator;
+ typedef decltype(
+ PerPtrBottomUp)::const_iterator const_bottom_up_ptr_iterator;
+
+ bottom_up_ptr_iterator bottom_up_ptr_begin() {
+ return PerPtrBottomUp.begin();
+ }
+ bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ const_bottom_up_ptr_iterator bottom_up_ptr_begin() const {
return PerPtrBottomUp.begin();
}
- ptr_const_iterator bottom_up_ptr_end() const {
+ const_bottom_up_ptr_iterator bottom_up_ptr_end() const {
return PerPtrBottomUp.end();
}
+ bool hasBottomUpPtrs() const {
+ return !PerPtrBottomUp.empty();
+ }
/// Mark this block as being an entry block, which has one path from the
/// entry by definition.
@@ -621,20 +251,20 @@ namespace {
/// Attempt to find the PtrState object describing the top down state for
/// pointer Arg. Return a new initialized PtrState describing the top down
/// state for Arg if we do not find one.
- PtrState &getPtrTopDownState(const Value *Arg) {
+ TopDownPtrState &getPtrTopDownState(const Value *Arg) {
return PerPtrTopDown[Arg];
}
/// Attempt to find the PtrState object describing the bottom up state for
/// pointer Arg. Return a new initialized PtrState describing the bottom up
/// state for Arg if we do not find one.
- PtrState &getPtrBottomUpState(const Value *Arg) {
+ BottomUpPtrState &getPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp[Arg];
}
/// Attempt to find the PtrState object describing the bottom up state for
/// pointer Arg.
- ptr_iterator findPtrBottomUpState(const Value *Arg) {
+ bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp.find(Arg);
}
@@ -685,6 +315,11 @@ namespace {
const unsigned BBState::OverflowOccurredValue = 0xffffffff;
}
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS,
+ BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
+}
+
void BBState::InitFromPred(const BBState &Other) {
PerPtrTopDown = Other.PerPtrTopDown;
TopDownPathCount = Other.TopDownPathCount;
@@ -724,19 +359,18 @@ void BBState::MergePred(const BBState &Other) {
// For each entry in the other set, if our set has an entry with the same key,
// merge the entries. Otherwise, copy the entry and merge it with an empty
// entry.
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
- ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
+ MI != ME; ++MI) {
+ auto Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
/*TopDown=*/true);
}
// For each entry in our set, if the other set doesn't have an entry with the
// same key, force it to merge with an empty entry.
- for (ptr_iterator MI = top_down_ptr_begin(),
- ME = top_down_ptr_end(); MI != ME; ++MI)
+ for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
- MI->second.Merge(PtrState(), /*TopDown=*/true);
+ MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
}
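
The two-loop shape of MergePred (and of MergeSucc below) is a pointwise meet of two maps: keys present on either side get merged, and a key missing on one side is merged against a default-constructed state. A generic, self-contained sketch of that shape with standard containers:

#include <cassert>
#include <map>
#include <string>

struct Fact {
  bool Known = false;
  void merge(const Fact &Other) { Known &= Other.Known; }
};

static void mergeMaps(std::map<std::string, Fact> &Mine,
                      const std::map<std::string, Fact> &Other) {
  for (const auto &KV : Other) {
    auto Ins = Mine.insert(KV); // copies the entry if the key was new
    Ins.first->second.merge(Ins.second ? Fact() : KV.second);
  }
  for (auto &KV : Mine)
    if (!Other.count(KV.first))
      KV.second.merge(Fact()); // missing on the other side: merge with empty
}

int main() {
  std::map<std::string, Fact> A, B;
  A["p"].Known = true;
  B["p"].Known = true;
  B["q"].Known = true;
  mergeMaps(A, B);
  assert(A["p"].Known);  // both sides agreed on p
  assert(!A["q"].Known); // q was unknown on one path, so it is dropped
  return 0;
}
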
/// The bottom-up traversal uses this to merge information about successors to
@@ -768,304 +402,80 @@ void BBState::MergeSucc(const BBState &Other) {
// For each entry in the other set, if our set has an entry with the
// same key, merge the entries. Otherwise, copy the entry and merge
// it with an empty entry.
- for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
- ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
+ MI != ME; ++MI) {
+ auto Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
/*TopDown=*/false);
}
// For each entry in our set, if the other set doesn't have an entry
// with the same key, force it to merge with an empty entry.
- for (ptr_iterator MI = bottom_up_ptr_begin(),
- ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
+ ++MI)
if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
- MI->second.Merge(PtrState(), /*TopDown=*/false);
+ MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
}
-// Only enable ARC Annotations if we are building a debug version of
-// libObjCARCOpts.
-#ifndef NDEBUG
-#define ARC_ANNOTATIONS
-#endif
-
-// Define some macros along the lines of DEBUG and some helper functions to make
-// it cleaner to create annotations in the source code and to no-op when not
-// building in debug mode.
-#ifdef ARC_ANNOTATIONS
-
-#include "llvm/Support/CommandLine.h"
-
-/// Enable/disable ARC sequence annotations.
-static cl::opt<bool>
-EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false),
- cl::desc("Enable emission of arc data flow analysis "
- "annotations"));
-static cl::opt<bool>
-DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false),
- cl::desc("Disable check for cfg hazards when "
- "annotating"));
-static cl::opt<std::string>
-ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
- cl::init(""),
- cl::desc("filter out all data flow annotations "
- "but those that apply to the given "
- "target llvm identifier."));
-
-/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
-/// instruction so that we can track backwards when post processing via the llvm
-/// arc annotation processor tool. If the function is an
-static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
- Value *Ptr) {
- MDString *Hash = nullptr;
-
-  // If the pointer is the result of an instruction and it does not have a
-  // source MDNode attached to it, attach a new MDNode onto it. If the pointer
-  // is the result of an instruction and does have a source MDNode attached to
-  // it, return a reference to said Node. Otherwise just return 0.
- if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) {
- MDNode *Node;
- if (!(Node = Inst->getMetadata(NodeId))) {
-      // We do not have any node. Generate and attach the hash MDString to the
- // instruction.
-
- // We just use an MDString to ensure that this metadata gets written out
- // of line at the module level and to provide a very simple format
-      // encoding the information herein. Both of these make it simpler to
- // parse the annotations by a simple external program.
- std::string Str;
- raw_string_ostream os(Str);
- os << "(" << Inst->getParent()->getParent()->getName() << ",%"
- << Inst->getName() << ")";
-
- Hash = MDString::get(Inst->getContext(), os.str());
- Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash));
- } else {
- // We have a node. Grab its hash and return it.
- assert(Node->getNumOperands() == 1 &&
- "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand.");
- Hash = cast<MDString>(Node->getOperand(0));
+raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
+ // Dump the pointers we are tracking.
+ OS << " TopDown State:\n";
+ if (!BBInfo.hasTopDownPtrs()) {
+ DEBUG(llvm::dbgs() << " NONE!\n");
+ } else {
+ for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end();
+ I != E; ++I) {
+ const PtrState &P = I->second;
+ OS << " Ptr: " << *I->first
+ << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
+ << "\n ImpreciseRelease: "
+ << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
+ << " HasCFGHazards: "
+ << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
+ << " KnownPositive: "
+ << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
+ << " Seq: "
+ << P.GetSeq() << "\n";
}
- } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
- std::string str;
- raw_string_ostream os(str);
- os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName()
- << ")";
- Hash = MDString::get(Arg->getContext(), os.str());
}
- return Hash;
-}
-
-static std::string SequenceToString(Sequence A) {
- std::string str;
- raw_string_ostream os(str);
- os << A;
- return os.str();
-}
-
-/// Helper function to change a Sequence into a String object using our overload
-/// for raw_ostream so we only have printing code in one location.
-static MDString *SequenceToMDString(LLVMContext &Context,
- Sequence A) {
- return MDString::get(Context, SequenceToString(A));
-}
-
-/// A simple function to generate a MDNode which describes the change in state
-/// for Value *Ptr caused by Instruction *Inst.
-static void AppendMDNodeToInstForPtr(unsigned NodeId,
- Instruction *Inst,
- Value *Ptr,
- MDString *PtrSourceMDNodeID,
- Sequence OldSeq,
- Sequence NewSeq) {
- MDNode *Node = nullptr;
- Metadata *tmp[3] = {PtrSourceMDNodeID,
- SequenceToMDString(Inst->getContext(), OldSeq),
- SequenceToMDString(Inst->getContext(), NewSeq)};
- Node = MDNode::get(Inst->getContext(), tmp);
-
- Inst->setMetadata(NodeId, Node);
-}
-
-/// Add to the beginning of the basic block llvm.ptr.annotations which show the
-/// state of a pointer at the entrance to a basic block.
-static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
- Value *Ptr, Sequence Seq) {
- // If we have a target identifier, make sure that we match it before
- // continuing.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- Module *M = BB->getParent()->getParent();
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = {I8XX, I8XX};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
- Constant *Callee = M->getOrInsertFunction(Name, FTy);
-
- IRBuilder<> Builder(BB, BB->getFirstInsertionPt());
-
- Value *PtrName;
- StringRef Tmp = Ptr->getName();
- if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
- Tmp + "_STR");
- PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), Tmp);
- }
-
- Value *S;
- std::string SeqStr = SequenceToString(Seq);
- if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
- SeqStr + "_STR");
- S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), SeqStr);
- }
-
- Builder.CreateCall2(Callee, PtrName, S);
-}
-
-/// Add to the end of the basic block llvm.ptr.annotations which show the state
-/// of the pointer at the bottom of the basic block.
-static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
- Value *Ptr, Sequence Seq) {
- // If we have a target identifier, make sure that we match it before emitting
- // an annotation.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- Module *M = BB->getParent()->getParent();
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = {I8XX, I8XX};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
- Constant *Callee = M->getOrInsertFunction(Name, FTy);
-
- IRBuilder<> Builder(BB, std::prev(BB->end()));
-
- Value *PtrName;
- StringRef Tmp = Ptr->getName();
- if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
- Tmp + "_STR");
- PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), Tmp);
- }
-
- Value *S;
- std::string SeqStr = SequenceToString(Seq);
- if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
- SeqStr + "_STR");
- S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), SeqStr);
+ OS << " BottomUp State:\n";
+ if (!BBInfo.hasBottomUpPtrs()) {
+ DEBUG(llvm::dbgs() << " NONE!\n");
+ } else {
+ for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end();
+ I != E; ++I) {
+ const PtrState &P = I->second;
+ OS << " Ptr: " << *I->first
+ << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
+ << "\n ImpreciseRelease: "
+ << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
+ << " HasCFGHazards: "
+ << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
+ << " KnownPositive: "
+ << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
+ << " Seq: "
+ << P.GetSeq() << "\n";
+ }
}
- Builder.CreateCall2(Callee, PtrName, S);
-}
-/// Adds a source annotation to pointer and a state change annotation to Inst
-/// referencing the source annotation and the old/new state of pointer.
-static void GenerateARCAnnotation(unsigned InstMDId,
- unsigned PtrMDId,
- Instruction *Inst,
- Value *Ptr,
- Sequence OldSeq,
- Sequence NewSeq) {
- if (EnableARCAnnotations) {
- // If we have a target identifier, make sure that we match it before
- // emitting an annotation.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- // First generate the source annotation on our pointer. This will return an
- // MDString* if Ptr actually comes from an instruction implying we can put
- // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
- // then we know that our pointer is from an Argument so we put a reference
- // to the argument number.
- //
- // The point of this is to make it easy for the
- // llvm-arc-annotation-processor tool to cross reference where the source
- // pointer is in the LLVM IR since the LLVM IR parser does not submit such
- // information via debug info for backends to use (since why would anyone
- // need such a thing from LLVM IR besides in non-standard cases
- // [i.e. this]).
- MDString *SourcePtrMDNode =
- AppendMDNodeToSourcePtr(PtrMDId, Ptr);
- AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq,
- NewSeq);
- }
+ return OS;
}
-// The actual interface for accessing the above functionality is defined via
-// some simple macros which are defined below. We do this so that the user does
-// not need to pass in which metadata id is needed, resulting in cleaner code,
-// and additionally provides an easy way to conditionally no-op all
-// annotation support in a non-debug build.
-
-/// Use this macro to annotate a sequence state change when processing
-/// instructions bottom up.
-#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \
- GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \
- ARCAnnotationProvenanceSourceMDKind, (inst), \
- const_cast<Value*>(ptr), (old), (new))
-/// Use this macro to annotate a sequence state change when processing
-/// instructions top down.
-#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \
- GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \
- ARCAnnotationProvenanceSourceMDKind, (inst), \
- const_cast<Value*>(ptr), (old), (new))
-
-#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \
- do { \
- if (EnableARCAnnotations) { \
- for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
- E = (_states)._direction##_ptr_end(); I != E; ++I) { \
- Value *Ptr = const_cast<Value*>(I->first); \
- Sequence Seq = I->second.GetSeq(); \
- GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
- } \
- } \
- } while (0)
-
-#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
- Entrance, bottom_up)
-#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
- Terminator, bottom_up)
-#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
- Entrance, top_down)
-#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
- Terminator, top_down)
-
-#else // !ARC_ANNOTATION
-// If annotations are off, noop.
-#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)
-#define ANNOTATE_TOPDOWN(inst, ptr, old, new)
-#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock)
-#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock)
-#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock)
-#define ANNOTATE_TOPDOWN_BBEND(states, basicblock)
-#endif // !ARC_ANNOTATION
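
The machinery removed above follows a common debug-only macro idiom: in assert-enabled builds the macro forwards to a real function, and otherwise it expands to nothing so the call sites cost nothing. A generic sketch of the idiom with illustrative names:

#include <cstdio>

#ifndef NDEBUG
static inline void traceState(const char *Ptr, int OldSeq, int NewSeq) {
  std::fprintf(stderr, "%s: %d -> %d\n", Ptr, OldSeq, NewSeq);
}
#define TRACE_STATE(Ptr, Old, New) traceState((Ptr), (Old), (New))
#else
// In release builds the macro compiles away entirely.
#define TRACE_STATE(Ptr, Old, New) do { } while (0)
#endif

int main() {
  TRACE_STATE("x", 1, 2); // prints in debug builds, no-op otherwise
  return 0;
}
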
-
namespace {
+
/// \brief The main ARC optimization pass.
class ObjCARCOpt : public FunctionPass {
bool Changed;
ProvenanceAnalysis PA;
+
+ /// A cache of references to runtime entry point constants.
ARCRuntimeEntryPoints EP;
+ /// A cache of MDKinds that can be passed into other functions to propagate
+ /// MDKind identifiers.
+ ARCMDKindCache MDKindCache;
+
// This is used to track if a pointer is stored into an alloca.
DenseSet<const Value *> MultiOwnersSet;
@@ -1076,73 +486,49 @@ namespace {
/// is in fact used in the current function.
unsigned UsedInThisFunction;
- /// The Metadata Kind for clang.imprecise_release metadata.
- unsigned ImpreciseReleaseMDKind;
-
- /// The Metadata Kind for clang.arc.copy_on_escape metadata.
- unsigned CopyOnEscapeMDKind;
-
- /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
- unsigned NoObjCARCExceptionsMDKind;
-
-#ifdef ARC_ANNOTATIONS
- /// The Metadata Kind for llvm.arc.annotation.bottomup metadata.
- unsigned ARCAnnotationBottomUpMDKind;
- /// The Metadata Kind for llvm.arc.annotation.topdown metadata.
- unsigned ARCAnnotationTopDownMDKind;
- /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata.
- unsigned ARCAnnotationProvenanceSourceMDKind;
-#endif // ARC_ANNOTATIONS
-
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
- InstructionClass &Class);
+ ARCInstKind &Class);
void OptimizeIndividualCalls(Function &F);
void CheckForCFGHazards(const BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
BBState &MyStates) const;
- bool VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
+ bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
+ BlotMapVector<Value *, RRInfo> &Retains,
BBState &MyStates);
bool VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains);
+ BlotMapVector<Value *, RRInfo> &Retains);
bool VisitInstructionTopDown(Instruction *Inst,
DenseMap<Value *, RRInfo> &Releases,
BBState &MyStates);
bool VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases);
- bool Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
+ bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases);
void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- Module *M);
-
- bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M,
- SmallVectorImpl<Instruction *> &NewRetains,
- SmallVectorImpl<Instruction *> &NewReleases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- RRInfo &RetainsToMove,
- RRInfo &ReleasesToMove,
- Value *Arg,
- bool KnownSafe,
- bool &AnyPairsCompletelyEliminated);
+ SmallVectorImpl<Instruction *> &DeadInsts, Module *M);
+
+ bool
+ PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M,
+ SmallVectorImpl<Instruction *> &NewRetains,
+ SmallVectorImpl<Instruction *> &NewReleases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ Value *Arg, bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated);
bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M);
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M);
void OptimizeWeakCalls(Function &F);
@@ -1191,7 +577,7 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
bool
ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
// Check for the argument being from an immediately preceding call or invoke.
- const Value *Arg = GetObjCArg(RetainRV);
+ const Value *Arg = GetArgRCIdentityRoot(RetainRV);
ImmutableCallSite CS(Arg);
if (const Instruction *Call = CS.getInstruction()) {
if (Call->getParent() == RetainRV->getParent()) {
@@ -1216,8 +602,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
if (I != Begin) {
do --I; while (I != Begin && IsNoopInstruction(I));
- if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
- GetObjCArg(I) == Arg) {
+ if (GetBasicARCInstKind(I) == ARCInstKind::AutoreleaseRV &&
+ GetArgRCIdentityRoot(I) == Arg) {
Changed = true;
++NumPeeps;
@@ -1238,7 +624,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
"objc_retain since the operand is not a return value.\n"
"Old = " << *RetainRV << "\n");
- Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
DEBUG(dbgs() << "New = " << *RetainRV << "\n");
@@ -1248,17 +634,17 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
/// used as a return value.
-void
-ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
- InstructionClass &Class) {
+void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
+ Instruction *AutoreleaseRV,
+ ARCInstKind &Class) {
// Check for a return of the pointer value.
- const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV);
SmallVector<const Value *, 2> Users;
Users.push_back(Ptr);
do {
Ptr = Users.pop_back_val();
for (const User *U : Ptr->users()) {
- if (isa<ReturnInst>(U) || GetBasicInstructionClass(U) == IC_RetainRV)
+ if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV)
return;
if (isa<BitCastInst>(U))
Users.push_back(U);
@@ -1274,10 +660,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
"Old = " << *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
- Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Autorelease);
+ Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
AutoreleaseRVCI->setCalledFunction(NewDecl);
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
- Class = IC_Autorelease;
+ Class = ARCInstKind::Autorelease;
DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
@@ -1294,7 +680,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
@@ -1309,7 +695,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// There are gray areas here, as the ability to cast reference-counted
    // pointers to raw void* and back allows code to break ARC assumptions;
    // however, these are currently considered to be unimportant.
- case IC_NoopCast:
+ case ARCInstKind::NoopCast:
Changed = true;
++NumNoops;
DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
@@ -1317,11 +703,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
continue;
// If the pointer-to-weak-pointer is null, it's undefined behavior.
- case IC_StoreWeak:
- case IC_LoadWeak:
- case IC_LoadWeakRetained:
- case IC_InitWeak:
- case IC_DestroyWeak: {
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::DestroyWeak: {
CallInst *CI = cast<CallInst>(Inst);
if (IsNullOrUndef(CI->getArgOperand(0))) {
Changed = true;
@@ -1338,8 +724,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
- case IC_CopyWeak:
- case IC_MoveWeak: {
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::MoveWeak: {
CallInst *CI = cast<CallInst>(Inst);
if (IsNullOrUndef(CI->getArgOperand(0)) ||
IsNullOrUndef(CI->getArgOperand(1))) {
@@ -1359,11 +745,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
- case IC_RetainRV:
+ case ARCInstKind::RetainRV:
if (OptimizeRetainRVCall(F, Inst))
continue;
break;
- case IC_AutoreleaseRV:
+ case ARCInstKind::AutoreleaseRV:
OptimizeAutoreleaseRVCall(F, Inst, Class);
break;
}
@@ -1380,10 +766,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Create the declaration lazily.
LLVMContext &C = Inst->getContext();
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
+ NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
+ MDNode::get(C, None));
DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
"since x is otherwise unused.\nOld: " << *Call << "\nNew: "
@@ -1391,7 +778,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
EraseInstruction(Call);
Inst = NewCall;
- Class = IC_Release;
+ Class = ARCInstKind::Release;
}
}
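
The MDKindCache used here (declared earlier on the pass) replaces the individual unsigned MDKind members. The general caching idea, sketched without assuming the real ARCMDKindCache interface; the resolver callback stands in for LLVMContext::getMDKindID:

#include <cassert>
#include <functional>
#include <string>
#include <unordered_map>

class MDKindCacheSketch {
  std::function<unsigned(const std::string &)> Resolver;
  std::unordered_map<std::string, unsigned> IDs;

public:
  explicit MDKindCacheSketch(std::function<unsigned(const std::string &)> R)
      : Resolver(std::move(R)) {}

  unsigned get(const std::string &Name) {
    auto It = IDs.find(Name);
    if (It != IDs.end())
      return It->second;
    unsigned ID = Resolver(Name);
    IDs.emplace(Name, ID);
    return ID;
  }
};

int main() {
  int Lookups = 0;
  MDKindCacheSketch Cache([&Lookups](const std::string &) { return (unsigned)++Lookups; });
  unsigned A = Cache.get("clang.imprecise_release");
  unsigned B = Cache.get("clang.imprecise_release");
  assert(A == B && Lookups == 1); // resolved once, served from the cache after
  return 0;
}
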
@@ -1422,11 +809,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
if (!IsNoopOnNull(Class)) {
- UsedInThisFunction |= 1 << Class;
+ UsedInThisFunction |= 1 << unsigned(Class);
continue;
}
- const Value *Arg = GetObjCArg(Inst);
+ const Value *Arg = GetArgRCIdentityRoot(Inst);
// ARC calls with null are no-ops. Delete them.
if (IsNullOrUndef(Arg)) {
@@ -1440,7 +827,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Keep track of which of retain, release, autorelease, and retain_block
// are actually present in this function.
- UsedInThisFunction |= 1 << Class;
+ UsedInThisFunction |= 1 << unsigned(Class);
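
The added cast to unsigned is needed now that ARCInstKind is a scoped enumeration (the old InstructionClass values converted implicitly); the bitmask itself is simple per-function bookkeeping, as in this small sketch:

#include <cassert>

enum class KindSketch : unsigned { Retain = 0, Release = 1, Autorelease = 2 };

int main() {
  unsigned Used = 0;
  Used |= 1u << unsigned(KindSketch::Retain);
  Used |= 1u << unsigned(KindSketch::Release);
  assert(Used & (1u << unsigned(KindSketch::Retain)));
  assert(!(Used & (1u << unsigned(KindSketch::Autorelease))));
  return 0;
}
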
// If Arg is a PHI, and one or more incoming values to the
// PHI are null, and the call is control-equivalent to the PHI, and there
@@ -1463,7 +850,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
bool HasCriticalEdges = false;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
- StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ GetRCIdentityRoot(PN->getIncomingValue(i));
if (IsNullOrUndef(Incoming))
HasNull = true;
else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
@@ -1480,25 +867,25 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Check that there is nothing that cares about the reference
// count between the call and the phi.
switch (Class) {
- case IC_Retain:
- case IC_RetainBlock:
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainBlock:
// These can always be moved up.
break;
- case IC_Release:
+ case ARCInstKind::Release:
// These can't be moved across things that care about the retain
// count.
FindDependencies(NeedsPositiveRetainCount, Arg,
Inst->getParent(), Inst,
DependingInstructions, Visited, PA);
break;
- case IC_Autorelease:
+ case ARCInstKind::Autorelease:
// These can't be moved across autorelease pool scope boundaries.
FindDependencies(AutoreleasePoolBoundary, Arg,
Inst->getParent(), Inst,
DependingInstructions, Visited, PA);
break;
- case IC_RetainRV:
- case IC_AutoreleaseRV:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::AutoreleaseRV:
// Don't move these; the RV optimization depends on the autoreleaseRV
// being tail called, and the retainRV being immediately after a call
// (which might still happen if we get lucky with codegen layout, but
@@ -1517,7 +904,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
Type *ParamTy = CInst->getArgOperand(0)->getType();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
- StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ GetRCIdentityRoot(PN->getIncomingValue(i));
if (!IsNullOrUndef(Incoming)) {
CallInst *Clone = cast<CallInst>(CInst->clone());
Value *Op = PN->getIncomingValue(i);
@@ -1547,7 +934,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
/// no CFG hazards by checking the states of various bottom up pointers.
static void CheckForUseCFGHazard(const Sequence SuccSSeq,
const bool SuccSRRIKnownSafe,
- PtrState &S,
+ TopDownPtrState &S,
bool &SomeSuccHasSame,
bool &AllSuccsHaveSame,
bool &NotAllSeqEqualButKnownSafe,
@@ -1585,7 +972,7 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq,
/// pointers.
static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
const bool SuccSRRIKnownSafe,
- PtrState &S,
+ TopDownPtrState &S,
bool &SomeSuccHasSame,
bool &AllSuccsHaveSame,
bool &NotAllSeqEqualButKnownSafe) {
@@ -1618,9 +1005,9 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
BBState &MyStates) const {
// If any top-down local-use or possible-dec has a succ which is earlier in
// the sequence, forget it.
- for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
- E = MyStates.top_down_ptr_end(); I != E; ++I) {
- PtrState &S = I->second;
+ for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end();
+ I != E; ++I) {
+ TopDownPtrState &S = I->second;
const Sequence Seq = I->second.GetSeq();
// We only care about S_Retain, S_CanRelease, and S_Use.
@@ -1646,7 +1033,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const DenseMap<const BasicBlock *, BBState>::iterator BBI =
BBStates.find(*SI);
assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
const Sequence SuccSSeq = SuccS.GetSeq();
// If bottom up, the pointer is in an S_None state, clear the sequence
@@ -1705,94 +1092,53 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
}
}
-bool
-ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
- BBState &MyStates) {
+bool ObjCARCOpt::VisitInstructionBottomUp(
+ Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
const Value *Arg = nullptr;
- DEBUG(dbgs() << "Class: " << Class << "\n");
+ DEBUG(dbgs() << " Class: " << Class << "\n");
switch (Class) {
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
-
-    // If we see two releases in a row on the same pointer, make
-    // a note, and we'll circle back to revisit it after we've
-    // hopefully eliminated the second release, which may allow us to
-    // eliminate the first release too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
- DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n");
- NestingDetected = true;
- }
+ case ARCInstKind::Release: {
+ Arg = GetArgRCIdentityRoot(Inst);
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
- ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
- S.ResetSequenceProgress(NewSeq);
- S.SetReleaseMetadata(ReleaseMetadata);
- S.SetKnownSafe(S.HasKnownPositiveRefCount());
- S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall());
- S.InsertCall(Inst);
- S.SetKnownPositiveRefCount();
+ BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
+ NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
break;
}
- case IC_RetainBlock:
+ case ARCInstKind::RetainBlock:
// In OptimizeIndividualCalls, we have strength reduced all optimizable
// objc_retainBlocks to objc_retains. Thus at this point any
// objc_retainBlocks that we see are not optimizable.
break;
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
- S.SetKnownPositiveRefCount();
-
- Sequence OldSeq = S.GetSeq();
- switch (OldSeq) {
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
- // imprecise release, clear our reverse insertion points.
- if (OldSeq != S_Use || S.IsTrackingImpreciseReleases())
- S.ClearReverseInsertPts();
- // FALL THROUGH
- case S_CanRelease:
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV)
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV: {
+ Arg = GetArgRCIdentityRoot(Inst);
+ BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
+ if (S.MatchWithRetain()) {
+ // Don't do retain+release tracking for ARCInstKind::RetainRV, because
+ // it's better to let it remain as the first instruction after a call.
+ if (Class != ARCInstKind::RetainRV) {
+ DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n");
Retains[Inst] = S.GetRRInfo();
+ }
S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
}
- ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
// A retain moving bottom up can be a use.
break;
}
- case IC_AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
MyStates.clearBottomUpPointers();
return NestingDetected;
- case IC_AutoreleasepoolPush:
- case IC_None:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
// These are irrelevant.
return NestingDetected;
- case IC_User:
+ case ARCInstKind::User:
// If we have a store into an alloca of a pointer we are tracking, the
// pointer has multiple owners implying that we must be more conservative.
//
@@ -1806,9 +1152,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// in the presence of allocas we only unconditionally remove pointers if
// both our retain and our release are KnownSafe.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) {
- BBState::ptr_iterator I = MyStates.findPtrBottomUpState(
- StripPointerCastsAndObjCCalls(SI->getValueOperand()));
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand(), DL)) {
+ auto I = MyStates.findPtrBottomUpState(
+ GetRCIdentityRoot(SI->getValueOperand()));
if (I != MyStates.bottom_up_ptr_end())
MultiOwnersSet.insert(I->first);
}
@@ -1820,90 +1167,26 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Consider any other possible effects of this instruction on each
// pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
- ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ for (auto MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end();
+ MI != ME; ++MI) {
const Value *Ptr = MI->first;
if (Ptr == Arg)
continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
+ BottomUpPtrState &S = MI->second;
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.ClearKnownPositiveRefCount();
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq());
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
+ if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
+ continue;
- // Check for possible direct uses.
- switch (Seq) {
- case S_Release:
- case S_MovableRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- assert(!S.HasReverseInsertPts());
- // If this is an invoke instruction, we're scanning it as part of
- // one of its successor blocks, since we can't insert code after it
- // in its own block, and we don't want to split critical edges.
- if (isa<InvokeInst>(Inst))
- S.InsertReverseInsertPt(BB->getFirstInsertionPt());
- else
- S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
- S.SetSeq(S_Use);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
- } else if (Seq == S_Release && IsUser(Class)) {
- DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- // Non-movable releases depend on any possible objc pointer use.
- S.SetSeq(S_Stop);
- ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
- assert(!S.HasReverseInsertPts());
- // As above; handle invoke specially.
- if (isa<InvokeInst>(Inst))
- S.InsertReverseInsertPt(BB->getFirstInsertionPt());
- else
- S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.SetSeq(S_Use);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
- }
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ S.HandlePotentialUse(BB, Inst, Ptr, PA, Class);
}
return NestingDetected;
}
-bool
-ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains) {
+bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains) {
DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
@@ -1928,9 +1211,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
}
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // bottom of the basic block.
- ANNOTATE_BOTTOMUP_BBEND(MyStates, BB);
+ DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n"
+ << "Performing Dataflow:\n");
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
@@ -1940,7 +1222,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (isa<InvokeInst>(Inst))
continue;
- DEBUG(dbgs() << "Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << " Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
}
@@ -1955,9 +1237,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // top of the basic block.
- ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB);
+ DEBUG(llvm::dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n");
return NestingDetected;
}
@@ -1967,146 +1247,66 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
DenseMap<Value *, RRInfo> &Releases,
BBState &MyStates) {
bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
+ ARCInstKind Class = GetARCInstKind(Inst);
const Value *Arg = nullptr;
+ DEBUG(llvm::dbgs() << " Class: " << Class << "\n");
+
switch (Class) {
- case IC_RetainBlock:
+ case ARCInstKind::RetainBlock:
// In OptimizeIndividualCalls, we have strength reduced all optimizable
// objc_retainBlocks to objc_retains. Thus at this point any
- // objc_retainBlocks that we see are not optimizable.
+ // objc_retainBlocks that we see are not optimizable. We need to break since
+ // a retain can be a potential use.
break;
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
-
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
-      // If we see two retains in a row on the same pointer, make
-      // a note, and we'll circle back to revisit it after we've
-      // hopefully eliminated the second retain, which may allow us to
-      // eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
- S.ResetSequenceProgress(S_Retain);
- S.SetKnownSafe(S.HasKnownPositiveRefCount());
- S.InsertCall(Inst);
- }
-
- S.SetKnownPositiveRefCount();
-
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV: {
+ Arg = GetArgRCIdentityRoot(Inst);
+ TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
+ NestingDetected |= S.InitTopDown(Class, Inst);
    // A retain can be a potential use; proceed to the generic checking
// code below.
break;
}
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.ClearKnownPositiveRefCount();
-
- Sequence OldSeq = S.GetSeq();
-
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
-
- switch (OldSeq) {
- case S_Retain:
- case S_CanRelease:
- if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
- S.ClearReverseInsertPts();
- // FALL THROUGH
- case S_Use:
- S.SetReleaseMetadata(ReleaseMetadata);
- S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall());
+ case ARCInstKind::Release: {
+ Arg = GetArgRCIdentityRoot(Inst);
+ TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
+    // Try to form a tentative pair between this release instruction and the
+    // top-down pointers that we are tracking.
+ if (S.MatchWithRelease(MDKindCache, Inst)) {
+ // If we succeed, copy S's RRInfo into the Release -> {Retain Set
+ // Map}. Then we clear S.
+ DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n");
Releases[Inst] = S.GetRRInfo();
- ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
}
break;
}
- case IC_AutoreleasepoolPop:
+ case ARCInstKind::AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
MyStates.clearTopDownPointers();
- return NestingDetected;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- return NestingDetected;
+ return false;
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
+    // These can not be uses of the pointers we are tracking.
+ return false;
default:
break;
}
// Consider any other possible effects of this instruction on each
// pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
- ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ for (auto MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end();
+ MI != ME; ++MI) {
const Value *Ptr = MI->first;
if (Ptr == Arg)
continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.ClearKnownPositiveRefCount();
- switch (Seq) {
- case S_Retain:
- S.SetSeq(S_CanRelease);
- ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
- assert(!S.HasReverseInsertPts());
- S.InsertReverseInsertPt(Inst);
-
- // One call can't cause a transition from S_Retain to S_CanRelease
- // and S_CanRelease to S_Use. If we've made the first transition,
- // we're done.
- continue;
- case S_Use:
- case S_CanRelease:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
+ TopDownPtrState &S = MI->second;
+ if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
+ continue;
- // Check for possible direct uses.
- switch (Seq) {
- case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.SetSeq(S_Use);
- ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
- }
- break;
- case S_Retain:
- case S_Use:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
+ S.HandlePotentialUse(Inst, Ptr, PA, Class);
}
return NestingDetected;
@@ -2138,27 +1338,22 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
}
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // top of the basic block.
- ANNOTATE_TOPDOWN_BBSTART(MyStates, BB);
+ DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n"
+ << "Performing Dataflow:\n");
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- DEBUG(dbgs() << "Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << " Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // bottom of the basic block.
- ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
-
-#ifdef ARC_ANNOTATIONS
- if (!(EnableARCAnnotations && DisableCheckForCFGHazards))
-#endif
+ DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n"
+ << BBStates[BB] << "\n\n");
CheckForCFGHazards(BB, BBStates, MyStates);
+ DEBUG(llvm::dbgs() << "Final State:\n" << BBStates[BB] << "\n");
return NestingDetected;
}
@@ -2244,11 +1439,10 @@ ComputePostOrders(Function &F,
}
// Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
+bool ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
// Use reverse-postorder traversals, because we magically know that loops
// will be well behaved, i.e. they won't repeatedly call retain on a single
@@ -2258,7 +1452,7 @@ ObjCARCOpt::Visit(Function &F,
SmallVector<BasicBlock *, 16> PostOrder;
SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
- NoObjCARCExceptionsMDKind,
+ MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
BBStates);
// Use reverse-postorder on the reverse CFG for bottom-up.
@@ -2279,10 +1473,9 @@ ObjCARCOpt::Visit(Function &F,
}
/// Move the calls in RetainsToMove and ReleasesToMove.
-void ObjCARCOpt::MoveCalls(Value *Arg,
- RRInfo &RetainsToMove,
+void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
SmallVectorImpl<Instruction *> &DeadInsts,
Module *M) {
@@ -2295,7 +1488,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
Call->setDoesNotThrow();
Call->setTailCall();
@@ -2306,11 +1499,11 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
- Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
Call->setDoesNotThrow();
if (ReleasesToMove.IsTailCallRelease)
Call->setTailCall();
@@ -2333,20 +1526,15 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
}
-bool
-ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
- &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M,
- SmallVectorImpl<Instruction *> &NewRetains,
- SmallVectorImpl<Instruction *> &NewReleases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- RRInfo &RetainsToMove,
- RRInfo &ReleasesToMove,
- Value *Arg,
- bool KnownSafe,
- bool &AnyPairsCompletelyEliminated) {
+bool ObjCARCOpt::PairUpRetainsAndReleases(
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M,
+ SmallVectorImpl<Instruction *> &NewRetains,
+ SmallVectorImpl<Instruction *> &NewReleases,
+ SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated) {
// If a pair happens in a region where it is known that the reference count
// is already incremented, we can similarly ignore possible decrements unless
// we are dealing with a retainable object with multiple provenance sources.
@@ -2367,15 +1555,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
for (SmallVectorImpl<Instruction *>::const_iterator
NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
Instruction *NewRetain = *NI;
- MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ auto It = Retains.find(NewRetain);
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
KnownSafeTD &= NewRetainRRI.KnownSafe;
MultipleOwners =
- MultipleOwners || MultiOwnersSet.count(GetObjCArg(NewRetain));
+ MultipleOwners || MultiOwnersSet.count(GetArgRCIdentityRoot(NewRetain));
for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
- DenseMap<Value *, RRInfo>::const_iterator Jt =
- Releases.find(NewRetainRelease);
+ auto Jt = Releases.find(NewRetainRelease);
if (Jt == Releases.end())
return false;
const RRInfo &NewRetainReleaseRRI = Jt->second;
@@ -2444,15 +1631,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
for (SmallVectorImpl<Instruction *>::const_iterator
NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
Instruction *NewRelease = *NI;
- DenseMap<Value *, RRInfo>::const_iterator It =
- Releases.find(NewRelease);
+ auto It = Releases.find(NewRelease);
assert(It != Releases.end());
const RRInfo &NewReleaseRRI = It->second;
KnownSafeBU &= NewReleaseRRI.KnownSafe;
CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
- MapVector<Value *, RRInfo>::const_iterator Jt =
- Retains.find(NewReleaseRetain);
+ auto Jt = Retains.find(NewReleaseRetain);
if (Jt == Retains.end())
return false;
const RRInfo &NewReleaseRetainRRI = Jt->second;
@@ -2504,11 +1689,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented in 1 direction and we do not have
- // MultipleOwners, we can safely remove the retain/releases. Otherwise we need
- // to be known safe in both directions.
- bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) ||
- ((KnownSafeTD || KnownSafeBU) && !MultipleOwners);
+ // We can only remove pointers if we are known safe in both directions.
+ bool UnconditionallySafe = KnownSafeTD && KnownSafeBU;
if (UnconditionallySafe) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
@@ -2538,12 +1720,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
return false;
-#ifdef ARC_ANNOTATIONS
- // Do not move calls if ARC annotations are requested.
- if (EnableARCAnnotations)
- return false;
-#endif // ARC_ANNOTATIONS
-
Changed = true;
assert(OldCount != 0 && "Unreachable code?");
NumRRs += OldCount - NewCount;
@@ -2556,12 +1732,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
/// Identify pairings between the retains and releases, and delete and/or move
/// them.
-bool
-ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
- &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M) {
+bool ObjCARCOpt::PerformCodePlacement(
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M) {
DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
bool AnyPairsCompletelyEliminated = false;
@@ -2572,8 +1746,9 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
SmallVector<Instruction *, 8> DeadInsts;
// Visit each retain.
- for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
- E = Retains.end(); I != E; ++I) {
+ for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+ E = Retains.end();
+ I != E; ++I) {
Value *V = I->first;
if (!V) continue; // blotted
@@ -2581,7 +1756,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
- Value *Arg = GetObjCArg(Retain);
+ Value *Arg = GetArgRCIdentityRoot(Retain);
// If the object being released is in static or stack storage, we know it's
// not being managed by ObjC reference counting, so we can delete pairs
@@ -2593,18 +1768,17 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
if (const GlobalVariable *GV =
dyn_cast<GlobalVariable>(
- StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
+ GetRCIdentityRoot(LI->getPointerOperand())))
if (GV->isConstant())
KnownSafe = true;
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
NewRetains.push_back(Retain);
- bool PerformMoveCalls =
- ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains,
- NewReleases, DeadInsts, RetainsToMove,
- ReleasesToMove, Arg, KnownSafe,
- AnyPairsCompletelyEliminated);
+ bool PerformMoveCalls = PairUpRetainsAndReleases(
+ BBStates, Retains, Releases, M, NewRetains, NewReleases, DeadInsts,
+ RetainsToMove, ReleasesToMove, Arg, KnownSafe,
+ AnyPairsCompletelyEliminated);
if (PerformMoveCalls) {
// Ok, everything checks out and we're all set. Let's move/delete some
@@ -2640,12 +1814,13 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
+ if (Class != ARCInstKind::LoadWeak &&
+ Class != ARCInstKind::LoadWeakRetained)
continue;
// Delete objc_loadWeak calls with no users.
- if (Class == IC_LoadWeak && Inst->use_empty()) {
+ if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
Inst->eraseFromParent();
continue;
}
@@ -2660,10 +1835,10 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
J = Current.getInstructionIterator();
J != B; --J) {
Instruction *EarlierInst = &*std::prev(J);
- InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+ ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
switch (EarlierClass) {
- case IC_LoadWeak:
- case IC_LoadWeakRetained: {
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::LoadWeakRetained: {
// If this is loading from the same pointer, replace this load's value
// with that one.
CallInst *Call = cast<CallInst>(Inst);
@@ -2674,8 +1849,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
case AliasAnalysis::MustAlias:
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
- if (Class == IC_LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ if (Class == ARCInstKind::LoadWeakRetained) {
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -2691,8 +1866,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
}
break;
}
- case IC_StoreWeak:
- case IC_InitWeak: {
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak: {
// If this is storing to the same pointer and has the same size etc.
// replace this load's value with the stored value.
CallInst *Call = cast<CallInst>(Inst);
@@ -2703,8 +1878,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
case AliasAnalysis::MustAlias:
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
- if (Class == IC_LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ if (Class == ARCInstKind::LoadWeakRetained) {
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -2720,14 +1895,14 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
}
break;
}
- case IC_MoveWeak:
- case IC_CopyWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
// TODO: Grab the copied value.
goto clobbered;
- case IC_AutoreleasepoolPush:
- case IC_None:
- case IC_IntrinsicUser:
- case IC_User:
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::None:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::User:
// Weak pointers are only modified through the weak entry points
// (and arbitrary calls, which could call the weak entry points).
break;
@@ -2743,8 +1918,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
// the alloca and all its users can be zapped.
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_DestroyWeak)
+ ARCInstKind Class = GetBasicARCInstKind(Inst);
+ if (Class != ARCInstKind::DestroyWeak)
continue;
CallInst *Call = cast<CallInst>(Inst);
@@ -2752,10 +1927,10 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
for (User *U : Alloca->users()) {
const Instruction *UserInst = cast<Instruction>(U);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- case IC_DestroyWeak:
+ switch (GetBasicARCInstKind(UserInst)) {
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::DestroyWeak:
continue;
default:
goto done;
@@ -2764,13 +1939,13 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
for (auto UI = Alloca->user_begin(), UE = Alloca->user_end(); UI != UE;) {
CallInst *UserInst = cast<CallInst>(*UI++);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
+ switch (GetBasicARCInstKind(UserInst)) {
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::StoreWeak:
// These functions return their second argument.
UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
break;
- case IC_DestroyWeak:
+ case ARCInstKind::DestroyWeak:
// No return value.
break;
default:
@@ -2792,7 +1967,7 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
// map stays valid when we get around to rewriting code and calls get
// replaced by arguments.
DenseMap<Value *, RRInfo> Releases;
- MapVector<Value *, RRInfo> Retains;
+ BlotMapVector<Value *, RRInfo> Retains;
// This is used during the traversal of the function to track the
// states for each identified object at each block.
@@ -2825,16 +2000,15 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
if (DepInsts.size() != 1)
return false;
- CallInst *Call =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Call = dyn_cast_or_null<CallInst>(*DepInsts.begin());
// Check that the pointer is the return value of the call.
if (!Call || Arg != Call)
return false;
// Check that the call is a regular call.
- InstructionClass Class = GetBasicInstructionClass(Call);
- if (Class != IC_CallOrUser && Class != IC_Call)
+ ARCInstKind Class = GetBasicARCInstKind(Call);
+ if (Class != ARCInstKind::CallOrUser && Class != ARCInstKind::Call)
return false;
return true;
@@ -2854,13 +2028,11 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
if (DepInsts.size() != 1)
return nullptr;
- CallInst *Retain =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin());
// Check that we found a retain with the same argument.
- if (!Retain ||
- !IsRetain(GetBasicInstructionClass(Retain)) ||
- GetObjCArg(Retain) != Arg) {
+ if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
+ GetArgRCIdentityRoot(Retain) != Arg) {
return nullptr;
}
@@ -2881,14 +2053,13 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
if (DepInsts.size() != 1)
return nullptr;
- CallInst *Autorelease =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin());
if (!Autorelease)
return nullptr;
- InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+ ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
if (!IsAutorelease(AutoreleaseClass))
return nullptr;
- if (GetObjCArg(Autorelease) != Arg)
+ if (GetArgRCIdentityRoot(Autorelease) != Arg)
return nullptr;
return Autorelease;
@@ -2919,7 +2090,7 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
if (!Ret)
continue;
- const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+ const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0));
// Look for an ``autorelease'' instruction that is a predecessor of Ret and
// dependent on Arg such that there are no instructions dependent on Arg
@@ -2974,13 +2145,13 @@ ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- switch (GetBasicInstructionClass(Inst)) {
+ switch (GetBasicARCInstKind(Inst)) {
default:
break;
- case IC_Retain:
+ case ARCInstKind::Retain:
++NumRetains;
break;
- case IC_Release:
+ case ARCInstKind::Release:
++NumReleases;
break;
}
@@ -2997,28 +2168,13 @@ bool ObjCARCOpt::doInitialization(Module &M) {
if (!Run)
return false;
- // Identify the imprecise release metadata kind.
- ImpreciseReleaseMDKind =
- M.getContext().getMDKindID("clang.imprecise_release");
- CopyOnEscapeMDKind =
- M.getContext().getMDKindID("clang.arc.copy_on_escape");
- NoObjCARCExceptionsMDKind =
- M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-#ifdef ARC_ANNOTATIONS
- ARCAnnotationBottomUpMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.bottomup");
- ARCAnnotationTopDownMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.topdown");
- ARCAnnotationProvenanceSourceMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.provenancesource");
-#endif // ARC_ANNOTATIONS
-
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
// calls finalizers which can have arbitrary side effects.
+ MDKindCache.init(&M);
// Initialize our runtime entry point cache.
- EP.Initialize(&M);
+ EP.init(&M);
return false;
}
@@ -3052,27 +2208,27 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
OptimizeIndividualCalls(F);
// Optimizations for weak pointers.
- if (UsedInThisFunction & ((1 << IC_LoadWeak) |
- (1 << IC_LoadWeakRetained) |
- (1 << IC_StoreWeak) |
- (1 << IC_InitWeak) |
- (1 << IC_CopyWeak) |
- (1 << IC_MoveWeak) |
- (1 << IC_DestroyWeak)))
+ if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) |
+ (1 << unsigned(ARCInstKind::LoadWeakRetained)) |
+ (1 << unsigned(ARCInstKind::StoreWeak)) |
+ (1 << unsigned(ARCInstKind::InitWeak)) |
+ (1 << unsigned(ARCInstKind::CopyWeak)) |
+ (1 << unsigned(ARCInstKind::MoveWeak)) |
+ (1 << unsigned(ARCInstKind::DestroyWeak))))
OptimizeWeakCalls(F);
// Optimizations for retain+release pairs.
- if (UsedInThisFunction & ((1 << IC_Retain) |
- (1 << IC_RetainRV) |
- (1 << IC_RetainBlock)))
- if (UsedInThisFunction & (1 << IC_Release))
+ if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) |
+ (1 << unsigned(ARCInstKind::RetainRV)) |
+ (1 << unsigned(ARCInstKind::RetainBlock))))
+ if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release)))
// Run OptimizeSequences until it either stops making changes or
// no retain+release pair nesting is detected.
while (OptimizeSequences(F)) {}
// Optimizations if objc_autorelease is used.
- if (UsedInThisFunction & ((1 << IC_Autorelease) |
- (1 << IC_AutoreleaseRV)))
+ if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) |
+ (1 << unsigned(ARCInstKind::AutoreleaseRV))))
OptimizeReturns(F);
// Gather statistics after optimization.
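ARCInstKind is a scoped enum, so the UsedInThisFunction bitmask tests above now cast each enumerator to unsigned before shifting, where the old IC_* values converted implicitly. A minimal, self-contained sketch of that flag pattern (the trimmed enumerator list and the standalone driver are illustrative only, not part of the pass):

#include <cstdio>

enum class ARCInstKind { Retain, RetainRV, Release };

int main() {
  unsigned UsedInThisFunction = 0;
  // Record the kinds seen while scanning the function, one bit per kind.
  UsedInThisFunction |= 1 << unsigned(ARCInstKind::Retain);
  UsedInThisFunction |= 1 << unsigned(ARCInstKind::Release);

  // Scoped enums do not convert implicitly, hence the explicit casts here too.
  if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) |
                            (1 << unsigned(ARCInstKind::RetainRV))))
    if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release)))
      std::puts("retain/release pairing optimizations would run");
  return 0;
}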
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
deleted file mode 100644
index 53c077e..0000000
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-//===- ObjCARCUtil.cpp - ObjC ARC Optimization ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file defines several utility functions used by various ARC
-/// optimizations which are IMHO too big to be in a header file.
-///
-/// WARNING: This file knows about certain library functions. It recognizes them
-/// by name, and hardwires knowledge of their semantics.
-///
-/// WARNING: This file knows about how certain Objective-C library functions are
-/// used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#include "ObjCARC.h"
-#include "llvm/IR/Intrinsics.h"
-
-using namespace llvm;
-using namespace llvm::objcarc;
-
-raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
- const InstructionClass Class) {
- switch (Class) {
- case IC_Retain:
- return OS << "IC_Retain";
- case IC_RetainRV:
- return OS << "IC_RetainRV";
- case IC_RetainBlock:
- return OS << "IC_RetainBlock";
- case IC_Release:
- return OS << "IC_Release";
- case IC_Autorelease:
- return OS << "IC_Autorelease";
- case IC_AutoreleaseRV:
- return OS << "IC_AutoreleaseRV";
- case IC_AutoreleasepoolPush:
- return OS << "IC_AutoreleasepoolPush";
- case IC_AutoreleasepoolPop:
- return OS << "IC_AutoreleasepoolPop";
- case IC_NoopCast:
- return OS << "IC_NoopCast";
- case IC_FusedRetainAutorelease:
- return OS << "IC_FusedRetainAutorelease";
- case IC_FusedRetainAutoreleaseRV:
- return OS << "IC_FusedRetainAutoreleaseRV";
- case IC_LoadWeakRetained:
- return OS << "IC_LoadWeakRetained";
- case IC_StoreWeak:
- return OS << "IC_StoreWeak";
- case IC_InitWeak:
- return OS << "IC_InitWeak";
- case IC_LoadWeak:
- return OS << "IC_LoadWeak";
- case IC_MoveWeak:
- return OS << "IC_MoveWeak";
- case IC_CopyWeak:
- return OS << "IC_CopyWeak";
- case IC_DestroyWeak:
- return OS << "IC_DestroyWeak";
- case IC_StoreStrong:
- return OS << "IC_StoreStrong";
- case IC_CallOrUser:
- return OS << "IC_CallOrUser";
- case IC_Call:
- return OS << "IC_Call";
- case IC_User:
- return OS << "IC_User";
- case IC_IntrinsicUser:
- return OS << "IC_IntrinsicUser";
- case IC_None:
- return OS << "IC_None";
- }
- llvm_unreachable("Unknown instruction class!");
-}
-
-InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-
- // No (mandatory) arguments.
- if (AI == AE)
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
- .Case("clang.arc.use", IC_IntrinsicUser)
- .Default(IC_CallOrUser);
-
- // One argument.
- const Argument *A0 = AI++;
- if (AI == AE)
- // Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_retain", IC_Retain)
- .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
- .Case("objc_retainBlock", IC_RetainBlock)
- .Case("objc_release", IC_Release)
- .Case("objc_autorelease", IC_Autorelease)
- .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
- .Case("objc_retainedObject", IC_NoopCast)
- .Case("objc_unretainedObject", IC_NoopCast)
- .Case("objc_unretainedPointer", IC_NoopCast)
- .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
- .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", IC_User)
- .Case("objc_sync_exit", IC_User)
- .Default(IC_CallOrUser);
-
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
- .Case("objc_loadWeak", IC_LoadWeak)
- .Case("objc_destroyWeak", IC_DestroyWeak)
- .Default(IC_CallOrUser);
- }
-
- // Two arguments, first is i8**.
- const Argument *A1 = AI++;
- if (AI == AE)
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
- if (Pte->getElementType()->isIntegerTy(8))
- if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- Type *ETy1 = PTy1->getElementType();
- // Second argument is i8*
- if (ETy1->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_storeWeak", IC_StoreWeak)
- .Case("objc_initWeak", IC_InitWeak)
- .Case("objc_storeStrong", IC_StoreStrong)
- .Default(IC_CallOrUser);
- // Second argument is i8**.
- if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
- if (Pte1->getElementType()->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_moveWeak", IC_MoveWeak)
- .Case("objc_copyWeak", IC_CopyWeak)
- // Ignore annotation calls. This is important to stop the
- // optimizer from treating annotations as uses which would
- // make the state of the pointers they are attempting to
- // elucidate to be incorrect.
- .Case("llvm.arc.annotation.topdown.bbstart", IC_None)
- .Case("llvm.arc.annotation.topdown.bbend", IC_None)
- .Case("llvm.arc.annotation.bottomup.bbstart", IC_None)
- .Case("llvm.arc.annotation.bottomup.bbend", IC_None)
- .Default(IC_CallOrUser);
- }
-
- // Anything else.
- return IC_CallOrUser;
-}
-
-/// \brief Determine what kind of construct V is.
-InstructionClass
-llvm::objcarc::GetInstructionClass(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
- // Any instruction other than bitcast and gep with a pointer operand have a
- // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
- // to a subsequent use, rather than using it themselves, in this sense.
- // As a short cut, several other opcodes are known to have no pointer
- // operands of interest. And ret is never followed by a release, so it's
- // not interesting to examine.
- switch (I->getOpcode()) {
- case Instruction::Call: {
- const CallInst *CI = cast<CallInst>(I);
- // Check for calls to special functions.
- if (const Function *F = CI->getCalledFunction()) {
- InstructionClass Class = GetFunctionClass(F);
- if (Class != IC_CallOrUser)
- return Class;
-
- // None of the intrinsic functions do objc_release. For intrinsics, the
- // only question is whether or not they may be users.
- switch (F->getIntrinsicID()) {
- case Intrinsic::returnaddress: case Intrinsic::frameaddress:
- case Intrinsic::stacksave: case Intrinsic::stackrestore:
- case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
- case Intrinsic::objectsize: case Intrinsic::prefetch:
- case Intrinsic::stackprotector:
- case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
- case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
- case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
- case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
- case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: case Intrinsic::invariant_end:
- // Don't let dbg info affect our results.
- case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
- // Short cut: Some intrinsics obviously don't use ObjC pointers.
- return IC_None;
- default:
- break;
- }
- }
- return GetCallSiteClass(CI);
- }
- case Instruction::Invoke:
- return GetCallSiteClass(cast<InvokeInst>(I));
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::Select: case Instruction::PHI:
- case Instruction::Ret: case Instruction::Br:
- case Instruction::Switch: case Instruction::IndirectBr:
- case Instruction::Alloca: case Instruction::VAArg:
- case Instruction::Add: case Instruction::FAdd:
- case Instruction::Sub: case Instruction::FSub:
- case Instruction::Mul: case Instruction::FMul:
- case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
- case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
- case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
- case Instruction::And: case Instruction::Or: case Instruction::Xor:
- case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
- case Instruction::IntToPtr: case Instruction::FCmp:
- case Instruction::FPTrunc: case Instruction::FPExt:
- case Instruction::FPToUI: case Instruction::FPToSI:
- case Instruction::UIToFP: case Instruction::SIToFP:
- case Instruction::InsertElement: case Instruction::ExtractElement:
- case Instruction::ShuffleVector:
- case Instruction::ExtractValue:
- break;
- case Instruction::ICmp:
- // Comparing a pointer with null, or any other constant, isn't an
- // interesting use, because we don't care what the pointer points to, or
- // about the values of any other dynamic reference-counted pointers.
- if (IsPotentialRetainableObjPtr(I->getOperand(1)))
- return IC_User;
- break;
- default:
- // For anything else, check all the operands.
- // Note that this includes both operands of a Store: while the first
- // operand isn't actually being dereferenced, it is being stored to
- // memory where we can no longer track who might read it and dereference
- // it, so we have to consider it potentially used.
- for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- if (IsPotentialRetainableObjPtr(*OI))
- return IC_User;
- }
- }
-
- // Otherwise, it's totally inert for ARC purposes.
- return IC_None;
-}
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 410abfc..8346345 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -32,20 +32,22 @@ using namespace llvm::objcarc;
bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
const Value *B) {
+ const DataLayout &DL = A->getModule()->getDataLayout();
// If the values are Selects with the same condition, we can do a more precise
// check: just check for relations between the values on corresponding arms.
if (const SelectInst *SB = dyn_cast<SelectInst>(B))
if (A->getCondition() == SB->getCondition())
- return related(A->getTrueValue(), SB->getTrueValue()) ||
- related(A->getFalseValue(), SB->getFalseValue());
+ return related(A->getTrueValue(), SB->getTrueValue(), DL) ||
+ related(A->getFalseValue(), SB->getFalseValue(), DL);
// Check both arms of the Select node individually.
- return related(A->getTrueValue(), B) ||
- related(A->getFalseValue(), B);
+ return related(A->getTrueValue(), B, DL) ||
+ related(A->getFalseValue(), B, DL);
}
bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
const Value *B) {
+ const DataLayout &DL = A->getModule()->getDataLayout();
// If the values are PHIs in the same block, we can do a more precise as well
// as efficient check: just check for relations between the values on
// corresponding edges.
@@ -53,16 +55,15 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
if (PNB->getParent() == A->getParent()) {
for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
if (related(A->getIncomingValue(i),
- PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i)), DL))
return true;
return false;
}
// Check each unique source of the PHI node against B.
SmallPtrSet<const Value *, 4> UniqueSrc;
- for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
- const Value *PV1 = A->getIncomingValue(i);
- if (UniqueSrc.insert(PV1).second && related(PV1, B))
+ for (Value *PV1 : A->incoming_values()) {
+ if (UniqueSrc.insert(PV1).second && related(PV1, B, DL))
return true;
}
@@ -103,11 +104,11 @@ static bool IsStoredObjCPointer(const Value *P) {
return false;
}
-bool ProvenanceAnalysis::relatedCheck(const Value *A,
- const Value *B) {
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B,
+ const DataLayout &DL) {
// Skip past provenance pass-throughs.
- A = GetUnderlyingObjCPtr(A);
- B = GetUnderlyingObjCPtr(B);
+ A = GetUnderlyingObjCPtr(A, DL);
+ B = GetUnderlyingObjCPtr(B, DL);
// Quick check.
if (A == B)
@@ -159,8 +160,8 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A,
return true;
}
-bool ProvenanceAnalysis::related(const Value *A,
- const Value *B) {
+bool ProvenanceAnalysis::related(const Value *A, const Value *B,
+ const DataLayout &DL) {
// Begin by inserting a conservative value into the map. If the insertion
// fails, we have the answer already. If it succeeds, leave it there until we
// compute the real answer to guard against recursive queries.
@@ -170,7 +171,7 @@ bool ProvenanceAnalysis::related(const Value *A,
if (!Pair.second)
return Pair.first->second;
- bool Result = relatedCheck(A, B);
+ bool Result = relatedCheck(A, B, DL);
CachedResults[ValuePairTy(A, B)] = Result;
return Result;
}
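With the signature change above, every ProvenanceAnalysis::related() caller now supplies the module's DataLayout explicitly, as the evaluator pass does below. A short sketch of the resulting calling convention, assuming a client that already holds a configured ProvenanceAnalysis (the wrapper function itself is illustrative):

#include "ProvenanceAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

using namespace llvm;
using namespace llvm::objcarc;

// Sketch only: query two values for relatedness after this patch.
static bool valuesRelated(ProvenanceAnalysis &PA, const Function &F,
                          const Value *V1, const Value *V2) {
  // The DataLayout is taken from the module and threaded through explicitly.
  const DataLayout &DL = F.getParent()->getDataLayout();
  return PA.related(V1, V2, DL);
}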
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 7820468..0ac41d3 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -30,6 +30,7 @@
namespace llvm {
class Value;
class AliasAnalysis;
+ class DataLayout;
class PHINode;
class SelectInst;
}
@@ -53,12 +54,12 @@ class ProvenanceAnalysis {
typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
CachedResultsTy CachedResults;
- bool relatedCheck(const Value *A, const Value *B);
+ bool relatedCheck(const Value *A, const Value *B, const DataLayout &DL);
bool relatedSelect(const SelectInst *A, const Value *B);
bool relatedPHI(const PHINode *A, const Value *B);
- void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
- ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+ void operator=(const ProvenanceAnalysis &) = delete;
+ ProvenanceAnalysis(const ProvenanceAnalysis &) = delete;
public:
ProvenanceAnalysis() {}
@@ -67,7 +68,7 @@ public:
AliasAnalysis *getAA() const { return AA; }
- bool related(const Value *A, const Value *B);
+ bool related(const Value *A, const Value *B, const DataLayout &DL);
void clear() {
CachedResults.clear();
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index d836632..0be75af 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -65,6 +66,7 @@ bool PAEval::runOnFunction(Function &F) {
ProvenanceAnalysis PA;
PA.setAA(&getAnalysis<AliasAnalysis>());
+ const DataLayout &DL = F.getParent()->getDataLayout();
for (Value *V1 : Values) {
StringRef NameV1 = getName(V1);
@@ -73,7 +75,7 @@ bool PAEval::runOnFunction(Function &F) {
if (NameV1 >= NameV2)
continue;
errs() << NameV1 << " and " << NameV2;
- if (PA.related(V1, V2))
+ if (PA.related(V1, V2, DL))
errs() << " are related.\n";
else
errs() << " are not related.\n";
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
new file mode 100644
index 0000000..ae20e7e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
@@ -0,0 +1,404 @@
+//===--- PtrState.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PtrState.h"
+#include "DependencyAnalysis.h"
+#include "ObjCARC.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+#define DEBUG_TYPE "objc-arc-ptr-state"
+
+//===----------------------------------------------------------------------===//
+// Utility
+//===----------------------------------------------------------------------===//
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, const Sequence S) {
+ switch (S) {
+ case S_None:
+ return OS << "S_None";
+ case S_Retain:
+ return OS << "S_Retain";
+ case S_CanRelease:
+ return OS << "S_CanRelease";
+ case S_Use:
+ return OS << "S_Use";
+ case S_Release:
+ return OS << "S_Release";
+ case S_MovableRelease:
+ return OS << "S_MovableRelease";
+ case S_Stop:
+ return OS << "S_Stop";
+ }
+ llvm_unreachable("Unknown sequence type.");
+}
+
+//===----------------------------------------------------------------------===//
+// Sequence
+//===----------------------------------------------------------------------===//
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ if (A > B)
+ std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
+
+//===----------------------------------------------------------------------===//
+// RRInfo
+//===----------------------------------------------------------------------===//
+
+void RRInfo::clear() {
+ KnownSafe = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = nullptr;
+ Calls.clear();
+ ReverseInsertPts.clear();
+ CFGHazardAfflicted = false;
+}
+
+bool RRInfo::Merge(const RRInfo &Other) {
+ // Conservatively merge the ReleaseMetadata information.
+ if (ReleaseMetadata != Other.ReleaseMetadata)
+ ReleaseMetadata = nullptr;
+
+ // Conservatively merge the boolean state.
+ KnownSafe &= Other.KnownSafe;
+ IsTailCallRelease &= Other.IsTailCallRelease;
+ CFGHazardAfflicted |= Other.CFGHazardAfflicted;
+
+ // Merge the call sets.
+ Calls.insert(Other.Calls.begin(), Other.Calls.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size();
+ for (Instruction *Inst : Other.ReverseInsertPts)
+ Partial |= ReverseInsertPts.insert(Inst).second;
+ return Partial;
+}
+
+//===----------------------------------------------------------------------===//
+// PtrState
+//===----------------------------------------------------------------------===//
+
+void PtrState::SetKnownPositiveRefCount() {
+ DEBUG(dbgs() << " Setting Known Positive.\n");
+ KnownPositiveRefCount = true;
+}
+
+void PtrState::ClearKnownPositiveRefCount() {
+ DEBUG(dbgs() << " Clearing Known Positive.\n");
+ KnownPositiveRefCount = false;
+}
+
+void PtrState::SetSeq(Sequence NewSeq) {
+ DEBUG(dbgs() << " Old: " << GetSeq() << "; New: " << NewSeq << "\n");
+ Seq = NewSeq;
+}
+
+void PtrState::ResetSequenceProgress(Sequence NewSeq) {
+ DEBUG(dbgs() << " Resetting sequence progress.\n");
+ SetSeq(NewSeq);
+ Partial = false;
+ RRI.clear();
+}
+
+void PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown);
+ KnownPositiveRefCount &= Other.KnownPositiveRefCount;
+
+ // If we're not in a sequence (anymore), drop all associated state.
+ if (Seq == S_None) {
+ Partial = false;
+ RRI.clear();
+ } else if (Partial || Other.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+ // RR elimination. If the branch predicates for the two merge differ,
+ // mixing them is unsafe.
+ ClearSequenceProgress();
+ } else {
+ // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this
+ // point, we know that currently we are not partial. Stash whether or not
+ // the merge operation caused us to undergo a partial merging of reverse
+ // insertion points.
+ Partial = RRI.Merge(Other.RRI);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// BottomUpPtrState
+//===----------------------------------------------------------------------===//
+
+bool BottomUpPtrState::InitBottomUp(ARCMDKindCache &Cache, Instruction *I) {
+ // If we see two releases in a row on the same pointer. If so, make
+ // a note, and we'll cicle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ bool NestingDetected = false;
+ if (GetSeq() == S_Release || GetSeq() == S_MovableRelease) {
+ DEBUG(dbgs() << " Found nested releases (i.e. a release pair)\n");
+ NestingDetected = true;
+ }
+
+ MDNode *ReleaseMetadata =
+ I->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease));
+ Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
+ ResetSequenceProgress(NewSeq);
+ SetReleaseMetadata(ReleaseMetadata);
+ SetKnownSafe(HasKnownPositiveRefCount());
+ SetTailCallRelease(cast<CallInst>(I)->isTailCall());
+ InsertCall(I);
+ SetKnownPositiveRefCount();
+ return NestingDetected;
+}
+
+bool BottomUpPtrState::MatchWithRetain() {
+ SetKnownPositiveRefCount();
+
+ Sequence OldSeq = GetSeq();
+ switch (OldSeq) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
+ // imprecise release, clear our reverse insertion points.
+ if (OldSeq != S_Use || IsTrackingImpreciseReleases())
+ ClearReverseInsertPts();
+ // FALL THROUGH
+ case S_CanRelease:
+ return true;
+ case S_None:
+ return false;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
+
+bool BottomUpPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ Sequence S = GetSeq();
+
+ // Check for possible releases.
+ if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ return false;
+
+ DEBUG(dbgs() << " CanAlterRefCount: Seq: " << S << "; " << *Ptr
+ << "\n");
+ switch (S) {
+ case S_Use:
+ SetSeq(S_CanRelease);
+ return true;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ return false;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
+
+void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible direct uses.
+ switch (GetSeq()) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ assert(!HasReverseInsertPts());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ InsertReverseInsertPt(BB->getFirstInsertionPt());
+ else
+ InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ SetSeq(S_Use);
+ } else if (Seq == S_Release && IsUser(Class)) {
+ DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; "
+ << *Ptr << "\n");
+ // Non-movable releases depend on any possible objc pointer use.
+ SetSeq(S_Stop);
+ assert(!HasReverseInsertPts());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ InsertReverseInsertPt(BB->getFirstInsertionPt());
+ else
+ InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << " PreciseStopUse: Seq: " << GetSeq() << "; "
+ << *Ptr << "\n");
+ SetSeq(S_Use);
+ }
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// TopDownPtrState
+//===----------------------------------------------------------------------===//
+
+bool TopDownPtrState::InitTopDown(ARCInstKind Kind, Instruction *I) {
+ bool NestingDetected = false;
+ // Don't do retain+release tracking for ARCInstKind::RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Kind != ARCInstKind::RetainRV) {
+ // If we see two retains in a row on the same pointer, make a note;
+ // we'll circle back to revisit it after we've
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ ResetSequenceProgress(S_Retain);
+ SetKnownSafe(HasKnownPositiveRefCount());
+ InsertCall(I);
+ }
+
+ SetKnownPositiveRefCount();
+ return NestingDetected;
+}
+
+bool TopDownPtrState::MatchWithRelease(ARCMDKindCache &Cache,
+ Instruction *Release) {
+ ClearKnownPositiveRefCount();
+
+ Sequence OldSeq = GetSeq();
+
+ MDNode *ReleaseMetadata =
+ Release->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease));
+
+ switch (OldSeq) {
+ case S_Retain:
+ case S_CanRelease:
+ if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
+ ClearReverseInsertPts();
+ // FALL THROUGH
+ case S_Use:
+ SetReleaseMetadata(ReleaseMetadata);
+ SetTailCallRelease(cast<CallInst>(Release)->isTailCall());
+ return true;
+ case S_None:
+ return false;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in bottom up state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
+
+bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible releases.
+ if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ return false;
+
+ DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ ClearKnownPositiveRefCount();
+ switch (GetSeq()) {
+ case S_Retain:
+ SetSeq(S_CanRelease);
+ assert(!HasReverseInsertPts());
+ InsertReverseInsertPt(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ return true;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ return false;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ llvm_unreachable("covered switch is not covered!?");
+}
+
+void TopDownPtrState::HandlePotentialUse(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible direct uses.
+ switch (GetSeq()) {
+ case S_CanRelease:
+ if (!CanUse(Inst, Ptr, PA, Class))
+ return;
+ DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ SetSeq(S_Use);
+ return;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ return;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+}
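These TopDownPtrState helpers are what the rewritten VisitInstructionTopDown hunks near the top of this diff call into. A condensed sketch of that control flow, assuming the pass-internal BBState iteration interface shown there; this is not the literal pass code, and the release-matching branch and debug output are omitted:

// Condensed sketch only; assumes the pass's BBState type and the PtrState.h
// interfaces introduced by this patch.
static void visitTopDownSketch(Instruction *Inst, ARCInstKind Class,
                               const Value *Arg, BBState &MyStates,
                               ProvenanceAnalysis &PA, bool &NestingDetected) {
  if (Class == ARCInstKind::Retain || Class == ARCInstKind::RetainRV) {
    // (Re-)start tracking the retained pointer.
    NestingDetected |=
        MyStates.getPtrTopDownState(Arg).InitTopDown(Class, Inst);
  }
  // Every other tracked pointer is first checked for a possible ref-count
  // decrement, and only then for a possible direct use.
  for (auto MI = MyStates.top_down_ptr_begin(),
            ME = MyStates.top_down_ptr_end();
       MI != ME; ++MI) {
    if (MI->first == Arg)
      continue;
    TopDownPtrState &S = MI->second;
    if (S.HandlePotentialAlterRefCount(Inst, MI->first, PA, Class))
      continue;
    S.HandlePotentialUse(Inst, MI->first, PA, Class);
  }
}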
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
new file mode 100644
index 0000000..e45e1ea
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.h
@@ -0,0 +1,210 @@
+//===--- PtrState.h - ARC State for a Ptr -------------------*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations for the ARC state associated with a ptr. It
+// is only used by the ARC Sequence Dataflow computation. By separating this
+// from the actual dataflow, it is easier to consider the mechanics of the ARC
+// optimization separate from the actual predicates being used.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
+
+#include "ARCInstKind.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace objcarc {
+
+class ARCMDKindCache;
+class ProvenanceAnalysis;
+
+/// \enum Sequence
+///
+/// \brief A sequence of states that a pointer may go through in which an
+/// objc_retain and objc_release are actually needed.
+enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x).
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
+ S_Use, ///< any use of x.
+ S_Stop, ///< like S_Release, but code motion is stopped.
+ S_Release, ///< objc_release(x).
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
+};
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const Sequence S) LLVM_ATTRIBUTE_UNUSED;
+
+/// \brief Unidirectional information about either a
+/// retain-decrement-use-release sequence or release-use-decrement-retain
+/// reverse sequence.
+struct RRInfo {
+ /// After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the same known pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
+
+ /// True if the objc_release calls are all marked with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// If the Calls are objc_release calls and they all have a
+ /// clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// The set of optimal insert positions for moving calls in the opposite
+ /// sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ /// If this is true, we cannot perform code motion but can still remove
+ /// retain/release pairs.
+ bool CFGHazardAfflicted;
+
+ RRInfo()
+ : KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
+ CFGHazardAfflicted(false) {}
+
+ void clear();
+
+ /// Conservatively merge the two RRInfo. Returns true if a partial merge has
+ /// occurred, false otherwise.
+ bool Merge(const RRInfo &Other);
+};
+
+/// \brief This class summarizes several per-pointer runtime properties which
+ /// are propagated through the flow graph.
+class PtrState {
+protected:
+ /// True if the reference count is known to be incremented.
+ bool KnownPositiveRefCount;
+
+ /// True if we've seen an opportunity for partial RR elimination, such as
+ /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+ bool Partial;
+
+ /// The current position in the sequence.
+ unsigned char Seq : 8;
+
+ /// Unidirectional information about the current sequence.
+ RRInfo RRI;
+
+ PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {}
+
+public:
+ bool IsKnownSafe() const { return RRI.KnownSafe; }
+
+ void SetKnownSafe(const bool NewValue) { RRI.KnownSafe = NewValue; }
+
+ bool IsTailCallRelease() const { return RRI.IsTailCallRelease; }
+
+ void SetTailCallRelease(const bool NewValue) {
+ RRI.IsTailCallRelease = NewValue;
+ }
+
+ bool IsTrackingImpreciseReleases() const {
+ return RRI.ReleaseMetadata != nullptr;
+ }
+
+ const MDNode *GetReleaseMetadata() const { return RRI.ReleaseMetadata; }
+
+ void SetReleaseMetadata(MDNode *NewValue) { RRI.ReleaseMetadata = NewValue; }
+
+ bool IsCFGHazardAfflicted() const { return RRI.CFGHazardAfflicted; }
+
+ void SetCFGHazardAfflicted(const bool NewValue) {
+ RRI.CFGHazardAfflicted = NewValue;
+ }
+
+ void SetKnownPositiveRefCount();
+ void ClearKnownPositiveRefCount();
+
+ bool HasKnownPositiveRefCount() const { return KnownPositiveRefCount; }
+
+ void SetSeq(Sequence NewSeq);
+
+ Sequence GetSeq() const { return static_cast<Sequence>(Seq); }
+
+ void ClearSequenceProgress() { ResetSequenceProgress(S_None); }
+
+ void ResetSequenceProgress(Sequence NewSeq);
+ void Merge(const PtrState &Other, bool TopDown);
+
+ void InsertCall(Instruction *I) { RRI.Calls.insert(I); }
+
+ void InsertReverseInsertPt(Instruction *I) { RRI.ReverseInsertPts.insert(I); }
+
+ void ClearReverseInsertPts() { RRI.ReverseInsertPts.clear(); }
+
+ bool HasReverseInsertPts() const { return !RRI.ReverseInsertPts.empty(); }
+
+ const RRInfo &GetRRInfo() const { return RRI; }
+};
+
+struct BottomUpPtrState : PtrState {
+ BottomUpPtrState() : PtrState() {}
+
+ /// (Re-)Initialize this bottom up pointer returning true if we detected a
+ /// pointer with nested releases.
+ bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I);
+
+ /// Return true if this set of releases can be paired with a retain. Modifies
+ /// state appropriately to reflect that the matching occurred if it is
+ /// successful.
+ ///
+ /// It is assumed that one has already checked that the RCIdentity of the
+ /// retain and the RCIdentity of this ptr state are the same.
+ bool MatchWithRetain();
+
+ void HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+ bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+};
+
+struct TopDownPtrState : PtrState {
+ TopDownPtrState() : PtrState() {}
+
+ /// (Re-)Initialize this top down pointer returning true if we detected a
+ /// pointer with nested retains.
+ bool InitTopDown(ARCInstKind Kind, Instruction *I);
+
+ /// Return true if this set of retains can be paired with the given
+ /// release. Modifies state appropriately to reflect that the matching
+ /// occurred.
+ bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release);
+
+ void HandlePotentialUse(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+
+ bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+};
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif
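A worked example of how the Merge machinery declared here behaves at a CFG join, following MergeSeqs in PtrState.cpp above. This is a sketch for illustration, not a unit test that exists in the tree:

#include "PtrState.h"
#include <cassert>

using namespace llvm::objcarc;

void mergeExampleSketch() {
  TopDownPtrState A, B;
  A.SetSeq(S_Retain); // one predecessor saw objc_retain(x)
  B.SetSeq(S_Use);    // the other predecessor already saw a use of x
  A.Merge(B, /*TopDown=*/true);
  // MergeSeqs keeps whichever side is further along the sequence.
  assert(A.GetSeq() == S_Use);

  B.SetSeq(S_None);   // a path that tracks no sequence at all...
  A.Merge(B, /*TopDown=*/true);
  assert(A.GetSeq() == S_None); // ...conservatively drops all progress.
}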
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index 3d91984..d6fc916 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -32,19 +32,18 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
- struct ADCE : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(ID) {
- initializeADCEPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function& F) override;
+struct ADCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ ADCE() : FunctionPass(ID) {
+ initializeADCEPass(*PassRegistry::getPassRegistry());
+ }
- void getAnalysisUsage(AnalysisUsage& AU) const override {
- AU.setPreservesCFG();
- }
+ bool runOnFunction(Function& F) override;
- };
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.setPreservesCFG();
+ }
+};
}
char ADCE::ID = 0;
@@ -54,46 +53,45 @@ bool ADCE::runOnFunction(Function& F) {
if (skipOptnoneFunction(F))
return false;
- SmallPtrSet<Instruction*, 128> alive;
- SmallVector<Instruction*, 128> worklist;
+ SmallPtrSet<Instruction*, 128> Alive;
+ SmallVector<Instruction*, 128> Worklist;
// Collect the set of "root" instructions that are known live.
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (isa<TerminatorInst>(I.getInstructionIterator()) ||
- isa<DbgInfoIntrinsic>(I.getInstructionIterator()) ||
- isa<LandingPadInst>(I.getInstructionIterator()) ||
- I->mayHaveSideEffects()) {
- alive.insert(I.getInstructionIterator());
- worklist.push_back(I.getInstructionIterator());
+ for (Instruction &I : inst_range(F)) {
+ if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ isa<LandingPadInst>(I) || I.mayHaveSideEffects()) {
+ Alive.insert(&I);
+ Worklist.push_back(&I);
}
+ }
// Propagate liveness backwards to operands.
- while (!worklist.empty()) {
- Instruction* curr = worklist.pop_back_val();
- for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
- OI != OE; ++OI)
- if (Instruction* Inst = dyn_cast<Instruction>(OI))
- if (alive.insert(Inst).second)
- worklist.push_back(Inst);
+ while (!Worklist.empty()) {
+ Instruction *Curr = Worklist.pop_back_val();
+ for (Use &OI : Curr->operands()) {
+ if (Instruction *Inst = dyn_cast<Instruction>(OI))
+ if (Alive.insert(Inst).second)
+ Worklist.push_back(Inst);
+ }
}
// The inverse of the live set is the dead set. These are those instructions
// which have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
- // NOTE: We reuse the worklist vector here for memory efficiency.
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (!alive.count(I.getInstructionIterator())) {
- worklist.push_back(I.getInstructionIterator());
- I->dropAllReferences();
+ // NOTE: We reuse the Worklist vector here for memory efficiency.
+ for (Instruction &I : inst_range(F)) {
+ if (!Alive.count(&I)) {
+ Worklist.push_back(&I);
+ I.dropAllReferences();
}
+ }
- for (SmallVectorImpl<Instruction *>::iterator I = worklist.begin(),
- E = worklist.end(); I != E; ++I) {
+ for (Instruction *&I : Worklist) {
++NumRemoved;
- (*I)->eraseFromParent();
+ I->eraseFromParent();
}
- return !worklist.empty();
+ return !Worklist.empty();
}
FunctionPass *llvm::createAggressiveDCEPass() {
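A minimal standalone sketch of the worklist-based liveness propagation that the rewritten ADCE loop above performs, using plain STL containers and a toy instruction type in place of LLVM's IR classes.

#include <unordered_set>
#include <vector>

struct Inst {
  bool HasSideEffects = false;  // roots: terminators, side-effecting calls, ...
  std::vector<Inst *> Operands; // values this instruction uses
};

// Returns the set of live instructions; everything else may be deleted.
std::unordered_set<Inst *> findLive(const std::vector<Inst *> &Func) {
  std::unordered_set<Inst *> Alive;
  std::vector<Inst *> Worklist;

  // Seed with the root instructions that are known live.
  for (Inst *I : Func)
    if (I->HasSideEffects && Alive.insert(I).second)
      Worklist.push_back(I);

  // Propagate liveness backwards to operands.
  while (!Worklist.empty()) {
    Inst *Curr = Worklist.back();
    Worklist.pop_back();
    for (Inst *Op : Curr->Operands)
      if (Alive.insert(Op).second)
        Worklist.push_back(Op);
  }
  return Alive;
}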
diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index f48cefa..8918909 100644
--- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -23,15 +23,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -50,15 +50,15 @@ struct AlignmentFromAssumptions : public FunctionPass {
initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
}
@@ -71,7 +71,6 @@ struct AlignmentFromAssumptions : public FunctionPass {
ScalarEvolution *SE;
DominatorTree *DT;
- const DataLayout *DL;
bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
const SCEV *&OffSCEV);
@@ -123,7 +122,7 @@ static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
// If the displacement is not an exact multiple, but the remainder is a
// constant, then return this remainder (but only if it is a power of 2).
- uint64_t DiffUnitsAbs = abs64(DiffUnits);
+ uint64_t DiffUnitsAbs = std::abs(DiffUnits);
if (isPowerOf2_64(DiffUnitsAbs))
return (unsigned) DiffUnitsAbs;
}
@@ -316,7 +315,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
continue;
if (Instruction *K = dyn_cast<Instruction>(J))
- if (isValidAssumeForContext(ACall, K, DL, DT))
+ if (isValidAssumeForContext(ACall, K, DT))
WorkList.push_back(K);
}
@@ -400,7 +399,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
Visited.insert(J);
for (User *UJ : J->users()) {
Instruction *K = cast<Instruction>(UJ);
- if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT))
+ if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT))
WorkList.push_back(K);
}
}
@@ -413,8 +412,6 @@ bool AlignmentFromAssumptions::runOnFunction(Function &F) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
NewDestAlignments.clear();
NewSrcAlignments.clear();
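A standalone sketch of the arithmetic behind getNewAlignmentDiff touched above, assuming the displacement and alignment are plain integers rather than SCEVs; KnownAlign is assumed to be a nonzero power of two.

#include <cstdint>
#include <cstdlib>

unsigned newAlignment(uint64_t KnownAlign, int64_t Diff) {
  int64_t Rem = Diff % static_cast<int64_t>(KnownAlign);
  if (Rem == 0)
    return static_cast<unsigned>(KnownAlign); // exact multiple: full alignment

  // If the displacement is not an exact multiple, the remainder itself is a
  // usable alignment, but only when it is a power of two.
  uint64_t RemAbs = static_cast<uint64_t>(std::abs(Rem));
  if ((RemAbs & (RemAbs - 1)) == 0)
    return static_cast<unsigned>(RemAbs);
  return 0; // nothing useful can be concluded
}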
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
new file mode 100644
index 0000000..09c605e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -0,0 +1,410 @@
+//===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Bit-Tracking Dead Code Elimination pass. Some
+// instructions (shifts, some ands, ors, etc.) kill some of their input bits.
+// We track these dead bits and remove instructions that compute only these
+// dead bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bdce"
+
+STATISTIC(NumRemoved, "Number of instructions removed (unused)");
+STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
+
+namespace {
+struct BDCE : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BDCE() : FunctionPass(ID) {
+ initializeBDCEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function& F) override;
+
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+
+ void determineLiveOperandBits(const Instruction *UserI,
+ const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2);
+
+ AssumptionCache *AC;
+ DominatorTree *DT;
+};
+}
+
+char BDCE::ID = 0;
+INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
+ false, false)
+
+static bool isAlwaysLive(Instruction *I) {
+ return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+ isa<LandingPadInst>(I) || I->mayHaveSideEffects();
+}
+
+void BDCE::determineLiveOperandBits(const Instruction *UserI,
+ const Instruction *I, unsigned OperandNo,
+ const APInt &AOut, APInt &AB,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2) {
+ unsigned BitWidth = AB.getBitWidth();
+
+ // We're called once per operand, but for some instructions, we need to
+ // compute known bits of both operands in order to determine the live bits of
+ // either (when both operands are instructions themselves). We don't,
+ // however, want to do this twice, so we cache the result in APInts that live
+ // in the caller. For the two-relevant-operands case, both operand values are
+ // provided here.
+ auto ComputeKnownBits =
+ [&](unsigned BitWidth, const Value *V1, const Value *V2) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ KnownZero = APInt(BitWidth, 0);
+ KnownOne = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+ AC, UserI, DT);
+
+ if (V2) {
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+ 0, AC, UserI, DT);
+ }
+ };
+
+ switch (UserI->getOpcode()) {
+ default: break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // The alive bits of the input are the swapped alive bits of
+ // the output.
+ AB = AOut.byteSwap();
+ break;
+ case Intrinsic::ctlz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the left of, and including, the leftmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getHighBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+ }
+ break;
+ case Intrinsic::cttz:
+ if (OperandNo == 0) {
+ // We need some output bits, so we need all bits of the
+ // input to the right of, and including, the rightmost bit
+ // known to be one.
+ ComputeKnownBits(BitWidth, I, nullptr);
+ AB = APInt::getLowBitsSet(BitWidth,
+ std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+ }
+ break;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ // Find the highest live output bit. We don't need any more input
+ // bits than that (adds, and thus subtracts, ripple only to the
+ // left).
+ AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+ break;
+ case Instruction::Shl:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.lshr(ShiftAmt);
+
+ // If the shift is nuw/nsw, then the high bits are not dead
+ // (because we've promised that they *must* be zero).
+ const ShlOperator *S = cast<ShlOperator>(UserI);
+ if (S->hasNoSignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (S->hasNoUnsignedWrap())
+ AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::LShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<LShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::AShr:
+ if (OperandNo == 0)
+ if (ConstantInt *CI =
+ dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+ AB = AOut.shl(ShiftAmt);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+
+ // If the shift is exact, then the low bits are not dead
+ // (they must be zero).
+ if (cast<AShrOperator>(UserI)->isExact())
+ AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ }
+ break;
+ case Instruction::And:
+ AB = AOut;
+
+ // For bits that are known zero, the corresponding bits in the
+ // other operand are dead (unless they're both zero, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownZero2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownZero & ~KnownZero2);
+ }
+ break;
+ case Instruction::Or:
+ AB = AOut;
+
+ // For bits that are known one, the corresponding bits in the
+ // other operand are dead (unless they're both one, in which
+ // case they can't both be dead, so just mark the LHS bits as
+ // dead).
+ if (OperandNo == 0) {
+ ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ AB &= ~KnownOne2;
+ } else {
+ if (!isa<Instruction>(UserI->getOperand(0)))
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ AB &= ~(KnownOne & ~KnownOne2);
+ }
+ break;
+ case Instruction::Xor:
+ case Instruction::PHI:
+ AB = AOut;
+ break;
+ case Instruction::Trunc:
+ AB = AOut.zext(BitWidth);
+ break;
+ case Instruction::ZExt:
+ AB = AOut.trunc(BitWidth);
+ break;
+ case Instruction::SExt:
+ AB = AOut.trunc(BitWidth);
+ // Because the high input bit is replicated into the
+ // high-order bits of the result, if we need any of those
+ // bits, then we must keep the highest input bit.
+ if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+ AOut.getBitWidth() - BitWidth))
+ .getBoolValue())
+ AB.setBit(BitWidth-1);
+ break;
+ case Instruction::Select:
+ if (OperandNo != 0)
+ AB = AOut;
+ break;
+ }
+}
+
+bool BDCE::runOnFunction(Function& F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ DenseMap<Instruction *, APInt> AliveBits;
+ SmallVector<Instruction*, 128> Worklist;
+
+ // The set of visited instructions (non-integer-typed only).
+ SmallPtrSet<Instruction*, 128> Visited;
+
+ // Collect the set of "root" instructions that are known live.
+ for (Instruction &I : inst_range(F)) {
+ if (!isAlwaysLive(&I))
+ continue;
+
+ DEBUG(dbgs() << "BDCE: Root: " << I << "\n");
+ // For integer-valued instructions, set up an initial empty set of alive
+    // bits and add the instruction to the work list. For other instructions,
+    // add their operands to the work list (for integer-valued operands, mark
+ // all bits as live).
+ if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ if (!AliveBits.count(&I)) {
+ AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ Worklist.push_back(&I);
+ }
+
+ continue;
+ }
+
+ // Non-integer-typed instructions...
+ for (Use &OI : I.operands()) {
+ if (Instruction *J = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+ AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+ Worklist.push_back(J);
+ }
+ }
+ // To save memory, we don't add I to the Visited set here. Instead, we
+ // check isAlwaysLive on every instruction when searching for dead
+ // instructions later (we need to check isAlwaysLive for the
+ // integer-typed instructions anyway).
+ }
+
+ // Propagate liveness backwards to operands.
+ while (!Worklist.empty()) {
+ Instruction *UserI = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
+ APInt AOut;
+ if (UserI->getType()->isIntegerTy()) {
+ AOut = AliveBits[UserI];
+ DEBUG(dbgs() << " Alive Out: " << AOut);
+ }
+ DEBUG(dbgs() << "\n");
+
+ if (!UserI->getType()->isIntegerTy())
+ Visited.insert(UserI);
+
+ APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+    // Compute the set of alive bits for each operand. These are ored into the
+ // existing set, if any, and if that changes the set of alive bits, the
+ // operand is added to the work-list.
+ for (Use &OI : UserI->operands()) {
+ if (Instruction *I = dyn_cast<Instruction>(OI)) {
+ if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
+ unsigned BitWidth = IT->getBitWidth();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+ if (UserI->getType()->isIntegerTy() && !AOut &&
+ !isAlwaysLive(UserI)) {
+ AB = APInt(BitWidth, 0);
+ } else {
+          // If all bits of the output are dead, then all bits of the input
+          // are dead as well (handled above). Otherwise, the bits of each
+          // operand that are used to compute alive bits of the output are
+          // alive; all others are dead.
+ determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
+ KnownZero, KnownOne,
+ KnownZero2, KnownOne2);
+ }
+
+ // If we've added to the set of alive bits (or the operand has not
+ // been previously visited), then re-queue the operand to be visited
+ // again.
+ APInt ABPrev(BitWidth, 0);
+ auto ABI = AliveBits.find(I);
+ if (ABI != AliveBits.end())
+ ABPrev = ABI->second;
+
+ APInt ABNew = AB | ABPrev;
+ if (ABNew != ABPrev || ABI == AliveBits.end()) {
+ AliveBits[I] = std::move(ABNew);
+ Worklist.push_back(I);
+ }
+ } else if (!Visited.count(I)) {
+ Worklist.push_back(I);
+ }
+ }
+ }
+ }
+
+ bool Changed = false;
+ // The inverse of the live set is the dead set. These are those instructions
+ // which have no side effects and do not influence the control flow or return
+ // value of the function, and may therefore be deleted safely.
+ // NOTE: We reuse the Worklist vector here for memory efficiency.
+ for (Instruction &I : inst_range(F)) {
+ // For live instructions that have all dead bits, first make them dead by
+ // replacing all uses with something else. Then, if they don't need to
+ // remain live (because they have side effects, etc.) we can remove them.
+ if (I.getType()->isIntegerTy()) {
+ auto ABI = AliveBits.find(&I);
+ if (ABI != AliveBits.end()) {
+ if (ABI->second.getBoolValue())
+ continue;
+
+ DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+ // FIXME: In theory we could substitute undef here instead of zero.
+ // This should be reconsidered once we settle on the semantics of
+ // undef, poison, etc.
+ Value *Zero = ConstantInt::get(I.getType(), 0);
+ ++NumSimplified;
+ I.replaceAllUsesWith(Zero);
+ Changed = true;
+ }
+ } else if (Visited.count(&I)) {
+ continue;
+ }
+
+ if (isAlwaysLive(&I))
+ continue;
+
+ Worklist.push_back(&I);
+ I.dropAllReferences();
+ Changed = true;
+ }
+
+ for (Instruction *&I : Worklist) {
+ ++NumRemoved;
+ I->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createBitTrackingDCEPass() {
+ return new BDCE();
+}
+
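A standalone sketch of the 'shl' transfer function in determineLiveOperandBits above, using 32-bit masks in place of APInt; the nuw/nsw handling mirrors the comments in the new file.

#include <cstdint>

// Given the mask of alive output bits AOut and a constant shift amount,
// compute the mask of input bits that must stay alive.
uint32_t liveShlOperandBits(uint32_t AOut, unsigned ShiftAmt, bool HasNUW,
                            bool HasNSW) {
  // Output bit i comes from input bit i - ShiftAmt, so shift the mask right.
  uint32_t AB = AOut >> ShiftAmt;

  // If the shift is nuw/nsw, the shifted-out high bits are not dead, because
  // the flags promise that they must be zero.
  auto HighBits = [](unsigned N) -> uint32_t {
    return N == 0 ? 0u : ~0u << (32 - N);
  };
  if (HasNSW)
    AB |= HighBits(ShiftAmt + 1);
  else if (HasNUW)
    AB |= HighBits(ShiftAmt);
  return AB;
}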
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 27c177a..4288742 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <tuple>
using namespace llvm;
@@ -131,14 +132,14 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
/// \brief Initialize the pass.
void setup(Function &Fn) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TTI = &getAnalysis<TargetTransformInfo>();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
Entry = &Fn.getEntryBlock();
}
@@ -176,7 +177,7 @@ char ConstantHoisting::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ConstantHoisting, "consthoist", "Constant Hoisting",
false, false)
@@ -186,6 +187,9 @@ FunctionPass *llvm::createConstantHoistingPass() {
/// \brief Perform the constant hoisting optimization for the given function.
bool ConstantHoisting::runOnFunction(Function &Fn) {
+ if (skipOptnoneFunction(Fn))
+ return false;
+
DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index dd51ce1..c974ebb 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -22,11 +22,10 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include <set>
using namespace llvm;
@@ -45,7 +44,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
}
@@ -53,7 +52,7 @@ namespace {
char ConstantPropagation::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
@@ -68,9 +67,9 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
while (!WorkList.empty()) {
Instruction *I = *WorkList.begin();
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 5a3b5cf..d1302c6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -102,32 +103,52 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P);
- // Look if the incoming value is a select with a constant but LVI tells us
- // that the incoming value can never be that constant. In that case replace
- // the incoming value with the other value of the select. This often allows
- // us to remove the select later.
+    // See if the incoming value is a select with a scalar condition for which
+    // LVI can tell us the value. In that case, replace the incoming value with
+ // the appropriate value of the select. This often allows us to remove the
+ // select later.
if (!V) {
SelectInst *SI = dyn_cast<SelectInst>(Incoming);
if (!SI) continue;
- Constant *C = dyn_cast<Constant>(SI->getFalseValue());
- if (!C) continue;
+ Value *Condition = SI->getCondition();
+ if (!Condition->getType()->isVectorTy()) {
+ if (Constant *C = LVI->getConstantOnEdge(Condition, P->getIncomingBlock(i), BB, P)) {
+ if (C == ConstantInt::getTrue(Condition->getType())) {
+ V = SI->getTrueValue();
+ } else {
+ V = SI->getFalseValue();
+ }
+ // Once LVI learns to handle vector types, we could also add support
+ // for vector type constants that are not all zeroes or all ones.
+ }
+ }
- if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
- P->getIncomingBlock(i), BB, P) !=
- LazyValueInfo::False)
- continue;
+      // See if the select has a constant operand but LVI tells us that the incoming
+      // value can never be that constant. In that case, replace the incoming
+ // value with the other value of the select. This often allows us to
+ // remove the select later.
+ if (!V) {
+ Constant *C = dyn_cast<Constant>(SI->getFalseValue());
+ if (!C) continue;
+
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
+ P->getIncomingBlock(i), BB, P) !=
+ LazyValueInfo::False)
+ continue;
+ V = SI->getTrueValue();
+ }
DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
- V = SI->getTrueValue();
}
P->setIncomingValue(i, V);
Changed = true;
}
- // FIXME: Provide DL, TLI, DT, AT to SimplifyInstruction.
- if (Value *V = SimplifyInstruction(P)) {
+ // FIXME: Provide TLI, DT, AT to SimplifyInstruction.
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ if (Value *V = SimplifyInstruction(P, DL)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
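A standalone sketch of the select-threading case added above, with a toy tri-state standing in for LazyValueInfo's answer about the select condition on the incoming edge.

enum class Known { True, False, Unknown };

struct ToySelect {
  int TrueValue;
  int FalseValue;
};

// If the condition's value is known on the edge feeding the phi, the phi can
// take the corresponding select arm directly; returns false otherwise.
bool threadSelectIntoPhi(const ToySelect &SI, Known CondOnEdge,
                         int &PhiIncoming) {
  if (CondOnEdge == Known::Unknown)
    return false;
  PhiIncoming = (CondOnEdge == Known::True) ? SI.TrueValue : SI.FalseValue;
  return true;
}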
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 99fac75..3b262a2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -21,7 +21,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -42,7 +42,8 @@ namespace {
bool runOnBasicBlock(BasicBlock &BB) override {
if (skipOptnoneFunction(BB))
return false;
- TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
Instruction *Inst = DI++;
@@ -95,7 +96,8 @@ bool DCE::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
// Start out with all of the instructions in the worklist...
std::vector<Instruction*> WorkList;
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a1ddc00..01952cf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -33,7 +34,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -78,7 +79,8 @@ namespace {
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallSetVector<Value*, 16> &DeadStackObjects);
+ SmallSetVector<Value *, 16> &DeadStackObjects,
+ const DataLayout &DL);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -166,7 +168,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
return true;
}
}
- if (CallSite CS = I) {
+ if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
if (TLI && TLI->has(LibFunc::strcpy) &&
F->getName() == TLI->getName(LibFunc::strcpy)) {
@@ -194,18 +196,12 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
- const DataLayout *DL = AA.getDataLayout();
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return AA.getLocation(SI);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
- // If we don't have target data around, an unknown size in Location means
- // that we should use the size of the pointee type. This isn't valid for
- // memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr)
- return AliasAnalysis::Location();
return Loc;
}
@@ -215,11 +211,6 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
- // If we don't have target data around, an unknown size in Location means
- // that we should use the size of the pointee type. This isn't valid for
- // init.trampoline, which writes more than an i8.
- if (!DL) return AliasAnalysis::Location();
-
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
return AliasAnalysis::Location(II->getArgOperand(0));
@@ -271,7 +262,7 @@ static bool isRemovable(Instruction *I) {
}
}
- if (CallSite CS = I)
+ if (auto CS = CallSite(I))
return CS.getInstruction()->use_empty();
return false;
@@ -315,15 +306,16 @@ static Value *getStoredPointerOperand(Instruction *I) {
}
}
- CallSite CS = I;
+ CallSite CS(I);
// All the supported functions so far happen to have dest as their first
// argument.
return CS.getArgument(0);
}
-static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
+static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
uint64_t Size;
- if (getObjectSize(V, Size, AA.getDataLayout(), AA.getTargetLibraryInfo()))
+ if (getObjectSize(V, Size, DL, TLI))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -343,10 +335,9 @@ namespace {
/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
const AliasAnalysis::Location &Earlier,
- AliasAnalysis &AA,
- int64_t &EarlierOff,
- int64_t &LaterOff) {
- const DataLayout *DL = AA.getDataLayout();
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ int64_t &EarlierOff, int64_t &LaterOff) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -367,7 +358,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr)
+ Earlier.Size == AliasAnalysis::UnknownSize)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
@@ -382,7 +373,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
return OverwriteUnknown;
// If the "Later" store is to a recognizable object, get its size.
- uint64_t ObjectSize = getPointerSize(UO2, AA);
+ uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
if (ObjectSize != AliasAnalysis::UnknownSize)
if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
return OverwriteComplete;
@@ -560,8 +551,10 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
- DepWriteOffset, InstWriteOffset);
+ const DataLayout &DL = BB.getModule()->getDataLayout();
+ OverwriteResult OR =
+ isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(),
+ DepWriteOffset, InstWriteOffset);
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
@@ -655,6 +648,7 @@ bool DSE::HandleFree(CallInst *F) {
AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0));
SmallVector<BasicBlock *, 16> Blocks;
Blocks.push_back(F->getParent());
+ const DataLayout &DL = F->getModule()->getDataLayout();
while (!Blocks.empty()) {
BasicBlock *BB = Blocks.pop_back_val();
@@ -668,7 +662,7 @@ bool DSE::HandleFree(CallInst *F) {
break;
Value *DepPointer =
- GetUnderlyingObject(getStoredPointerOperand(Dependency));
+ GetUnderlyingObject(getStoredPointerOperand(Dependency), DL);
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
@@ -728,6 +722,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (AI->hasByValOrInAllocaAttr())
DeadStackObjects.insert(AI);
+ const DataLayout &DL = BB.getModule()->getDataLayout();
+
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
@@ -736,7 +732,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
- GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
+ GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
@@ -784,7 +780,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (CallSite CS = cast<Value>(BBI)) {
+ if (auto CS = CallSite(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
if (isAllocLikeFn(BBI, TLI))
@@ -799,8 +795,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// the call is live.
DeadStackObjects.remove_if([&](Value *I) {
// See if the call site touches the value.
- AliasAnalysis::ModRefResult A =
- AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+ AliasAnalysis::ModRefResult A = AA->getModRefInfo(
+ CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
});
@@ -835,7 +831,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Remove any allocas from the DeadPointer set that are loaded, as this
// makes any stores above the access live.
- RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL);
// If all of the allocas were clobbered by the access then we're not going
// to find anything else to process.
@@ -850,8 +846,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallSetVector<Value*, 16> &DeadStackObjects) {
- const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+ SmallSetVector<Value *, 16> &DeadStackObjects,
+ const DataLayout &DL) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
// A constant can't be in the dead pointer set.
if (isa<Constant>(UnderlyingPointer))
@@ -867,7 +864,8 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// Remove objects that could alias LoadedLoc.
DeadStackObjects.remove_if([&](Value *I) {
// See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ AliasAnalysis::Location StackLoc(
+ I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
return !AA->isNoAlias(StackLoc, LoadedLoc);
});
}
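A standalone sketch of the interval check at the heart of isOverwrite as reworked above: a later store kills an earlier one when, measured from the same underlying object, its byte range completely covers the earlier range. Sizes here are plain integers, with unknown sizes modeled as negative values.

#include <cstdint>

bool laterCoversEarlier(int64_t EarlierOff, int64_t EarlierSize,
                        int64_t LaterOff, int64_t LaterSize) {
  if (EarlierSize < 0 || LaterSize < 0)
    return false; // unknown size: cannot conclude anything
  return LaterOff <= EarlierOff &&
         LaterOff + LaterSize >= EarlierOff + EarlierSize;
}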
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 969b9a8..d536a93 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -12,12 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -26,7 +28,8 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>
using namespace llvm;
@@ -40,49 +43,44 @@ STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");
-static unsigned getHash(const void *V) {
- return DenseMapInfo<const void*>::getHashValue(V);
-}
-
//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//
namespace {
- /// SimpleValue - Instances of this struct represent available values in the
- /// scoped hash table.
- struct SimpleValue {
- Instruction *Inst;
+/// \brief Struct representing the available values in the scoped hash table.
+struct SimpleValue {
+ Instruction *Inst;
- SimpleValue(Instruction *I) : Inst(I) {
- assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
- }
+ SimpleValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
- bool isSentinel() const {
- return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
- Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
- }
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
- static bool canHandle(Instruction *Inst) {
- // This can only handle non-void readnone functions.
- if (CallInst *CI = dyn_cast<CallInst>(Inst))
- return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
- return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
- isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
- isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
- isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
- isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
- }
- };
+ static bool canHandle(Instruction *Inst) {
+ // This can only handle non-void readnone functions.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ }
+};
}
namespace llvm {
-template<> struct DenseMapInfo<SimpleValue> {
+template <> struct DenseMapInfo<SimpleValue> {
static inline SimpleValue getEmptyKey() {
- return DenseMapInfo<Instruction*>::getEmptyKey();
+ return DenseMapInfo<Instruction *>::getEmptyKey();
}
static inline SimpleValue getTombstoneKey() {
- return DenseMapInfo<Instruction*>::getTombstoneKey();
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
}
static unsigned getHashValue(SimpleValue Val);
static bool isEqual(SimpleValue LHS, SimpleValue RHS);
@@ -92,7 +90,7 @@ template<> struct DenseMapInfo<SimpleValue> {
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
Instruction *Inst = Val.Inst;
// Hash in all of the operands as pointers.
- if (BinaryOperator* BinOp = dyn_cast<BinaryOperator>(Inst)) {
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
@@ -101,8 +99,9 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
if (isa<OverflowingBinaryOperator>(BinOp)) {
// Hash the overflow behavior
unsigned Overflow =
- BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
- BinOp->hasNoUnsignedWrap() * OverflowingBinaryOperator::NoUnsignedWrap;
+ BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
+ BinOp->hasNoUnsignedWrap() *
+ OverflowingBinaryOperator::NoUnsignedWrap;
return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
}
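A standalone sketch of the hashing idea in the hunks above: commutative operations canonicalize operand order before hashing so that 'a + b' and 'b + a' collide, while the nuw/nsw flags are mixed in so that differently flagged instructions do not. The hash function below is illustrative, not LLVM's hash_combine.

#include <cstdint>
#include <functional>
#include <utility>

uint64_t hashCommutativeAdd(const void *LHS, const void *RHS, bool HasNUW,
                            bool HasNSW) {
  if (LHS > RHS)
    std::swap(LHS, RHS); // canonical operand order for commutative ops
  uint64_t Flags = (HasNUW ? 1u : 0u) | (HasNSW ? 2u : 0u);
  uint64_t H = std::hash<const void *>()(LHS);
  H = H * 1099511628211ULL ^ std::hash<const void *>()(RHS);
  H = H * 1099511628211ULL ^ Flags;
  return H;
}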
@@ -135,12 +134,13 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
- isa<ShuffleVectorInst>(Inst)) && "Invalid/unknown instruction");
+ isa<ShuffleVectorInst>(Inst)) &&
+ "Invalid/unknown instruction");
// Mix in the opcode.
- return hash_combine(Inst->getOpcode(),
- hash_combine_range(Inst->value_op_begin(),
- Inst->value_op_end()));
+ return hash_combine(
+ Inst->getOpcode(),
+ hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}
bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
@@ -149,22 +149,24 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
if (LHS.isSentinel() || RHS.isSentinel())
return LHSI == RHSI;
- if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
- if (LHSI->isIdenticalTo(RHSI)) return true;
+ if (LHSI->getOpcode() != RHSI->getOpcode())
+ return false;
+ if (LHSI->isIdenticalTo(RHSI))
+ return true;
// If we're not strictly identical, we still might be a commutable instruction
if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
if (!LHSBinOp->isCommutative())
return false;
- assert(isa<BinaryOperator>(RHSI)
- && "same opcode, but different instruction type?");
+ assert(isa<BinaryOperator>(RHSI) &&
+ "same opcode, but different instruction type?");
BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
// Check overflow attributes
if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
- assert(isa<OverflowingBinaryOperator>(RHSBinOp)
- && "same opcode, but different operator type?");
+ assert(isa<OverflowingBinaryOperator>(RHSBinOp) &&
+ "same opcode, but different operator type?");
if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
return false;
@@ -172,16 +174,16 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
// Commuted equality
return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
- LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
+ LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
}
if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
- assert(isa<CmpInst>(RHSI)
- && "same opcode, but different instruction type?");
+ assert(isa<CmpInst>(RHSI) &&
+ "same opcode, but different instruction type?");
CmpInst *RHSCmp = cast<CmpInst>(RHSI);
// Commuted equality
return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
- LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
- LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
+ LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
+ LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
}
return false;
@@ -192,57 +194,52 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
//===----------------------------------------------------------------------===//
namespace {
- /// CallValue - Instances of this struct represent available call values in
- /// the scoped hash table.
- struct CallValue {
- Instruction *Inst;
+/// \brief Struct representing the available call values in the scoped hash
+/// table.
+struct CallValue {
+ Instruction *Inst;
- CallValue(Instruction *I) : Inst(I) {
- assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
- }
+ CallValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
- bool isSentinel() const {
- return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
- Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
- }
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
- static bool canHandle(Instruction *Inst) {
- // Don't value number anything that returns void.
- if (Inst->getType()->isVoidTy())
- return false;
+ static bool canHandle(Instruction *Inst) {
+ // Don't value number anything that returns void.
+ if (Inst->getType()->isVoidTy())
+ return false;
- CallInst *CI = dyn_cast<CallInst>(Inst);
- if (!CI || !CI->onlyReadsMemory())
- return false;
- return true;
- }
- };
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (!CI || !CI->onlyReadsMemory())
+ return false;
+ return true;
+ }
+};
}
namespace llvm {
- template<> struct DenseMapInfo<CallValue> {
- static inline CallValue getEmptyKey() {
- return DenseMapInfo<Instruction*>::getEmptyKey();
- }
- static inline CallValue getTombstoneKey() {
- return DenseMapInfo<Instruction*>::getTombstoneKey();
- }
- static unsigned getHashValue(CallValue Val);
- static bool isEqual(CallValue LHS, CallValue RHS);
- };
+template <> struct DenseMapInfo<CallValue> {
+ static inline CallValue getEmptyKey() {
+ return DenseMapInfo<Instruction *>::getEmptyKey();
+ }
+ static inline CallValue getTombstoneKey() {
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CallValue Val);
+ static bool isEqual(CallValue LHS, CallValue RHS);
+};
}
+
unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
Instruction *Inst = Val.Inst;
- // Hash in all of the operands as pointers.
- unsigned Res = 0;
- for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
- assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
- "Cannot value number calls with metadata operands");
- Res ^= getHash(Inst->getOperand(i)) << (i & 0xF);
- }
-
- // Mix in the opcode.
- return (Res << 1) ^ Inst->getOpcode();
+ // Hash all of the operands as pointers and mix in the opcode.
+ return hash_combine(
+ Inst->getOpcode(),
+ hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}
bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
@@ -252,103 +249,104 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
return LHSI->isIdenticalTo(RHSI);
}
-
//===----------------------------------------------------------------------===//
-// EarlyCSE pass.
+// EarlyCSE implementation
//===----------------------------------------------------------------------===//
namespace {
-
-/// EarlyCSE - This pass does a simple depth-first walk over the dominator
-/// tree, eliminating trivially redundant instructions and using instsimplify
-/// to canonicalize things as it goes. It is intended to be fast and catch
-/// obvious cases so that instcombine and other passes are more effective. It
-/// is expected that a later pass of GVN will catch the interesting/hard
-/// cases.
-class EarlyCSE : public FunctionPass {
+/// \brief A simple and fast domtree-based CSE pass.
+///
+/// This pass does a simple depth-first walk over the dominator tree,
+/// eliminating trivially redundant instructions and using instsimplify to
+/// canonicalize things as it goes. It is intended to be fast and catch obvious
+/// cases so that instcombine and other passes are more effective. It is
+/// expected that a later pass of GVN will catch the interesting/hard cases.
+class EarlyCSE {
public:
- const DataLayout *DL;
- const TargetLibraryInfo *TLI;
- DominatorTree *DT;
- AssumptionCache *AC;
- typedef RecyclingAllocator<BumpPtrAllocator,
- ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
- typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+ Function &F;
+ const TargetLibraryInfo &TLI;
+ const TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ AssumptionCache &AC;
+ typedef RecyclingAllocator<
+ BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
+ typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
AllocatorTy> ScopedHTType;
- /// AvailableValues - This scoped hash table contains the current values of
- /// all of our simple scalar expressions. As we walk down the domtree, we
- /// look to see if instructions are in this: if so, we replace them with what
- /// we find, otherwise we insert them so that dominated values can succeed in
- /// their lookup.
- ScopedHTType *AvailableValues;
-
- /// AvailableLoads - This scoped hash table contains the current values
- /// of loads. This allows us to get efficient access to dominating loads when
- /// we have a fully redundant load. In addition to the most recent load, we
- /// keep track of a generation count of the read, which is compared against
- /// the current generation count. The current generation count is
- /// incremented after every possibly writing memory operation, which ensures
- /// that we only CSE loads with other loads that have no intervening store.
- typedef RecyclingAllocator<BumpPtrAllocator,
- ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
- typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
- DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
- LoadHTType *AvailableLoads;
-
- /// AvailableCalls - This scoped hash table contains the current values
- /// of read-only call values. It uses the same generation count as loads.
- typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
- CallHTType *AvailableCalls;
-
- /// CurrentGeneration - This is the current generation of the memory value.
+ /// \brief A scoped hash table of the current values of all of our simple
+ /// scalar expressions.
+ ///
+ /// As we walk down the domtree, we look to see if instructions are in this:
+ /// if so, we replace them with what we find, otherwise we insert them so
+ /// that dominated values can succeed in their lookup.
+ ScopedHTType AvailableValues;
+
+ /// \brief A scoped hash table of the current values of loads.
+ ///
+ /// This allows us to get efficient access to dominating loads when we have
+ /// a fully redundant load. In addition to the most recent load, we keep
+ /// track of a generation count of the read, which is compared against the
+ /// current generation count. The current generation count is incremented
+ /// after every possibly writing memory operation, which ensures that we only
+ /// CSE loads with other loads that have no intervening store.
+ typedef RecyclingAllocator<
+ BumpPtrAllocator,
+ ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+ LoadMapAllocator;
+ typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
+ DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+ LoadHTType AvailableLoads;
+
+ /// \brief A scoped hash table of the current values of read-only call
+ /// values.
+ ///
+ /// It uses the same generation count as loads.
+ typedef ScopedHashTable<CallValue, std::pair<Value *, unsigned>> CallHTType;
+ CallHTType AvailableCalls;
+
+ /// \brief This is the current generation of the memory value.
unsigned CurrentGeneration;
- static char ID;
- explicit EarlyCSE() : FunctionPass(ID) {
- initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
- }
+ /// \brief Set up the EarlyCSE runner for a particular function.
+ EarlyCSE(Function &F, const TargetLibraryInfo &TLI,
+ const TargetTransformInfo &TTI, DominatorTree &DT,
+ AssumptionCache &AC)
+ : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
- bool runOnFunction(Function &F) override;
+ bool run();
private:
-
- // NodeScope - almost a POD, but needs to call the constructors for the
- // scoped hash tables so that a new scope gets pushed on. These are RAII so
- // that the scope gets popped when the NodeScope is destroyed.
+ // Almost a POD, but needs to call the constructors for the scoped hash
+ // tables so that a new scope gets pushed on. These are RAII so that the
+ // scope gets popped when the NodeScope is destroyed.
class NodeScope {
- public:
- NodeScope(ScopedHTType *availableValues,
- LoadHTType *availableLoads,
- CallHTType *availableCalls) :
- Scope(*availableValues),
- LoadScope(*availableLoads),
- CallScope(*availableCalls) {}
-
- private:
- NodeScope(const NodeScope&) LLVM_DELETED_FUNCTION;
- void operator=(const NodeScope&) LLVM_DELETED_FUNCTION;
+ public:
+ NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
+ CallHTType &AvailableCalls)
+ : Scope(AvailableValues), LoadScope(AvailableLoads),
+ CallScope(AvailableCalls) {}
+
+ private:
+ NodeScope(const NodeScope &) = delete;
+ void operator=(const NodeScope &) = delete;
ScopedHTType::ScopeTy Scope;
LoadHTType::ScopeTy LoadScope;
CallHTType::ScopeTy CallScope;
};
- // StackNode - contains all the needed information to create a stack for
- // doing a depth first tranversal of the tree. This includes scopes for
- // values, loads, and calls as well as the generation. There is a child
- // iterator so that the children do not need to be store spearately.
+ // Contains all the needed information to create a stack for doing a depth
+  // first traversal of the tree. This includes scopes for values, loads, and
+  // calls as well as the generation. There is a child iterator so that the
+  // children do not need to be stored separately.
class StackNode {
- public:
- StackNode(ScopedHTType *availableValues,
- LoadHTType *availableLoads,
- CallHTType *availableCalls,
- unsigned cg, DomTreeNode *n,
- DomTreeNode::iterator child, DomTreeNode::iterator end) :
- CurrentGeneration(cg), ChildGeneration(cg), Node(n),
- ChildIter(child), EndIter(end),
- Scopes(availableValues, availableLoads, availableCalls),
- Processed(false) {}
+ public:
+ StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
+ CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
+ DomTreeNode::iterator child, DomTreeNode::iterator end)
+ : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
+ EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
+ Processed(false) {}
// Accessors.
unsigned currentGeneration() { return CurrentGeneration; }
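A standalone sketch of the generation-count scheme documented for AvailableLoads above: each remembered load is tagged with the generation at which it was seen, and any possibly-writing memory operation bumps the current generation, so a later load may only reuse an earlier one when no write could have intervened. The container below is illustrative only.

#include <unordered_map>
#include <utility>

struct ToyAvailableLoads {
  // pointer -> (cached value, generation at which it was loaded)
  std::unordered_map<const void *, std::pair<int, unsigned>> Table;
  unsigned CurrentGeneration = 0;

  void noteStoreOrCall() { ++CurrentGeneration; } // invalidates prior loads

  void noteLoad(const void *Ptr, int Value) {
    Table[Ptr] = {Value, CurrentGeneration};
  }

  // Returns true (and the value) only if a load of Ptr is available with no
  // intervening possibly-writing operation.
  bool lookup(const void *Ptr, int &Value) const {
    auto It = Table.find(Ptr);
    if (It == Table.end() || It->second.second != CurrentGeneration)
      return false;
    Value = It->second.first;
    return true;
  }
};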
@@ -365,9 +363,9 @@ private:
bool isProcessed() { return Processed; }
void process() { Processed = true; }
- private:
- StackNode(const StackNode&) LLVM_DELETED_FUNCTION;
- void operator=(const StackNode&) LLVM_DELETED_FUNCTION;
+ private:
+ StackNode(const StackNode &) = delete;
+ void operator=(const StackNode &) = delete;
// Members.
unsigned CurrentGeneration;
@@ -379,31 +377,78 @@ private:
bool Processed;
};
+ /// \brief Wrapper class to handle memory instructions, including loads,
+ /// stores and intrinsic loads and stores defined by the target.
+ class ParseMemoryInst {
+ public:
+ ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
+ : Load(false), Store(false), Vol(false), MayReadFromMemory(false),
+ MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) {
+ MayReadFromMemory = Inst->mayReadFromMemory();
+ MayWriteToMemory = Inst->mayWriteToMemory();
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ MemIntrinsicInfo Info;
+ if (!TTI.getTgtMemIntrinsic(II, Info))
+ return;
+ if (Info.NumMemRefs == 1) {
+ Store = Info.WriteMem;
+ Load = Info.ReadMem;
+ MatchingId = Info.MatchingId;
+ MayReadFromMemory = Info.ReadMem;
+ MayWriteToMemory = Info.WriteMem;
+ Vol = Info.Vol;
+ Ptr = Info.PtrVal;
+ }
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Load = true;
+ Vol = !LI->isSimple();
+ Ptr = LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Store = true;
+ Vol = !SI->isSimple();
+ Ptr = SI->getPointerOperand();
+ }
+ }
+ bool isLoad() { return Load; }
+ bool isStore() { return Store; }
+ bool isVolatile() { return Vol; }
+ bool isMatchingMemLoc(const ParseMemoryInst &Inst) {
+ return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId;
+ }
+ bool isValid() { return Ptr != nullptr; }
+ int getMatchingId() { return MatchingId; }
+ Value *getPtr() { return Ptr; }
+ bool mayReadFromMemory() { return MayReadFromMemory; }
+ bool mayWriteToMemory() { return MayWriteToMemory; }
+
+ private:
+ bool Load;
+ bool Store;
+ bool Vol;
+ bool MayReadFromMemory;
+ bool MayWriteToMemory;
+ // For regular (non-intrinsic) loads/stores, this is set to -1. For
+ // intrinsic loads/stores, the id is retrieved from the corresponding
+ // field in the MemIntrinsicInfo structure. That field contains
+ // non-negative values only.
+ int MatchingId;
+ Value *Ptr;
+ };
+
bool processNode(DomTreeNode *Node);
- // This transformation requires dominator postdominator info
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
- AU.setPreservesCFG();
+ Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI;
+ else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->getValueOperand();
+ assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
+ return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
+ ExpectedType);
}
};
}
-char EarlyCSE::ID = 0;
-
-// createEarlyCSEPass - The public interface to this file.
-FunctionPass *llvm::createEarlyCSEPass() {
- return new EarlyCSE();
-}
-
-INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
-
bool EarlyCSE::processNode(DomTreeNode *Node) {
BasicBlock *BB = Node->getBlock();
@@ -416,21 +461,46 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (!BB->getSinglePredecessor())
++CurrentGeneration;
+ // If this node has a single predecessor which ends in a conditional branch,
+ // we can infer the value of the branch condition given that we took this
+ // path. We need the single predecessor to ensure there's not another path
+ // which reaches this block where the condition might hold a different
+ // value. Since we're adding this to the scoped hash table (like any other
+ // def), it will have been popped if we encounter a future merge block.
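+ // An informal example (not from the original patch): given
+ //   %cmp = icmp eq i32 %x, 0
+ //   br i1 %cmp, label %then, label %else
+ // then while processing %then (whose only predecessor ends in this branch)
+ // we record %cmp -> true in AvailableValues, and uses of %cmp dominated by
+ // the edge into %then are folded to the constant true.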
+ if (BasicBlock *Pred = BB->getSinglePredecessor())
+ if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
+ if (BI->isConditional())
+ if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
+ if (SimpleValue::canHandle(CondInst)) {
+ assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
+ auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
+ ConstantInt::getTrue(BB->getContext()) :
+ ConstantInt::getFalse(BB->getContext());
+ AvailableValues.insert(CondInst, ConditionalConstant);
+ DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
+ << CondInst->getName() << "' as " << *ConditionalConstant
+ << " in " << BB->getName() << "\n");
+ // Replace all dominated uses with the known value
+ replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
+ BasicBlockEdge(Pred, BB));
+ }
+
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there is no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
- StoreInst *LastStore = nullptr;
+ Instruction *LastStore = nullptr;
bool Changed = false;
+ const DataLayout &DL = BB->getModule()->getDataLayout();
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = I++;
// Dead instructions should just be removed.
- if (isInstructionTriviallyDead(Inst, TLI)) {
+ if (isInstructionTriviallyDead(Inst, &TLI)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
Inst->eraseFromParent();
Changed = true;
@@ -449,7 +519,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, DL, TLI, DT, AC)) {
+ if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -461,7 +531,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If this is a simple instruction that we can value number, process it.
if (SimpleValue::canHandle(Inst)) {
// See if the instruction has an available value. If so, use it.
- if (Value *V = AvailableValues->lookup(Inst)) {
+ if (Value *V = AvailableValues.lookup(Inst)) {
DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -471,14 +541,15 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, just remember that this value is available.
- AvailableValues->insert(Inst, Inst);
+ AvailableValues.insert(Inst, Inst);
continue;
}
+ ParseMemoryInst MemInst(Inst, TTI);
// If this is a non-volatile load, process it.
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (MemInst.isValid() && MemInst.isLoad()) {
// Ignore volatile loads.
- if (!LI->isSimple()) {
+ if (MemInst.isVolatile()) {
LastStore = nullptr;
// Don't CSE across synchronization boundaries.
if (Inst->mayWriteToMemory())
@@ -488,38 +559,48 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
- std::pair<Value*, unsigned> InVal =
- AvailableLoads->lookup(Inst->getOperand(0));
+ std::pair<Value *, unsigned> InVal =
+ AvailableLoads.lookup(MemInst.getPtr());
if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
- << *InVal.first << '\n');
- if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
- Inst->eraseFromParent();
- Changed = true;
- ++NumCSELoad;
- continue;
+ Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+ if (Op != nullptr) {
+ DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
+ << " to: " << *InVal.first << '\n');
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(Op);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
+ }
}
// Otherwise, remember that we have this instruction.
- AvailableLoads->insert(Inst->getOperand(0),
- std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
+ Inst, CurrentGeneration));
LastStore = nullptr;
continue;
}
// If this instruction may read from memory, forget LastStore.
- if (Inst->mayReadFromMemory())
+ // Load/store intrinsics will indicate both a read and a write to
+ // memory. The target may override this (e.g. so that a store intrinsic
+ // does not read from memory, and thus will be treated the same as a
+ // regular store for commoning purposes).
+ if (Inst->mayReadFromMemory() &&
+ !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
LastStore = nullptr;
// If this is a read-only call, process it.
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
- std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+ std::pair<Value *, unsigned> InVal = AvailableCalls.lookup(Inst);
if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
- << *InVal.first << '\n');
- if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
+ << " to: " << *InVal.first << '\n');
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(InVal.first);
Inst->eraseFromParent();
Changed = true;
++NumCSECall;
@@ -527,8 +608,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, remember that we have this instruction.
- AvailableCalls->insert(Inst,
- std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ AvailableCalls.insert(
+ Inst, std::pair<Value *, unsigned>(Inst, CurrentGeneration));
continue;
}
@@ -538,17 +619,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (Inst->mayWriteToMemory()) {
++CurrentGeneration;
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
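// E.g. (informal illustration): "store i32 1, i32* %p" immediately followed
// by "store i32 2, i32* %p" with no intervening read lets us delete the
// first store.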
- if (LastStore &&
- LastStore->getPointerOperand() == SI->getPointerOperand()) {
- DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
- << *Inst << '\n');
- LastStore->eraseFromParent();
- Changed = true;
- ++NumDSE;
- LastStore = nullptr;
+ if (LastStore) {
+ ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+ if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
+ DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
+ << " due to: " << *Inst << '\n');
+ LastStore->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ LastStore = nullptr;
+ }
// fallthrough - we can exploit information about this store
}
@@ -557,12 +640,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
- AvailableLoads->insert(SI->getPointerOperand(),
- std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+ AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
+ Inst, CurrentGeneration));
// Remember that this was the last store we saw for DSE.
- if (SI->isSimple())
- LastStore = SI;
+ if (!MemInst.isVolatile())
+ LastStore = Inst;
}
}
}
@@ -570,40 +653,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
return Changed;
}
-
-bool EarlyCSE::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- // Note, deque is being used here because there is significant performance gains
- // over vector when the container becomes very large due to the specific access
- // patterns. For more information see the mailing list discussion on this:
+bool EarlyCSE::run() {
+ // Note: deque is being used here because there are significant performance
+ // gains over vector when the container becomes very large due to the
+ // specific access patterns. For more information see the mailing list
+ // discussion on this:
// http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
std::deque<StackNode *> nodesToProcess;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-
- // Tables that the pass uses when walking the domtree.
- ScopedHTType AVTable;
- AvailableValues = &AVTable;
- LoadHTType LoadTable;
- AvailableLoads = &LoadTable;
- CallHTType CallTable;
- AvailableCalls = &CallTable;
-
- CurrentGeneration = 0;
bool Changed = false;
// Process the root node.
- nodesToProcess.push_back(
- new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
- CurrentGeneration, DT->getRootNode(),
- DT->getRootNode()->begin(),
- DT->getRootNode()->end()));
+ nodesToProcess.push_back(new StackNode(
+ AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
+ DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));
// Save the current generation.
unsigned LiveOutGeneration = CurrentGeneration;
@@ -627,11 +690,9 @@ bool EarlyCSE::runOnFunction(Function &F) {
// Push the next child onto the stack.
DomTreeNode *child = NodeToProcess->nextChild();
nodesToProcess.push_back(
- new StackNode(AvailableValues,
- AvailableLoads,
- AvailableCalls,
- NodeToProcess->childGeneration(), child,
- child->begin(), child->end()));
+ new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
+ NodeToProcess->childGeneration(), child, child->begin(),
+ child->end()));
} else {
// It has been processed, and there are no more children to process,
// so delete it and pop it off the stack.
@@ -645,3 +706,74 @@ bool EarlyCSE::runOnFunction(Function &F) {
return Changed;
}
+
+PreservedAnalyses EarlyCSEPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
+ auto &TTI = AM->getResult<TargetIRAnalysis>(F);
+ auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM->getResult<AssumptionAnalysis>(F);
+
+ EarlyCSE CSE(F, TLI, TTI, DT, AC);
+
+ if (!CSE.run())
+ return PreservedAnalyses::all();
+
+ // CSE preserves the dominator tree because it doesn't mutate the CFG.
+ // FIXME: Bundle this with other CFG-preservation.
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+namespace {
+/// \brief A simple and fast domtree-based CSE pass.
+///
+/// This pass does a simple depth-first walk over the dominator tree,
+/// eliminating trivially redundant instructions and using instsimplify to
+/// canonicalize things as it goes. It is intended to be fast and catch obvious
+/// cases so that instcombine and other passes are more effective. It is
+/// expected that a later pass of GVN will catch the interesting/hard cases.
+class EarlyCSELegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ EarlyCSELegacyPass() : FunctionPass(ID) {
+ initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+ EarlyCSE CSE(F, TLI, TTI, DT, AC);
+
+ return CSE.run();
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char EarlyCSELegacyPass::ID = 0;
+
+FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }
+
+INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
new file mode 100644
index 0000000..c931422
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -0,0 +1,540 @@
+//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Float2Int pass, which aims to demote floating
+// point operations to work on integers, where that is losslessly possible.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "float2int"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <deque>
+#include <functional> // For std::function
+using namespace llvm;
+
+// The algorithm is simple. Start at instructions that convert from the
+// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
+// graph, using an equivalence data structure to unify graphs that interfere.
+//
+// Mappable instructions are those with an integer counterpart that, given
+// integer domain inputs, produce an integer output; fadd, for example.
+//
+// If a non-mappable instruction is seen, this entire def-use graph is marked
+// as non-transformable. If we see an instruction that converts from the
+// integer domain to FP domain (uitofp, sitofp), we terminate our walk.
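+//
+// For illustration only (a sketch, not taken from the pass sources): a graph
+// such as
+//   %f0 = sitofp i32 %a to double
+//   %f1 = sitofp i32 %b to double
+//   %f2 = fadd double %f0, %f1
+//   %i  = fptosi double %f2 to i32
+// is fully mappable and, range permitting, can be rewritten as the integer
+//   %i = add i32 %a, %b
+// whereas an intervening call or other unhandled operation marks the whole
+// def-use graph as non-transformable.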
+
+/// The largest integer type worth dealing with.
+static cl::opt<unsigned>
+MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
+ cl::desc("Max integer bitwidth to consider in float2int "
+ "(default=64)"));
+
+namespace {
+ struct Float2Int : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ Float2Int() : FunctionPass(ID) {
+ initializeFloat2IntPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+ void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
+ ConstantRange seen(Instruction *I, ConstantRange R);
+ ConstantRange badRange();
+ ConstantRange unknownRange();
+ ConstantRange validateRange(ConstantRange R);
+ void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots);
+ void walkForwards();
+ bool validateAndTransform();
+ Value *convert(Instruction *I, Type *ToTy);
+ void cleanup();
+
+ MapVector<Instruction*, ConstantRange > SeenInsts;
+ SmallPtrSet<Instruction*,8> Roots;
+ EquivalenceClasses<Instruction*> ECs;
+ MapVector<Instruction*, Value*> ConvertedInsts;
+ LLVMContext *Ctx;
+ };
+}
+
+char Float2Int::ID = 0;
+INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
+
+// Given a FCmp predicate, return a matching ICmp predicate if one
+// exists, otherwise return BAD_ICMP_PREDICATE.
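+// For example (an informal note, not from the original source), both FCMP_OLT
+// and FCMP_ULT map to ICMP_SLT: once both operands are known to be exactly
+// representable integers, NaNs are impossible and the ordered/unordered
+// distinction disappears.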
+static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
+ switch (P) {
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ:
+ return CmpInst::ICMP_EQ;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ return CmpInst::ICMP_SGT;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE:
+ return CmpInst::ICMP_SGE;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ return CmpInst::ICMP_SLT;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE:
+ return CmpInst::ICMP_SLE;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE:
+ return CmpInst::ICMP_NE;
+ default:
+ return CmpInst::BAD_ICMP_PREDICATE;
+ }
+}
+
+// Given a floating point binary operator, return the matching
+// integer version.
+static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case Instruction::FAdd: return Instruction::Add;
+ case Instruction::FSub: return Instruction::Sub;
+ case Instruction::FMul: return Instruction::Mul;
+ }
+}
+
+// Find the roots - instructions that convert from the FP domain to
+// integer domain.
+void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
+ for (auto &I : inst_range(F)) {
+ switch (I.getOpcode()) {
+ default: break;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ Roots.insert(&I);
+ break;
+ case Instruction::FCmp:
+ if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+ CmpInst::BAD_ICMP_PREDICATE)
+ Roots.insert(&I);
+ break;
+ }
+ }
+}
+
+// Helper - mark I as having been traversed, having range R.
+ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) {
+ DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
+ auto It = SeenInsts.find(I);
+ if (It != SeenInsts.end())
+   It->second = R;
+ else
+   SeenInsts.insert(std::make_pair(I, R));
+ return R;
+}
+
+// Helper - get a range representing a poison value.
+ConstantRange Float2Int::badRange() {
+ return ConstantRange(MaxIntegerBW + 1, true);
+}
+ConstantRange Float2Int::unknownRange() {
+ return ConstantRange(MaxIntegerBW + 1, false);
+}
+ConstantRange Float2Int::validateRange(ConstantRange R) {
+ if (R.getBitWidth() > MaxIntegerBW + 1)
+ return badRange();
+ return R;
+}
+
+// The most obvious way to structure the search is a depth-first, eager
+// search from each root. However, that requires direct recursion and so
+// can only handle small instruction sequences. Instead, we split the search
+// up into two phases:
+// - walkBackwards: A breadth-first walk of the use-def graph starting from
+// the roots. Populate "SeenInsts" with interesting
+// instructions and poison values if they're obvious and
+ // cheap to compute. Calculate the equivalence set structure
+// while we're here too.
+// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
+// defs before their uses. Calculate the real range info.
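+//
+// An informal sketch of the ordering (not from the original source): for
+//   %s = sitofp i32 %x to double
+//   %m = fmul double %s, %s
+//   %i = fptosi double %m to i32
+// walkBackwards records %i, then %m, then %s in SeenInsts, so the reverse
+// iteration in walkForwards reaches %m only after %s's range is known, and
+// %i only after %m's.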
+
+// Breadth-first walk of the use-def graph; determine the set of nodes
+// we care about and eagerly determine if some of them are poisonous.
+void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
+ std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+
+ if (SeenInsts.find(I) != SeenInsts.end())
+ // Seen already.
+ continue;
+
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ // Path terminated uncleanly.
+ seen(I, badRange());
+ break;
+
+ case Instruction::UIToFP: {
+ // Path terminated cleanly.
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
+ APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
+ seen(I, validateRange(ConstantRange(Min, Max)));
+ continue;
+ }
+
+ case Instruction::SIToFP: {
+ // Path terminated cleanly.
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
+ APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
+ seen(I, validateRange(ConstantRange(SMin, SMax)));
+ continue;
+ }
+
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FCmp:
+ seen(I, unknownRange());
+ break;
+ }
+
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ // Unify def-use chains if they interfere.
+ ECs.unionSets(I, OI);
+ if (SeenInsts.find(I)->second != badRange())
+ Worklist.push_back(OI);
+ } else if (!isa<ConstantFP>(O)) {
+ // Not an instruction or ConstantFP? we can't do anything.
+ seen(I, badRange());
+ }
+ }
+ }
+}
+
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
+void Float2Int::walkForwards() {
+ for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
+ if (It->second != unknownRange())
+ continue;
+
+ Instruction *I = It->first;
+ std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ llvm_unreachable("Should have been handled in walkBackwards!");
+
+ case Instruction::FAdd:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FAdd is a binary operator!");
+ return Ops[0].add(Ops[1]);
+ };
+ break;
+
+ case Instruction::FSub:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FSub is a binary operator!");
+ return Ops[0].sub(Ops[1]);
+ };
+ break;
+
+ case Instruction::FMul:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FMul is a binary operator!");
+ return Ops[0].multiply(Ops[1]);
+ };
+ break;
+
+ //
+ // Root-only instructions - we'll only see these if they're the
+ // first node in a walk.
+ //
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
+ return Ops[0];
+ };
+ break;
+
+ case Instruction::FCmp:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "FCmp is a binary operator!");
+ return Ops[0].unionWith(Ops[1]);
+ };
+ break;
+ }
+
+ bool Abort = false;
+ SmallVector<ConstantRange,4> OpRanges;
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ assert(SeenInsts.find(OI) != SeenInsts.end() &&
+ "def not seen before use!");
+ OpRanges.push_back(SeenInsts.find(OI)->second);
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
+ // Work out if the floating point number can be losslessly represented
+ // as an integer.
+ // APFloat::convertToInteger(&Exact) purports to do what we want, but
+ // its exactness check is stricter than we need: for example, negative
+ // zero can never be exactly converted to an integer.
+ //
+ // Instead, we ask APFloat to round itself to an integral value - this
+ // preserves sign-of-zero - then compare the result with the original.
+ //
+ APFloat F = CF->getValueAPF();
+
+ // First, weed out obviously incorrect values. Non-finite numbers
+ // can't be represented and neither can negative zero, unless
+ // we're in fast math mode.
+ if (!F.isFinite() ||
+ (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
+ !I->hasNoSignedZeros())) {
+ seen(I, badRange());
+ Abort = true;
+ break;
+ }
+
+ APFloat NewF = F;
+ auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
+ if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) {
+ seen(I, badRange());
+ Abort = true;
+ break;
+ }
+ // OK, it's representable. Now get it.
+ APSInt Int(MaxIntegerBW+1, false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Int,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ OpRanges.push_back(ConstantRange(Int));
+ } else {
+ llvm_unreachable("Should have already marked this as badRange!");
+ }
+ }
+
+ // Reduce the operands' ranges to a single range and return.
+ if (!Abort)
+ seen(I, Op(OpRanges));
+ }
+}
+
+// If there is a valid transform to be done, do it.
+bool Float2Int::validateAndTransform() {
+ bool MadeChange = false;
+
+ // Iterate over every disjoint partition of the def-use graph.
+ for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) {
+ ConstantRange R(MaxIntegerBW + 1, false);
+ bool Fail = false;
+ Type *ConvertedToTy = nullptr;
+
+ // For every member of the partition, union all the ranges together.
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI) {
+ Instruction *I = *MI;
+ auto SeenI = SeenInsts.find(I);
+ if (SeenI == SeenInsts.end())
+ continue;
+
+ R = R.unionWith(SeenI->second);
+ // We need to ensure I has no users that have not been seen.
+ // If it does, transformation would be illegal.
+ //
+ // Don't count the roots, as they terminate the graphs.
+ if (Roots.count(I) == 0) {
+ // Set the type of the conversion while we're here.
+ if (!ConvertedToTy)
+ ConvertedToTy = I->getType();
+ for (User *U : I->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || SeenInsts.find(UI) == SeenInsts.end()) {
+ DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
+ Fail = true;
+ break;
+ }
+ }
+ }
+ if (Fail)
+ break;
+ }
+
+ // If the set was empty, or we failed, or the range is poisonous,
+ // bail out.
+ if (ECs.member_begin(It) == ECs.member_end() || Fail ||
+ R.isFullSet() || R.isSignWrappedSet())
+ continue;
+ assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
+
+ // The number of bits required is the maximum of the upper and
+ // lower limits, plus one so it can be signed.
+ unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
+ R.getUpper().getMinSignedBits()) + 1;
+ DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
+
+ // If we've run outside the range of exactly representable integers,
+ // the floating point result will differ from an integer approximation.
+
+ // Do we need more bits than are in the mantissa of the type we converted
+ // to? semanticsPrecision returns the number of mantissa bits plus one
+ // for the sign bit.
+ unsigned MaxRepresentableBits
+ = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1;
+ if (MinBW > MaxRepresentableBits) {
+ DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
+ continue;
+ }
+ if (MinBW > 64) {
+ DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n");
+ continue;
+ }
+
+ // OK, R is known to be representable. Now pick a type for it.
+ // FIXME: Pick the smallest legal type that will fit.
+ Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
+
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
+ MI != ME; ++MI)
+ convert(*MI, Ty);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+Value *Float2Int::convert(Instruction *I, Type *ToTy) {
+ if (ConvertedInsts.find(I) != ConvertedInsts.end())
+ // Already converted this instruction.
+ return ConvertedInsts[I];
+
+ SmallVector<Value*,4> NewOperands;
+ for (Value *V : I->operands()) {
+ // Don't recurse if we're an instruction that terminates the path.
+ if (I->getOpcode() == Instruction::UIToFP ||
+ I->getOpcode() == Instruction::SIToFP) {
+ NewOperands.push_back(V);
+ } else if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ NewOperands.push_back(convert(VI, ToTy));
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Val,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ NewOperands.push_back(ConstantInt::get(ToTy, Val));
+ } else {
+ llvm_unreachable("Unhandled operand type?");
+ }
+ }
+
+ // Now create a new instruction.
+ IRBuilder<> IRB(I);
+ Value *NewV = nullptr;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction!");
+
+ case Instruction::FPToUI:
+ NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType());
+ break;
+
+ case Instruction::FPToSI:
+ NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType());
+ break;
+
+ case Instruction::FCmp: {
+ CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate());
+ assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!");
+ NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName());
+ break;
+ }
+
+ case Instruction::UIToFP:
+ NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy);
+ break;
+
+ case Instruction::SIToFP:
+ NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy);
+ break;
+
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ NewV = IRB.CreateBinOp(mapBinOpcode(I->getOpcode()),
+ NewOperands[0], NewOperands[1],
+ I->getName());
+ break;
+ }
+
+ // If we're a root instruction, RAUW.
+ if (Roots.count(I))
+ I->replaceAllUsesWith(NewV);
+
+ ConvertedInsts[I] = NewV;
+ return NewV;
+}
+
+// Perform dead code elimination on the instructions we just modified.
+void Float2Int::cleanup() {
+ for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend();
+ I != E; ++I)
+ I->first->eraseFromParent();
+}
+
+bool Float2Int::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
+ // Clear out all state.
+ ECs = EquivalenceClasses<Instruction*>();
+ SeenInsts.clear();
+ ConvertedInsts.clear();
+ Roots.clear();
+
+ Ctx = &F.getParent()->getContext();
+
+ findRoots(F, Roots);
+
+ walkBackwards(Roots);
+ walkForwards();
+
+ bool Modified = validateAndTransform();
+ if (Modified)
+ cleanup();
+ return Modified;
+}
+
+FunctionPass *llvm::createFloat2IntPass() {
+ return new Float2Int();
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 1ed14d0..7770ddc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -45,7 +46,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -458,7 +459,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
return e;
}
-/// lookup - Returns the value number of the specified value. Fails if
+/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
@@ -466,7 +467,7 @@ uint32_t ValueTable::lookup(Value *V) const {
return VI->second;
}
-/// lookup_or_add_cmp - Returns the value number of the given comparison,
+/// Returns the value number of the given comparison,
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
@@ -479,14 +480,14 @@ uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
return e;
}
-/// clear - Remove all entries from the ValueTable.
+/// Remove all entries from the ValueTable.
void ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
nextValueNumber = 1;
}
-/// erase - Remove a value from the value numbering.
+/// Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
valueNumbering.erase(V);
}
@@ -582,23 +583,22 @@ namespace {
return cast<MemIntrinsic>(Val.getPointer());
}
- /// MaterializeAdjustedValue - Emit code into this block to adjust the value
- /// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ /// Emit code into this block to adjust the value defined here to the
+ /// specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const;
};
class GVN : public FunctionPass {
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
- const DataLayout *DL;
const TargetLibraryInfo *TLI;
AssumptionCache *AC;
SetVector<BasicBlock *> DeadBlocks;
ValueTable VN;
- /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+ /// A mapping from value numbers to lists of Value*'s that
/// have that value number. Use findLeader to query it.
struct LeaderTableEntry {
Value *Val;
@@ -623,20 +623,18 @@ namespace {
bool runOnFunction(Function &F) override;
- /// markInstructionForDeletion - This removes the specified instruction from
+ /// This removes the specified instruction from
/// our various maps and marks it for deletion.
void markInstructionForDeletion(Instruction *I) {
VN.erase(I);
InstrsToErase.push_back(I);
}
- const DataLayout *getDataLayout() const { return DL; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
MemoryDependenceAnalysis &getMemDep() const { return *MD; }
private:
- /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
- /// its value number.
+ /// Push a new Value to the LeaderTable onto the list for its value number.
void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
LeaderTableEntry &Curr = LeaderTable[N];
if (!Curr.Val) {
@@ -652,7 +650,7 @@ namespace {
Curr.Next = Node;
}
- /// removeFromLeaderTable - Scan the list of values corresponding to a given
+ /// Scan the list of values corresponding to a given
/// value number, and remove the given instruction if encountered.
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
LeaderTableEntry* Prev = nullptr;
@@ -685,7 +683,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -711,13 +709,13 @@ namespace {
bool iterateOnFunction(Function &F);
bool performPRE(Function &F);
bool performScalarPRE(Instruction *I);
+ bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
+ unsigned int ValNo);
Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
- unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
- const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
bool processFoldableCondBr(BranchInst *BI);
void addDeadBlock(BasicBlock *BB);
@@ -727,7 +725,7 @@ namespace {
char GVN::ID = 0;
}
-// createGVNPass - The public interface to this file...
+// The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
return new GVN(NoLoads);
}
@@ -736,7 +734,7 @@ INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -752,7 +750,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
}
#endif
-/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a tri-state map with the following values:
@@ -798,7 +796,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
return true;
-// SpeculationFailure - If we get here, we found out that this is not, after
+// If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
@@ -833,8 +831,7 @@ SpeculationFailure:
}
-/// CanCoerceMustAliasedValueToLoad - Return true if
-/// CoerceAvailableValueToLoadType will succeed.
+/// Return true if CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
Type *LoadTy,
const DataLayout &DL) {
@@ -853,7 +850,7 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
return true;
}
-/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
+/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
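/// For instance (an informal example): a preceding "store i64 %v, i64* %p"
/// followed by a load of i32 through a pointer that must-aliases %p can be
/// satisfied by truncating %v rather than re-loading from memory.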
@@ -938,7 +935,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
-/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
@@ -956,8 +953,9 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL);
+ Value *StoreBase =
+ GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
if (StoreBase != LoadBase)
return -1;
@@ -1018,23 +1016,23 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
return LoadOffset-StoreOffset;
}
-/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
- StoreInst *DepSI,
- const DataLayout &DL) {
+ StoreInst *DepSI) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepSI->getValueOperand()->getType()->isStructTy() ||
DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
+ const DataLayout &DL = DepSI->getModule()->getDataLayout();
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
StorePtr, StoreSize, DL);
}
-/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
@@ -1052,11 +1050,11 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL);
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- unsigned Size = MemoryDependenceAnalysis::
- getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL);
+ unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ LoadBase, LoadOffs, LoadSize, DepLI);
if (Size == 0) return -1;
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
@@ -1086,7 +1084,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (!Src) return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
if (!GV || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
@@ -1102,15 +1100,16 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, &DL))
+ if (ConstantFoldLoadFromConstPtr(Src, DL))
return Offset;
return -1;
}
-/// GetStoreValueForLoad - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store provides bits used by the load, but the pointers don't
/// mustalias. Check this case to see if there is anything more we can do
@@ -1149,7 +1148,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL);
}
-/// GetLoadValueForLoad - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering load. This means
/// that the load *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
@@ -1157,7 +1156,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
GVN &gvn) {
- const DataLayout &DL = *gvn.getDataLayout();
+ const DataLayout &DL = SrcVal->getModule()->getDataLayout();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
@@ -1212,7 +1211,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
}
-/// GetMemInstValueForLoad - This function is called when we have a
+/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
@@ -1263,13 +1262,14 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, &DL);
+ return ConstantFoldLoadFromConstPtr(Src, DL);
}
-/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
+/// Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
@@ -1281,7 +1281,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
LI->getParent())) {
assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block");
- return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
}
// Otherwise, we have to construct SSA form.
@@ -1289,8 +1289,6 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
- Type *LoadTy = LI->getType();
-
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
BasicBlock *BB = AV.BB;
@@ -1298,7 +1296,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (SSAUpdate.HasValueForBlock(BB))
continue;
- SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn));
}
// Perform PHI construction.
@@ -1326,16 +1324,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
-Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
+ GVN &gvn) const {
Value *Res;
+ Type *LoadTy = LI->getType();
+ const DataLayout &DL = LI->getModule()->getDataLayout();
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- const DataLayout *DL = gvn.getDataLayout();
- assert(DL && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *DL);
-
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL);
+
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1353,10 +1351,8 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *Res << '\n' << "\n\n\n");
}
} else if (isMemIntrinValue()) {
- const DataLayout *DL = gvn.getDataLayout();
- assert(DL && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *DL);
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
+ BB->getTerminator(), DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1383,6 +1379,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
unsigned NumDeps = Deps.size();
+ const DataLayout &DL = LI->getModule()->getDataLayout();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1409,9 +1406,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (DL && Address) {
- int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
- DepSI, *DL);
+ if (Address) {
+ int Offset =
+ AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getValueOperand(),
@@ -1428,9 +1425,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
- if (DepLI != LI && Address && DL) {
- int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address,
- DepLI, *DL);
+ if (DepLI != LI && Address) {
+ int Offset =
+ AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
@@ -1443,9 +1440,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (DL && Address) {
+ if (Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
- DepMI, *DL);
+ DepMI, DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
@@ -1484,8 +1481,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), *DL)) {
+ if (!CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1501,7 +1498,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) {
+ if (!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1613,6 +1610,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
SmallVector<Instruction*, 8> NewInsts;
for (auto &PredLoad : PredLoads) {
BasicBlock *UnavailablePred = PredLoad.first;
@@ -1704,7 +1702,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return true;
}
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
// Step 1: Find the non-local dependencies of the load.
@@ -1817,7 +1815,7 @@ static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
I->replaceAllUsesWith(Repl);
}
-/// processLoad - Attempt to eliminate a load, first by eliminating it
+/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L) {
if (!MD)
@@ -1833,10 +1831,11 @@ bool GVN::processLoad(LoadInst *L) {
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
+ const DataLayout &DL = L->getModule()->getDataLayout();
// If we have a clobber and target data is around, see if this is a clobber
// that we can fix up through code synthesis.
- if (Dep.isClobber() && DL) {
+ if (Dep.isClobber()) {
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
@@ -1849,12 +1848,11 @@ bool GVN::processLoad(LoadInst *L) {
// access code.
Value *AvailVal = nullptr;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
- int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
- L->getPointerOperand(),
- DepSI, *DL);
+ int Offset = AnalyzeLoadFromClobberingStore(
+ L->getType(), L->getPointerOperand(), DepSI);
if (Offset != -1)
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
- L->getType(), L, *DL);
+ L->getType(), L, DL);
}
// Check to see if we have something like this:
@@ -1867,9 +1865,8 @@ bool GVN::processLoad(LoadInst *L) {
if (DepLI == L)
return false;
- int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
- L->getPointerOperand(),
- DepLI, *DL);
+ int Offset = AnalyzeLoadFromClobberingLoad(
+ L->getType(), L->getPointerOperand(), DepLI, DL);
if (Offset != -1)
AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
}
@@ -1877,11 +1874,10 @@ bool GVN::processLoad(LoadInst *L) {
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
- int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
- L->getPointerOperand(),
- DepMI, *DL);
+ int Offset = AnalyzeLoadFromClobberingMemInst(
+ L->getType(), L->getPointerOperand(), DepMI, DL);
if (Offset != -1)
- AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL);
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL);
}
if (AvailVal) {
@@ -1932,17 +1928,13 @@ bool GVN::processLoad(LoadInst *L) {
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
if (StoredVal->getType() != L->getType()) {
- if (DL) {
- StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
- L, *DL);
- if (!StoredVal)
- return false;
-
- DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
- << '\n' << *L << "\n\n\n");
- }
- else
+ StoredVal =
+ CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, DL);
+ if (!StoredVal)
return false;
+
+ DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
}
// Remove it!
@@ -1961,17 +1953,12 @@ bool GVN::processLoad(LoadInst *L) {
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
if (DepLI->getType() != L->getType()) {
- if (DL) {
- AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
- L, *DL);
- if (!AvailableVal)
- return false;
-
- DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
- << "\n" << *L << "\n\n\n");
- }
- else
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, DL);
+ if (!AvailableVal)
return false;
+
+ DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
}
// Remove it!
@@ -2016,7 +2003,7 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
-// findLeader - In order to find a leader for a given value number at a
+// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
@@ -2044,25 +2031,7 @@ Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
return Val;
}
-/// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the
-/// use is dominated by the given basic block. Returns the number of uses that
-/// were replaced.
-unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
- const BasicBlockEdge &Root) {
- unsigned Count = 0;
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE; ) {
- Use &U = *UI++;
-
- if (DT->dominates(Root, U)) {
- U.set(To);
- ++Count;
- }
- }
- return Count;
-}
-
-/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
+/// There is an edge from 'Src' to 'Dst'. Return
/// true if every path from the entry block to 'Dst' passes via this edge. In
/// particular 'Dst' must not be reachable via another edge from 'Src'.
static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
@@ -2079,7 +2048,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
-/// propagateEquality - The given values are known to be equal in every block
+/// The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
bool GVN::propagateEquality(Value *LHS, Value *RHS,
@@ -2138,7 +2107,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
// LHS always has at least one use that is not dominated by Root, this will
// never do anything if LHS has only one use.
if (!LHS->hasOneUse()) {
- unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
+ unsigned NumReplacements = replaceDominatedUsesWith(LHS, RHS, *DT, Root);
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2210,7 +2179,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
- replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
+ replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root);
Changed |= NumReplacements > 0;
NumGVNEqProp += NumReplacements;
}
@@ -2229,7 +2198,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
return Changed;
}
-/// processInstruction - When calculating availability, handle an instruction
+/// When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
// Ignore dbg info intrinsics.
@@ -2240,6 +2209,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+ const DataLayout &DL = I->getModule()->getDataLayout();
if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->getScalarType()->isPointerTy())
@@ -2358,10 +2328,8 @@ bool GVN::runOnFunction(Function& F) {
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2374,7 +2342,8 @@ bool GVN::runOnFunction(Function& F) {
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = FI++;
- bool removedBlock = MergeBlockIntoPredecessor(BB, this);
+ bool removedBlock = MergeBlockIntoPredecessor(
+ BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD);
if (removedBlock) ++NumGVNBlocks;
Changed |= removedBlock;
@@ -2457,6 +2426,43 @@ bool GVN::processBlock(BasicBlock *BB) {
return ChangedFunction;
}
+// Instantiate an expression in a predecessor that lacked it.
+bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
+ unsigned int ValNo) {
+ // Because we are going top-down through the block, all value numbers
+ // will be available in the predecessor by the time we need them. Any
+ // that weren't originally present will have been instantiated earlier
+ // in this loop.
+ bool success = true;
+ for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
+ Value *Op = Instr->getOperand(i);
+ if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
+ continue;
+
+ if (Value *V = findLeader(Pred, VN.lookup(Op))) {
+ Instr->setOperand(i, V);
+ } else {
+ success = false;
+ break;
+ }
+ }
+
+ // Fail out if we encounter an operand that is not available in
+ // the PRE predecessor. This is typically because of loads which
+ // are not value numbered precisely.
+ if (!success)
+ return false;
+
+ Instr->insertBefore(Pred->getTerminator());
+ Instr->setName(Instr->getName() + ".pre");
+ Instr->setDebugLoc(Instr->getDebugLoc());
+ VN.add(Instr, ValNo);
+
+ // Update the availability map to include the new instruction.
+ addToLeaderTable(ValNo, Instr, Pred);
+ return true;
+}
+
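For orientation, a minimal C++ sketch of the source-level pattern this helper serves (hypothetical function, not taken from the patch): an expression available in only one predecessor of a join, which scalar PRE completes by instantiating it in the predecessor that lacked it and merging the copies with a PHI.

    // Illustrative sketch only; names are hypothetical.
    int partial_redundancy(int a, int b, bool cond) {
      int x = 0;
      if (cond)
        x = a + b;     // 'a + b' is available on this path only
      int y = a + b;   // partially redundant: recomputed after the join
      return x + y;
    }
    // After PRE, 'a + b' is also computed in the predecessor that lacked it
    // (the instruction named "<name>.pre" above) and the recomputation is
    // replaced by a PHI of the two available values.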
bool GVN::performScalarPRE(Instruction *CurInst) {
SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
@@ -2523,60 +2529,43 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
// Don't do PRE when it might increase code size, i.e. when
// we would need to insert instructions in more than one pred.
- if (NumWithout != 1 || NumWith == 0)
+ if (NumWithout > 1 || NumWith == 0)
return false;
- // Don't do PRE across indirect branch.
- if (isa<IndirectBrInst>(PREPred->getTerminator()))
- return false;
+ // We may have a case where all predecessors have the instruction,
+ // and we just need to insert a phi node. Otherwise, perform
+ // insertion.
+ Instruction *PREInstr = nullptr;
- // We can't do PRE safely on a critical edge, so instead we schedule
- // the edge to be split and perform the PRE the next time we iterate
- // on the function.
- unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
- if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
- toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
- return false;
- }
-
- // Instantiate the expression in the predecessor that lacked it.
- // Because we are going top-down through the block, all value numbers
- // will be available in the predecessor by the time we need them. Any
- // that weren't originally present will have been instantiated earlier
- // in this loop.
- Instruction *PREInstr = CurInst->clone();
- bool success = true;
- for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
- Value *Op = PREInstr->getOperand(i);
- if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
- continue;
+ if (NumWithout != 0) {
+ // Don't do PRE across indirect branch.
+ if (isa<IndirectBrInst>(PREPred->getTerminator()))
+ return false;
- if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
- PREInstr->setOperand(i, V);
- } else {
- success = false;
- break;
+ // We can't do PRE safely on a critical edge, so instead we schedule
+ // the edge to be split and perform the PRE the next time we iterate
+ // on the function.
+ unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
+ if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+ toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
+ return false;
+ }
+ // We need to insert somewhere, so let's give it a shot
+ PREInstr = CurInst->clone();
+ if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
+ // If we failed insertion, make sure we remove the instruction.
+ DEBUG(verifyRemoved(PREInstr));
+ delete PREInstr;
+ return false;
}
}
- // Fail out if we encounter an operand that is not available in
- // the PRE predecessor. This is typically because of loads which
- // are not value numbered precisely.
- if (!success) {
- DEBUG(verifyRemoved(PREInstr));
- delete PREInstr;
- return false;
- }
+ // Either we should have filled in the PRE instruction, or we should
+ // not have needed insertions.
+  assert(PREInstr != nullptr || NumWithout == 0);
- PREInstr->insertBefore(PREPred->getTerminator());
- PREInstr->setName(CurInst->getName() + ".pre");
- PREInstr->setDebugLoc(CurInst->getDebugLoc());
- VN.add(PREInstr, ValNo);
++NumGVNPRE;
- // Update the availability map to include the new instruction.
- addToLeaderTable(ValNo, PREInstr, PREPred);
-
// Create a PHI to make the value available in this block.
PHINode *Phi =
PHINode::Create(CurInst->getType(), predMap.size(),
@@ -2612,10 +2601,12 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
MD->removeInstruction(CurInst);
DEBUG(verifyRemoved(CurInst));
CurInst->eraseFromParent();
+ ++NumGVNInstr;
+
return true;
}
-/// performPRE - Perform a purely local form of PRE that looks for diamond
+/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
bool Changed = false;
@@ -2645,26 +2636,28 @@ bool GVN::performPRE(Function &F) {
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted to the critical edge.
BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
- BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this);
+ BasicBlock *BB = SplitCriticalEdge(
+ Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
if (MD)
MD->invalidateCachedPredecessors();
return BB;
}
-/// splitCriticalEdges - Split critical edges found during the previous
+/// Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
if (toSplit.empty())
return false;
do {
std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
- SplitCriticalEdge(Edge.first, Edge.second, this);
+ SplitCriticalEdge(Edge.first, Edge.second,
+ CriticalEdgeSplittingOptions(getAliasAnalysis(), DT));
} while (!toSplit.empty());
if (MD) MD->invalidateCachedPredecessors();
return true;
}
-/// iterateOnFunction - Executes one iteration of GVN
+/// Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
cleanupGlobalSets();
@@ -2695,7 +2688,7 @@ void GVN::cleanupGlobalSets() {
TableAllocator.Reset();
}
-/// verifyRemoved - Verify that the specified instruction does not occur in our
+/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
VN.verifyRemoved(Inst);
@@ -2714,11 +2707,10 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
}
}
-// BB is declared dead, which implied other blocks become dead as well. This
-// function is to add all these blocks to "DeadBlocks". For the dead blocks'
-// live successors, update their phi nodes by replacing the operands
-// corresponding to dead blocks with UndefVal.
-//
+/// BB is declared dead, which implies other blocks become dead as well. This
+/// function adds all these blocks to "DeadBlocks". For the dead blocks'
+/// live successors, update their phi nodes by replacing the operands
+/// corresponding to dead blocks with UndefVal.
void GVN::addDeadBlock(BasicBlock *BB) {
SmallVector<BasicBlock *, 4> NewDead;
SmallSetVector<BasicBlock *, 4> DF;
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c01f57f..600589c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -73,7 +73,6 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
@@ -82,8 +81,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr),
- DL(nullptr), Changed(false) {
+ IndVarSimplify()
+ : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -91,7 +90,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -126,7 +125,7 @@ char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -622,17 +621,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
PN->eraseFromParent();
}
}
-
- // If we were unable to completely replace the PHI node, clone the PHI
- // and delete the original one. This lets IVUsers and any other maps
- // purge the original user from their records.
- if (!LCSSASafePhiForRAUW) {
- PHINode *NewPN = cast<PHINode>(PN->clone());
- NewPN->takeName(PN);
- NewPN->insertBefore(PN);
- PN->replaceAllUsesWith(NewPN);
- PN->eraseFromParent();
- }
}
}
@@ -663,14 +651,14 @@ namespace {
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
- const DataLayout *DL, const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
return;
Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
- if (DL && !DL->isLegalInteger(Width))
+ if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
return;
// Cast is either an sext or zext up to this point.
@@ -916,8 +904,8 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
return AddRec;
}
-/// GetWideRecurrence - Is this instruction potentially interesting from
-/// IVUsers' perspective after widening it's type? In other words, can the
+/// GetWideRecurrence - Is this instruction potentially interesting for further
+/// simplification after widening its type? In other words, can the
/// extend be safely hoisted out of the loop with SCEV reducing the value to a
/// recurrence on the same loop. If so, return the sign or zero extended
/// recurrence. Otherwise return NULL.
@@ -1201,7 +1189,6 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
namespace {
class IndVarSimplifyVisitor : public IVVisitor {
ScalarEvolution *SE;
- const DataLayout *DL;
const TargetTransformInfo *TTI;
PHINode *IVPhi;
@@ -1209,9 +1196,9 @@ namespace {
WideIVInfo WI;
IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
- const DataLayout *DL, const TargetTransformInfo *TTI,
+ const TargetTransformInfo *TTI,
const DominatorTree *DTree)
- : SE(SCEV), DL(DL), TTI(TTI), IVPhi(IV) {
+ : SE(SCEV), TTI(TTI), IVPhi(IV) {
DT = DTree;
WI.NarrowIV = IVPhi;
if (ReduceLiveIVs)
@@ -1219,9 +1206,7 @@ namespace {
}
// Implement the interface used by simplifyUsersOfIV.
- void visitCast(CastInst *Cast) override {
- visitIVCast(Cast, WI, SE, DL, TTI);
- }
+ void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
}
@@ -1255,7 +1240,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, TTI, DT);
+ IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
@@ -1278,55 +1263,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// Check for expressions that ScalarEvolution generates to compute
-/// BackedgeTakenInfo. If these expressions have not been reduced, then
-/// expanding them may incur additional cost (albeit in the loop preheader).
-static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
- SmallPtrSetImpl<const SCEV*> &Processed,
- ScalarEvolution *SE) {
- if (!Processed.insert(S).second)
- return false;
-
- // If the backedge-taken count is a UDiv, it's very likely a UDiv that
- // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
- // precise expression, rather than a UDiv from the user's code. If we can't
- // find a UDiv in the code with some simple searching, assume the former and
- // forego rewriting the loop.
- if (isa<SCEVUDivExpr>(S)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return true;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != S) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != S)
- return true;
- }
- }
-
- // Recurse past add expressions, which commonly occur in the
- // BackedgeTakenCount. They may already exist in program code, and if not,
- // they are not too expensive rematerialize.
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- if (isHighCostExpansion(*I, BI, Processed, SE))
- return true;
- }
- return false;
- }
-
- // HowManyLessThans uses a Max expression whenever the loop is not guarded by
- // the exit condition.
- if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
- return true;
-
- // If we haven't recognized an expensive SCEV pattern, assume it's an
- // expression produced by program code.
- return false;
-}
-
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
@@ -1340,7 +1276,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
/// However, we don't yet have a strong motivation for converting loop tests
/// into inequality tests.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
+ SCEVExpander &Rewriter) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
BackedgeTakenCount->isZero())
@@ -1350,12 +1287,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
return false;
// Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
+ if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
return false;
- SmallPtrSet<const SCEV*, 8> Processed;
- if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
+ if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
return false;
return true;
@@ -1521,9 +1456,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
/// This is difficult in general for SCEV because of potential overflow. But we
/// could at least handle constant BECounts.
-static PHINode *
-FindLoopCounter(Loop *L, const SCEV *BECount,
- ScalarEvolution *SE, DominatorTree *DT, const DataLayout *DL) {
+static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
+ ScalarEvolution *SE, DominatorTree *DT) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
@@ -1552,7 +1486,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
- if (PhiWidth < BCWidth || (DL && !DL->isLegalInteger(PhiWidth)))
+ if (PhiWidth < BCWidth ||
+ !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
@@ -1641,7 +1576,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
&& "unit stride pointer IV must be i8*");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
- return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+ return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
}
else {
// In any other case, convert both IVInit and IVCount to integers before
@@ -1695,7 +1630,7 @@ LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
// Initialize CmpIndVar and IVCount to their preincremented values.
Value *CmpIndVar = IndVar;
@@ -1705,51 +1640,15 @@ LinearFunctionTestReplace(Loop *L,
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
if (L->getExitingBlock() == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // This addition may overflow, which is valid as long as the comparison is
+ // truncated to BackedgeTakenCount->getType().
+ IVCount = SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- llvm::Value *IncrementedIndvar =
- IndVar->getIncomingValueForBlock(L->getExitingBlock());
- const auto *IncrementedIndvarSCEV =
- cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar));
- // It is unsafe to use the incremented indvar if it has a wrapping flag, we
- // don't want to compare against a poison value. Check the SCEV that
- // corresponds to the incremented indvar, the SCEVExpander will only insert
- // flags in the IR if the SCEV originally had wrapping flags.
- // FIXME: In theory, SCEV could drop flags even though they exist in IR.
- // A more robust solution would involve getting a new expression for
- // CmpIndVar by applying non-NSW/NUW AddExprs.
- auto WrappingFlags =
- ScalarEvolution::setFlags(SCEV::FlagNUW, SCEV::FlagNSW);
- const SCEV *IVInit = IncrementedIndvarSCEV->getStart();
- if (SE->getTypeSizeInBits(IVInit->getType()) >
- SE->getTypeSizeInBits(IVCount->getType()))
- IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
- unsigned BitWidth = SE->getTypeSizeInBits(IVCount->getType());
- Type *WideTy = IntegerType::get(SE->getContext(), BitWidth + 1);
- // Check if InitIV + BECount+1 requires sign/zero extension.
- // If not, clear the corresponding flag from WrappingFlags because it is not
- // necessary for those flags in the IncrementedIndvarSCEV expression.
- if (SE->getSignExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
- WideTy) ==
- SE->getAddExpr(SE->getSignExtendExpr(IVInit, WideTy),
- SE->getSignExtendExpr(BackedgeTakenCount, WideTy)))
- WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNSW);
- if (SE->getZeroExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
- WideTy) ==
- SE->getAddExpr(SE->getZeroExtendExpr(IVInit, WideTy),
- SE->getZeroExtendExpr(BackedgeTakenCount, WideTy)))
- WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNUW);
- if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(),
- WrappingFlags)) {
- // Add one to the "backedge-taken" count to get the trip count.
- // This addition may overflow, which is valid as long as the comparison is
- // truncated to BackedgeTakenCount->getType().
- IVCount =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- CmpIndVar = IncrementedIndvar;
- }
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
}
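A rough illustration of the trip-count comment above (hypothetical loop, not from the patch): when the latch is also the exiting block, the backedge is taken one time fewer than the body executes, so the rewritten exit test uses the incremented induction variable.

    // Illustrative sketch only.
    void lftr_example(int *a, int n) {   // assume n > 0
      for (int i = 0; i < n; ++i)        // backedge taken n - 1 times,
        a[i] = 0;                        // body executes n times
    }
    // LinearFunctionTestReplace compares the incremented indvar against
    // BackedgeTakenCount + 1 (the trip count); that addition may wrap, which
    // is acceptable as long as the compare stays in BackedgeTakenCount's type.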
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
@@ -1929,13 +1828,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
- TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
DeadInsts.clear();
Changed = false;
@@ -1947,7 +1847,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Create a rewriter object which we'll use to transform the code with.
- SCEVExpander Rewriter(*SE, "indvars");
+ SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -1975,8 +1875,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
- PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, DL);
+ if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
if (IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
new file mode 100644
index 0000000..cbdacad
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -0,0 +1,1495 @@
+//===-- InductiveRangeCheckElimination.cpp - ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The InductiveRangeCheckElimination pass splits a loop's iteration space into
+// three disjoint ranges. It does that in a way such that the loop running in
+// the middle range provably does not need range checks. As an example, it will
+// convert
+//
+// len = < known positive >
+// for (i = 0; i < n; i++) {
+// if (0 <= i && i < len) {
+// do_something();
+// } else {
+// throw_out_of_bounds();
+// }
+// }
+//
+// to
+//
+// len = < known positive >
+// limit = smin(n, len)
+// // no first segment
+// for (i = 0; i < limit; i++) {
+// if (0 <= i && i < len) { // this check is fully redundant
+// do_something();
+// } else {
+// throw_out_of_bounds();
+// }
+// }
+// for (i = limit; i < n; i++) {
+// if (0 <= i && i < len) {
+// do_something();
+// } else {
+// throw_out_of_bounds();
+// }
+// }
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <array>
+
+using namespace llvm;
+
+static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden,
+ cl::init(64));
+
+static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
+ cl::init(false));
+
+static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
+ cl::Hidden, cl::init(10));
+
+#define DEBUG_TYPE "irce"
+
+namespace {
+
+/// An inductive range check is a conditional branch in a loop with
+///
+/// 1. a very cold successor (i.e. the branch jumps to that successor very
+/// rarely)
+///
+/// and
+///
+/// 2. a condition that is provably true for some contiguous range of values
+/// taken by the containing loop's induction variable.
+///
+class InductiveRangeCheck {
+ // Classifies a range check
+ enum RangeCheckKind : unsigned {
+ // Range check of the form "0 <= I".
+ RANGE_CHECK_LOWER = 1,
+
+ // Range check of the form "I < L" where L is known positive.
+ RANGE_CHECK_UPPER = 2,
+
+ // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER
+ // conditions.
+ RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER,
+
+ // Unrecognized range check condition.
+ RANGE_CHECK_UNKNOWN = (unsigned)-1
+ };
+
+ static const char *rangeCheckKindToStr(RangeCheckKind);
+
+ const SCEV *Offset;
+ const SCEV *Scale;
+ Value *Length;
+ BranchInst *Branch;
+ RangeCheckKind Kind;
+
+ static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
+ ScalarEvolution &SE, Value *&Index,
+ Value *&Length);
+
+ static InductiveRangeCheck::RangeCheckKind
+ parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition,
+ const SCEV *&Index, Value *&UpperLimit);
+
+ InductiveRangeCheck() :
+ Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { }
+
+public:
+ const SCEV *getOffset() const { return Offset; }
+ const SCEV *getScale() const { return Scale; }
+ Value *getLength() const { return Length; }
+
+ void print(raw_ostream &OS) const {
+ OS << "InductiveRangeCheck:\n";
+ OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n";
+ OS << " Offset: ";
+ Offset->print(OS);
+ OS << " Scale: ";
+ Scale->print(OS);
+ OS << " Length: ";
+ if (Length)
+ Length->print(OS);
+ else
+ OS << "(null)";
+ OS << "\n Branch: ";
+ getBranch()->print(OS);
+ OS << "\n";
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() {
+ print(dbgs());
+ }
+#endif
+
+ BranchInst *getBranch() const { return Branch; }
+
+ /// Represents a signed integer range [Range.getBegin(), Range.getEnd()). If
+ /// R.getEnd() sle R.getBegin(), then R denotes the empty range.
+
+ class Range {
+ const SCEV *Begin;
+ const SCEV *End;
+
+ public:
+ Range(const SCEV *Begin, const SCEV *End) : Begin(Begin), End(End) {
+ assert(Begin->getType() == End->getType() && "ill-typed range!");
+ }
+
+ Type *getType() const { return Begin->getType(); }
+ const SCEV *getBegin() const { return Begin; }
+ const SCEV *getEnd() const { return End; }
+ };
+
+ typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy;
+
+ /// This is the value the condition of the branch needs to evaluate to for the
+ /// branch to take the hot successor (see (1) above).
+ bool getPassingDirection() { return true; }
+
+ /// Computes a range for the induction variable (IndVar) in which the range
+ /// check is redundant and can be constant-folded away. The induction
+ /// variable is not required to be the canonical {0,+,1} induction variable.
+ Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
+ const SCEVAddRecExpr *IndVar,
+ IRBuilder<> &B) const;
+
+ /// Create an inductive range check out of BI if possible, else return
+ /// nullptr.
+ static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI,
+ Loop *L, ScalarEvolution &SE,
+ BranchProbabilityInfo &BPI);
+};
+
+class InductiveRangeCheckElimination : public LoopPass {
+ InductiveRangeCheck::AllocatorTy Allocator;
+
+public:
+ static char ID;
+ InductiveRangeCheckElimination() : LoopPass(ID) {
+ initializeInductiveRangeCheckEliminationPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<BranchProbabilityInfo>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
+
+char InductiveRangeCheckElimination::ID = 0;
+}
+
+INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
+ "Inductive range check elimination", false, false)
+
+const char *InductiveRangeCheck::rangeCheckKindToStr(
+ InductiveRangeCheck::RangeCheckKind RCK) {
+ switch (RCK) {
+ case InductiveRangeCheck::RANGE_CHECK_UNKNOWN:
+ return "RANGE_CHECK_UNKNOWN";
+
+ case InductiveRangeCheck::RANGE_CHECK_UPPER:
+ return "RANGE_CHECK_UPPER";
+
+ case InductiveRangeCheck::RANGE_CHECK_LOWER:
+ return "RANGE_CHECK_LOWER";
+
+ case InductiveRangeCheck::RANGE_CHECK_BOTH:
+ return "RANGE_CHECK_BOTH";
+ }
+
+ llvm_unreachable("unknown range check type!");
+}
+
+/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI`
+/// cannot be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and
+/// set `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value
+/// being range checked, and set `Length` to the upper limit `Index` is being
+/// range checked with if (and only if) the range check type is stronger or
+/// equal to RANGE_CHECK_UPPER.
+///
+InductiveRangeCheck::RangeCheckKind
+InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
+ ScalarEvolution &SE, Value *&Index,
+ Value *&Length) {
+
+ auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) {
+ const SCEV *S = SE.getSCEV(V);
+ if (isa<SCEVCouldNotCompute>(S))
+ return false;
+
+ return SE.getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant &&
+ SE.isKnownNonNegative(S);
+ };
+
+ using namespace llvm::PatternMatch;
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ switch (Pred) {
+ default:
+ return RANGE_CHECK_UNKNOWN;
+
+ case ICmpInst::ICMP_SLE:
+ std::swap(LHS, RHS);
+ // fallthrough
+ case ICmpInst::ICMP_SGE:
+ if (match(RHS, m_ConstantInt<0>())) {
+ Index = LHS;
+ return RANGE_CHECK_LOWER;
+ }
+ return RANGE_CHECK_UNKNOWN;
+
+ case ICmpInst::ICMP_SLT:
+ std::swap(LHS, RHS);
+ // fallthrough
+ case ICmpInst::ICMP_SGT:
+ if (match(RHS, m_ConstantInt<-1>())) {
+ Index = LHS;
+ return RANGE_CHECK_LOWER;
+ }
+
+ if (IsNonNegativeAndNotLoopVarying(LHS)) {
+ Index = RHS;
+ Length = LHS;
+ return RANGE_CHECK_UPPER;
+ }
+ return RANGE_CHECK_UNKNOWN;
+
+ case ICmpInst::ICMP_ULT:
+ std::swap(LHS, RHS);
+ // fallthrough
+ case ICmpInst::ICMP_UGT:
+ if (IsNonNegativeAndNotLoopVarying(LHS)) {
+ Index = RHS;
+ Length = LHS;
+ return RANGE_CHECK_BOTH;
+ }
+ return RANGE_CHECK_UNKNOWN;
+ }
+
+ llvm_unreachable("default clause returns!");
+}
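The condition shapes this routine classifies can be summarized in a small C++ sketch (hypothetical names; `len` stands for a loop-invariant value known to be non-negative):

    // Illustrative sketch only.
    bool classify_examples(int i, int len) {
      bool lower = (0 <= i);                      // RANGE_CHECK_LOWER
      bool upper = (i < len);                     // RANGE_CHECK_UPPER
      bool both  = ((unsigned)i < (unsigned)len); // RANGE_CHECK_BOTH: an
                                                  // unsigned compare against a
                                                  // non-negative bound implies
                                                  // both checks
      return lower && upper && both;
    }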
+
+/// Parses an arbitrary condition into a range check. `Length` is set only if
+/// the range check is recognized to be `RANGE_CHECK_UPPER` or stronger.
+InductiveRangeCheck::RangeCheckKind
+InductiveRangeCheck::parseRangeCheck(Loop *L, ScalarEvolution &SE,
+ Value *Condition, const SCEV *&Index,
+ Value *&Length) {
+ using namespace llvm::PatternMatch;
+
+ Value *A = nullptr;
+ Value *B = nullptr;
+
+ if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
+ Value *IndexA = nullptr, *IndexB = nullptr;
+ Value *LengthA = nullptr, *LengthB = nullptr;
+ ICmpInst *ICmpA = dyn_cast<ICmpInst>(A), *ICmpB = dyn_cast<ICmpInst>(B);
+
+ if (!ICmpA || !ICmpB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ auto RCKindA = parseRangeCheckICmp(L, ICmpA, SE, IndexA, LengthA);
+ auto RCKindB = parseRangeCheckICmp(L, ICmpB, SE, IndexB, LengthB);
+
+ if (RCKindA == InductiveRangeCheck::RANGE_CHECK_UNKNOWN ||
+ RCKindB == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ if (IndexA != IndexB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ if (LengthA != nullptr && LengthB != nullptr && LengthA != LengthB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ Index = SE.getSCEV(IndexA);
+ if (isa<SCEVCouldNotCompute>(Index))
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ Length = LengthA == nullptr ? LengthB : LengthA;
+
+ return (InductiveRangeCheck::RangeCheckKind)(RCKindA | RCKindB);
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+ Value *IndexVal = nullptr;
+
+ auto RCKind = parseRangeCheckICmp(L, ICI, SE, IndexVal, Length);
+
+ if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ Index = SE.getSCEV(IndexVal);
+ if (isa<SCEVCouldNotCompute>(Index))
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ return RCKind;
+ }
+
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+}
+
+
+InductiveRangeCheck *
+InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
+ Loop *L, ScalarEvolution &SE,
+ BranchProbabilityInfo &BPI) {
+
+ if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
+ return nullptr;
+
+ BranchProbability LikelyTaken(15, 16);
+
+ if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken)
+ return nullptr;
+
+ Value *Length = nullptr;
+ const SCEV *IndexSCEV = nullptr;
+
+ auto RCKind = InductiveRangeCheck::parseRangeCheck(L, SE, BI->getCondition(),
+ IndexSCEV, Length);
+
+ if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return nullptr;
+
+ assert(IndexSCEV && "contract with SplitRangeCheckCondition!");
+ assert((!(RCKind & InductiveRangeCheck::RANGE_CHECK_UPPER) || Length) &&
+ "contract with SplitRangeCheckCondition!");
+
+ const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV);
+ bool IsAffineIndex =
+ IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
+
+ if (!IsAffineIndex)
+ return nullptr;
+
+ InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck;
+ IRC->Length = Length;
+ IRC->Offset = IndexAddRec->getStart();
+ IRC->Scale = IndexAddRec->getStepRecurrence(SE);
+ IRC->Branch = BI;
+ IRC->Kind = RCKind;
+ return IRC;
+}
+
+namespace {
+
+// Keeps track of the structure of a loop. This is similar to llvm::Loop,
+// except that it is more lightweight and can track the state of a loop through
+// changing and potentially invalid IR. This structure also formalizes the
+// kinds of loops we can deal with -- ones that have a single latch that is also
+// an exiting block *and* have a canonical induction variable.
+struct LoopStructure {
+ const char *Tag;
+
+ BasicBlock *Header;
+ BasicBlock *Latch;
+
+  // `Latch's terminator instruction is `LatchBr', and its `LatchBrExitIdx'th
+ // successor is `LatchExit', the exit block of the loop.
+ BranchInst *LatchBr;
+ BasicBlock *LatchExit;
+ unsigned LatchBrExitIdx;
+
+ Value *IndVarNext;
+ Value *IndVarStart;
+ Value *LoopExitAt;
+ bool IndVarIncreasing;
+
+ LoopStructure()
+ : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr),
+ LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr),
+ IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false) {}
+
+ template <typename M> LoopStructure map(M Map) const {
+ LoopStructure Result;
+ Result.Tag = Tag;
+ Result.Header = cast<BasicBlock>(Map(Header));
+ Result.Latch = cast<BasicBlock>(Map(Latch));
+ Result.LatchBr = cast<BranchInst>(Map(LatchBr));
+ Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
+ Result.LatchBrExitIdx = LatchBrExitIdx;
+ Result.IndVarNext = Map(IndVarNext);
+ Result.IndVarStart = Map(IndVarStart);
+ Result.LoopExitAt = Map(LoopExitAt);
+ Result.IndVarIncreasing = IndVarIncreasing;
+ return Result;
+ }
+
+ static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &,
+ BranchProbabilityInfo &BPI,
+ Loop &,
+ const char *&);
+};
+
+/// This class is used to constrain loops to run within a given iteration space.
+/// The algorithm this class implements is given a Loop and a range [Begin,
+/// End). The algorithm then tries to break a "main loop" out of the loop
+/// it is given in a way that the "main loop" runs with the induction variable
+/// in a subset of [Begin, End). The algorithm emits appropriate pre and post
+/// loops to run any remaining iterations. The pre loop runs any iterations in
+/// which the induction variable is < Begin, and the post loop runs any
+/// iterations in which the induction variable is >= End.
+///
+class LoopConstrainer {
+ // The representation of a clone of the original loop we started out with.
+ struct ClonedLoop {
+ // The cloned blocks
+ std::vector<BasicBlock *> Blocks;
+
+ // `Map` maps values in the clonee into values in the cloned version
+ ValueToValueMapTy Map;
+
+ // An instance of `LoopStructure` for the cloned loop
+ LoopStructure Structure;
+ };
+
+ // Result of rewriting the range of a loop. See changeIterationSpaceEnd for
+ // more details on what these fields mean.
+ struct RewrittenRangeInfo {
+ BasicBlock *PseudoExit;
+ BasicBlock *ExitSelector;
+ std::vector<PHINode *> PHIValuesAtPseudoExit;
+ PHINode *IndVarEnd;
+
+ RewrittenRangeInfo()
+ : PseudoExit(nullptr), ExitSelector(nullptr), IndVarEnd(nullptr) {}
+ };
+
+ // Calculated subranges we restrict the iteration space of the main loop to.
+ // See the implementation of `calculateSubRanges' for more details on how
+  // these fields are computed. `LowLimit` is None if there is no restriction
+  // on the low end of the restricted iteration space of the main loop.
+  // `HighLimit` is None if there is no restriction on the high end of the
+  // restricted iteration space of the main loop.
+
+ struct SubRanges {
+ Optional<const SCEV *> LowLimit;
+ Optional<const SCEV *> HighLimit;
+ };
+
+ // A utility function that does a `replaceUsesOfWith' on the incoming block
+ // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
+ // incoming block list with `ReplaceBy'.
+ static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
+ BasicBlock *ReplaceBy);
+
+ // Compute a safe set of limits for the main loop to run in -- effectively the
+ // intersection of `Range' and the iteration space of the original loop.
+ // Return None if unable to compute the set of subranges.
+ //
+ Optional<SubRanges> calculateSubRanges() const;
+
+ // Clone `OriginalLoop' and return the result in CLResult. The IR after
+ // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
+ // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
+ // but there is no such edge.
+ //
+ void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;
+
+ // Rewrite the iteration space of the loop denoted by (LS, Preheader). The
+ // iteration space of the rewritten loop ends at ExitLoopAt. The start of the
+ // iteration space is not changed. `ExitLoopAt' is assumed to be slt
+ // `OriginalHeaderCount'.
+ //
+ // If there are iterations left to execute, control is made to jump to
+ // `ContinuationBlock', otherwise they take the normal loop exit. The
+ // returned `RewrittenRangeInfo' object is populated as follows:
+ //
+ // .PseudoExit is a basic block that unconditionally branches to
+ // `ContinuationBlock'.
+ //
+ // .ExitSelector is a basic block that decides, on exit from the loop,
+ // whether to branch to the "true" exit or to `PseudoExit'.
+ //
+ // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
+ // for each PHINode in the loop header on taking the pseudo exit.
+ //
+ // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
+ // preheader because it is made to branch to the loop header only
+ // conditionally.
+ //
+ RewrittenRangeInfo
+ changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader,
+ Value *ExitLoopAt,
+ BasicBlock *ContinuationBlock) const;
+
+ // The loop denoted by `LS' has `OldPreheader' as its preheader. This
+ // function creates a new preheader for `LS' and returns it.
+ //
+ BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader,
+ const char *Tag) const;
+
+ // `ContinuationBlockAndPreheader' was the continuation block for some call to
+ // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
+ // This function rewrites the PHI nodes in `LS.Header' to start with the
+ // correct value.
+ void rewriteIncomingValuesForPHIs(
+ LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader,
+ const LoopConstrainer::RewrittenRangeInfo &RRI) const;
+
+ // Even though we do not preserve any passes at this time, we at least need to
+ // keep the parent loop structure consistent. The `LPPassManager' seems to
+ // verify this after running a loop pass. This function adds the list of
+  // blocks denoted by BBs to this loop's parent loop if required.
+ void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs);
+
+ // Some global state.
+ Function &F;
+ LLVMContext &Ctx;
+ ScalarEvolution &SE;
+
+ // Information about the original loop we started out with.
+ Loop &OriginalLoop;
+ LoopInfo &OriginalLoopInfo;
+ const SCEV *LatchTakenCount;
+ BasicBlock *OriginalPreheader;
+
+ // The preheader of the main loop. This may or may not be different from
+ // `OriginalPreheader'.
+ BasicBlock *MainLoopPreheader;
+
+ // The range we need to run the main loop in.
+ InductiveRangeCheck::Range Range;
+
+ // The structure of the main loop (see comment at the beginning of this class
+ // for a definition)
+ LoopStructure MainLoopStructure;
+
+public:
+ LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS,
+ ScalarEvolution &SE, InductiveRangeCheck::Range R)
+ : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
+ SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
+ OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R),
+ MainLoopStructure(LS) {}
+
+ // Entry point for the algorithm. Returns true on success.
+ bool run();
+};
+
+}
+
+void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
+ BasicBlock *ReplaceBy) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == Block)
+ PN->setIncomingBlock(i, ReplaceBy);
+}
+
+static bool CanBeSMax(ScalarEvolution &SE, const SCEV *S) {
+ APInt SMax =
+ APInt::getSignedMaxValue(cast<IntegerType>(S->getType())->getBitWidth());
+ return SE.getSignedRange(S).contains(SMax) &&
+ SE.getUnsignedRange(S).contains(SMax);
+}
+
+static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) {
+ APInt SMin =
+ APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth());
+ return SE.getSignedRange(S).contains(SMin) &&
+ SE.getUnsignedRange(S).contains(SMin);
+}
+
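These two helpers guard the bound adjustments made in parseLoopStructure below; a rough C++ sketch of the hazard they prevent (hypothetical code):

    // Illustrative sketch only.
    bool inclusive_test(int i, int limit) { return i <= limit; }
    // Rewriting this as (i < limit + 1) is only safe when 'limit' is provably
    // not INT_MAX, since 'limit + 1' would overflow; CanBeSMax (and CanBeSMin
    // for the sge-to-sgt case) are the conservative checks gating that rewrite.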
+Optional<LoopStructure>
+LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI,
+ Loop &L, const char *&FailureReason) {
+ assert(L.isLoopSimplifyForm() && "should follow from addRequired<>");
+
+ BasicBlock *Latch = L.getLoopLatch();
+ if (!L.isLoopExiting(Latch)) {
+ FailureReason = "no loop latch";
+ return None;
+ }
+
+ BasicBlock *Header = L.getHeader();
+ BasicBlock *Preheader = L.getLoopPreheader();
+ if (!Preheader) {
+ FailureReason = "no preheader";
+ return None;
+ }
+
+ BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());
+ if (!LatchBr || LatchBr->isUnconditional()) {
+ FailureReason = "latch terminator not conditional branch";
+ return None;
+ }
+
+ unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0;
+
+ BranchProbability ExitProbability =
+ BPI.getEdgeProbability(LatchBr->getParent(), LatchBrExitIdx);
+
+ if (ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) {
+ FailureReason = "short running loop, not profitable";
+ return None;
+ }
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition());
+ if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) {
+ FailureReason = "latch terminator branch not conditional on integral icmp";
+ return None;
+ }
+
+ const SCEV *LatchCount = SE.getExitCount(&L, Latch);
+ if (isa<SCEVCouldNotCompute>(LatchCount)) {
+ FailureReason = "could not compute latch count";
+ return None;
+ }
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LeftValue = ICI->getOperand(0);
+ const SCEV *LeftSCEV = SE.getSCEV(LeftValue);
+ IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType());
+
+ Value *RightValue = ICI->getOperand(1);
+ const SCEV *RightSCEV = SE.getSCEV(RightValue);
+
+ // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence.
+ if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+ if (isa<SCEVAddRecExpr>(RightSCEV)) {
+ std::swap(LeftSCEV, RightSCEV);
+ std::swap(LeftValue, RightValue);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ } else {
+ FailureReason = "no add recurrences in the icmp";
+ return None;
+ }
+ }
+
+ auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return true;
+
+ IntegerType *Ty = cast<IntegerType>(AR->getType());
+ IntegerType *WideTy =
+ IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
+
+ const SCEVAddRecExpr *ExtendAfterOp =
+ dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+ if (ExtendAfterOp) {
+ const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
+ const SCEV *ExtendedStep =
+ SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
+
+ bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
+ ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
+
+ if (NoSignedWrap)
+ return true;
+ }
+
+ // We may have proved this when computing the sign extension above.
+ return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
+ };
+
+ auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
+ if (!AR->isAffine())
+ return false;
+
+ // Currently we only work with induction variables that have been proved to
+ // not wrap. This restriction can potentially be lifted in the future.
+
+ if (!HasNoSignedWrap(AR))
+ return false;
+
+ if (const SCEVConstant *StepExpr =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) {
+ ConstantInt *StepCI = StepExpr->getValue();
+ if (StepCI->isOne() || StepCI->isMinusOne()) {
+ IsIncreasing = StepCI->isOne();
+ return true;
+ }
+ }
+
+ return false;
+ };
+
+ // `ICI` is interpreted as taking the backedge if the *next* value of the
+ // induction variable satisfies some constraint.
+
+ const SCEVAddRecExpr *IndVarNext = cast<SCEVAddRecExpr>(LeftSCEV);
+ bool IsIncreasing = false;
+ if (!IsInductionVar(IndVarNext, IsIncreasing)) {
+ FailureReason = "LHS in icmp not induction variable";
+ return None;
+ }
+
+ ConstantInt *One = ConstantInt::get(IndVarTy, 1);
+ // TODO: generalize the predicates here to also match their unsigned variants.
+ if (IsIncreasing) {
+ bool FoundExpectedPred =
+ (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) ||
+ (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0);
+
+ if (!FoundExpectedPred) {
+ FailureReason = "expected icmp slt semantically, found something else";
+ return None;
+ }
+
+ if (LatchBrExitIdx == 0) {
+ if (CanBeSMax(SE, RightSCEV)) {
+ // TODO: this restriction is easily removable -- we just have to
+ // remember that the icmp was an slt and not an sle.
+ FailureReason = "limit may overflow when coercing sle to slt";
+ return None;
+ }
+
+ IRBuilder<> B(&*Preheader->rbegin());
+ RightValue = B.CreateAdd(RightValue, One);
+ }
+
+ } else {
+ bool FoundExpectedPred =
+ (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
+ (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0);
+
+ if (!FoundExpectedPred) {
+ FailureReason = "expected icmp sgt semantically, found something else";
+ return None;
+ }
+
+ if (LatchBrExitIdx == 0) {
+ if (CanBeSMin(SE, RightSCEV)) {
+ // TODO: this restriction is easily removable -- we just have to
+ // remember that the icmp was an sgt and not an sge.
+ FailureReason = "limit may overflow when coercing sge to sgt";
+ return None;
+ }
+
+ IRBuilder<> B(&*Preheader->rbegin());
+ RightValue = B.CreateSub(RightValue, One);
+ }
+ }
+
+ const SCEV *StartNext = IndVarNext->getStart();
+ const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
+ const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
+
+ BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
+
+ assert(SE.getLoopDisposition(LatchCount, &L) ==
+ ScalarEvolution::LoopInvariant &&
+ "loop variant exit count doesn't make sense!");
+
+ assert(!L.contains(LatchExit) && "expected an exit block!");
+ const DataLayout &DL = Preheader->getModule()->getDataLayout();
+ Value *IndVarStartV =
+ SCEVExpander(SE, DL, "irce")
+ .expandCodeFor(IndVarStart, IndVarTy, &*Preheader->rbegin());
+ IndVarStartV->setName("indvar.start");
+
+ LoopStructure Result;
+
+ Result.Tag = "main";
+ Result.Header = Header;
+ Result.Latch = Latch;
+ Result.LatchBr = LatchBr;
+ Result.LatchExit = LatchExit;
+ Result.LatchBrExitIdx = LatchBrExitIdx;
+ Result.IndVarStart = IndVarStartV;
+ Result.IndVarNext = LeftValue;
+ Result.IndVarIncreasing = IsIncreasing;
+ Result.LoopExitAt = RightValue;
+
+ FailureReason = nullptr;
+
+ return Result;
+}
+
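Concretely, the loops this routine accepts look roughly like the following (hypothetical example): a single latch that is also the exiting block, terminated by a conditional branch on a signed compare of a unit-stride, non-wrapping induction variable.

    // Illustrative sketch only.
    void accepted_shape(int *a, int n) {
      for (int i = 0; i < n; ++i)   // latch compares the next IV value with slt
        a[i] = 0;
    }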
+Optional<LoopConstrainer::SubRanges>
+LoopConstrainer::calculateSubRanges() const {
+ IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
+
+ if (Range.getType() != Ty)
+ return None;
+
+ LoopConstrainer::SubRanges Result;
+
+ // I think we can be more aggressive here and make this nuw / nsw if the
+ // addition that feeds into the icmp for the latch's terminating branch is nuw
+ // / nsw. In any case, a wrapping 2's complement addition is safe.
+ ConstantInt *One = ConstantInt::get(Ty, 1);
+ const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
+ const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
+
+ bool Increasing = MainLoopStructure.IndVarIncreasing;
+
+ // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the
+ // range of values the induction variable takes.
+
+ const SCEV *Smallest = nullptr, *Greatest = nullptr;
+
+ if (Increasing) {
+ Smallest = Start;
+ Greatest = End;
+ } else {
+ // These two computations may sign-overflow. Here is why that is okay:
+ //
+ // We know that the induction variable does not sign-overflow on any
+ // iteration except the last one, and it starts at `Start` and ends at
+ // `End`, decrementing by one every time.
+ //
+ // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the
+  //   induction variable is decreasing we know that the smallest value
+ // the loop body is actually executed with is `INT_SMIN` == `Smallest`.
+ //
+ // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In
+ // that case, `Clamp` will always return `Smallest` and
+ // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`)
+ // will be an empty range. Returning an empty range is always safe.
+ //
+
+ Smallest = SE.getAddExpr(End, SE.getSCEV(One));
+ Greatest = SE.getAddExpr(Start, SE.getSCEV(One));
+ }
+
+ auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
+ return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S));
+ };
+
+ // In some cases we can prove that we don't need a pre- or post-loop.
+
+ bool ProvablyNoPreloop =
+ SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Smallest);
+ if (!ProvablyNoPreloop)
+ Result.LowLimit = Clamp(Range.getBegin());
+
+ bool ProvablyNoPostLoop =
+ SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd());
+ if (!ProvablyNoPostLoop)
+ Result.HighLimit = Clamp(Range.getEnd());
+
+ return Result;
+}
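// A minimal standalone sketch, not part of this patch: calculateSubRanges()
// clamps the range-check bounds into [Smallest, Greatest), the interval of
// values the induction variable actually takes, with plain ints in place of
// SCEV expressions.
#include <algorithm>

struct SubRangesSketch {
  int Smallest, Greatest; // induction variable takes values in [Smallest, Greatest)
  int clamp(int S) const { return std::max(Smallest, std::min(Greatest, S)); }
};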
+
+void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
+ const char *Tag) const {
+ for (BasicBlock *BB : OriginalLoop.getBlocks()) {
+ BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
+ Result.Blocks.push_back(Clone);
+ Result.Map[BB] = Clone;
+ }
+
+ auto GetClonedValue = [&Result](Value *V) {
+ assert(V && "null values not in domain!");
+ auto It = Result.Map.find(V);
+ if (It == Result.Map.end())
+ return V;
+ return static_cast<Value *>(It->second);
+ };
+
+ Result.Structure = MainLoopStructure.map(GetClonedValue);
+ Result.Structure.Tag = Tag;
+
+ for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
+ BasicBlock *ClonedBB = Result.Blocks[i];
+ BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];
+
+ assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");
+
+ for (Instruction &I : *ClonedBB)
+ RemapInstruction(&I, Result.Map,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+
+ // Exit blocks will now have one more predecessor and their PHI nodes need
+ // to be edited to reflect that. No phi nodes need to be introduced because
+ // the loop is in LCSSA.
+
+ for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB);
+ SBBI != SBBE; ++SBBI) {
+
+ if (OriginalLoop.contains(*SBBI))
+ continue; // not an exit block
+
+ for (Instruction &I : **SBBI) {
+ if (!isa<PHINode>(&I))
+ break;
+
+ PHINode *PN = cast<PHINode>(&I);
+ Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
+ PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
+ }
+ }
+ }
+}
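// A minimal standalone sketch, not part of this patch: GetClonedValue above
// implements a common cloning contract -- values with a map entry are
// redirected to their clone, and values defined outside the cloned region
// pass through unchanged. In miniature, over an ordinary hash map:
#include <unordered_map>

template <typename V>
V *getClonedValueSketch(const std::unordered_map<V *, V *> &Map, V *Orig) {
  auto It = Map.find(Orig);
  return It == Map.end() ? Orig : It->second; // defined outside: unchanged
}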
+
+LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
+ const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt,
+ BasicBlock *ContinuationBlock) const {
+
+ // We start with a loop with a single latch:
+ //
+ // +--------------------+
+ // | |
+ // | preheader |
+ // | |
+ // +--------+-----------+
+ // | ----------------\
+ // | / |
+ // +--------v----v------+ |
+ // | | |
+ // | header | |
+ // | | |
+ // +--------------------+ |
+ // |
+ // ..... |
+ // |
+ // +--------------------+ |
+ // | | |
+ // | latch >----------/
+ // | |
+ // +-------v------------+
+ // |
+ // |
+ // | +--------------------+
+ // | | |
+ // +---> original exit |
+ // | |
+ // +--------------------+
+ //
+ // We change the control flow to look like
+ //
+ //
+ // +--------------------+
+ // | |
+ // | preheader >-------------------------+
+ // | | |
+ // +--------v-----------+ |
+ // | /-------------+ |
+ // | / | |
+ // +--------v--v--------+ | |
+ // | | | |
+ // | header | | +--------+ |
+ // | | | | | |
+ // +--------------------+ | | +-----v-----v-----------+
+ // | | | |
+ // | | | .pseudo.exit |
+ // | | | |
+ // | | +-----------v-----------+
+ // | | |
+ // ..... | | |
+ // | | +--------v-------------+
+ // +--------------------+ | | | |
+ // | | | | | ContinuationBlock |
+ // | latch >------+ | | |
+ // | | | +----------------------+
+ // +---------v----------+ |
+ // | |
+ // | |
+ // | +---------------^-----+
+ // | | |
+ // +-----> .exit.selector |
+ // | |
+ // +----------v----------+
+ // |
+ // +--------------------+ |
+ // | | |
+ // | original exit <----+
+ // | |
+ // +--------------------+
+ //
+
+ RewrittenRangeInfo RRI;
+
+ auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
+ RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
+ &F, BBInsertLocation);
+ RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
+ BBInsertLocation);
+
+ BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
+ bool Increasing = LS.IndVarIncreasing;
+
+ IRBuilder<> B(PreheaderJump);
+
+ // EnterLoopCond - is it okay to start executing this `LS'?
+ Value *EnterLoopCond = Increasing
+ ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt)
+ : B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt);
+
+ B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
+ PreheaderJump->eraseFromParent();
+
+ LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
+ B.SetInsertPoint(LS.LatchBr);
+ Value *TakeBackedgeLoopCond =
+ Increasing ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt)
+ : B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt);
+ Value *CondForBranch = LS.LatchBrExitIdx == 1
+ ? TakeBackedgeLoopCond
+ : B.CreateNot(TakeBackedgeLoopCond);
+
+ LS.LatchBr->setCondition(CondForBranch);
+
+ B.SetInsertPoint(RRI.ExitSelector);
+
+ // IterationsLeft - are there any more iterations left, given the original
+ // upper bound on the induction variable? If not, we branch to the "real"
+ // exit.
+ Value *IterationsLeft = Increasing
+ ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt)
+ : B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt);
+ B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
+
+ BranchInst *BranchToContinuation =
+ BranchInst::Create(ContinuationBlock, RRI.PseudoExit);
+
+ // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
+ // each of the PHI nodes in the loop header. This feeds into the initial
+ // value of the same PHI nodes if/when we continue execution.
+ for (Instruction &I : *LS.Header) {
+ if (!isa<PHINode>(&I))
+ break;
+
+ PHINode *PN = cast<PHINode>(&I);
+
+ PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy",
+ BranchToContinuation);
+
+ NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader);
+ NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch),
+ RRI.ExitSelector);
+ RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
+ }
+
+ RRI.IndVarEnd = PHINode::Create(LS.IndVarNext->getType(), 2, "indvar.end",
+ BranchToContinuation);
+ RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader);
+ RRI.IndVarEnd->addIncoming(LS.IndVarNext, RRI.ExitSelector);
+
+ // The latch exit now has a branch from `RRI.ExitSelector' instead of
+ // `LS.Latch'. The PHI nodes need to be updated to reflect that.
+ for (Instruction &I : *LS.LatchExit) {
+ if (PHINode *PN = dyn_cast<PHINode>(&I))
+ replacePHIBlock(PN, LS.Latch, RRI.ExitSelector);
+ else
+ break;
+ }
+
+ return RRI;
+}
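// A minimal standalone sketch, not part of this patch: what the rewritten
// control flow above computes for the increasing case, written as a plain C++
// loop. The ".exit.selector" test decides between the original exit and the
// ".pseudo.exit" path into the continuation block.
void subloopSemanticsSketch(int IndVarStart, int ExitSubloopAt, int LoopExitAt,
                            void Body(int), void Continuation(int)) {
  int IndVar = IndVarStart;
  if (IndVar < ExitSubloopAt) {       // EnterLoopCond in the new preheader
    do {
      Body(IndVar);
      ++IndVar;                       // IndVarNext
    } while (IndVar < ExitSubloopAt); // TakeBackedgeLoopCond at the latch
    if (!(IndVar < LoopExitAt))       // IterationsLeft in .exit.selector
      return;                         // leave through the original exit
  }
  Continuation(IndVar);               // .pseudo.exit -> ContinuationBlock
}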
+
+void LoopConstrainer::rewriteIncomingValuesForPHIs(
+ LoopStructure &LS, BasicBlock *ContinuationBlock,
+ const LoopConstrainer::RewrittenRangeInfo &RRI) const {
+
+ unsigned PHIIndex = 0;
+ for (Instruction &I : *LS.Header) {
+ if (!isa<PHINode>(&I))
+ break;
+
+ PHINode *PN = cast<PHINode>(&I);
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
+ if (PN->getIncomingBlock(i) == ContinuationBlock)
+ PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
+ }
+
+ LS.IndVarStart = RRI.IndVarEnd;
+}
+
+BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
+ BasicBlock *OldPreheader,
+ const char *Tag) const {
+
+ BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
+ BranchInst::Create(LS.Header, Preheader);
+
+ for (Instruction &I : *LS.Header) {
+ if (!isa<PHINode>(&I))
+ break;
+
+ PHINode *PN = cast<PHINode>(&I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
+ replacePHIBlock(PN, OldPreheader, Preheader);
+ }
+
+ return Preheader;
+}
+
+void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
+ Loop *ParentLoop = OriginalLoop.getParentLoop();
+ if (!ParentLoop)
+ return;
+
+ for (BasicBlock *BB : BBs)
+ ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo);
+}
+
+bool LoopConstrainer::run() {
+ BasicBlock *Preheader = nullptr;
+ LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
+ Preheader = OriginalLoop.getLoopPreheader();
+ assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
+ "preconditions!");
+
+ OriginalPreheader = Preheader;
+ MainLoopPreheader = Preheader;
+
+ Optional<SubRanges> MaybeSR = calculateSubRanges();
+ if (!MaybeSR.hasValue()) {
+ DEBUG(dbgs() << "irce: could not compute subranges\n");
+ return false;
+ }
+
+ SubRanges SR = MaybeSR.getValue();
+ bool Increasing = MainLoopStructure.IndVarIncreasing;
+ IntegerType *IVTy =
+ cast<IntegerType>(MainLoopStructure.IndVarNext->getType());
+
+ SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
+ Instruction *InsertPt = OriginalPreheader->getTerminator();
+
+ // It would have been better to make `PreLoop' and `PostLoop'
+ // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
+ // constructor.
+ ClonedLoop PreLoop, PostLoop;
+ bool NeedsPreLoop =
+ Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
+ bool NeedsPostLoop =
+ Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();
+
+ Value *ExitPreLoopAt = nullptr;
+ Value *ExitMainLoopAt = nullptr;
+ const SCEVConstant *MinusOneS =
+ cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */));
+
+ if (NeedsPreLoop) {
+ const SCEV *ExitPreLoopAtSCEV = nullptr;
+
+ if (Increasing)
+ ExitPreLoopAtSCEV = *SR.LowLimit;
+ else {
+ if (CanBeSMin(SE, *SR.HighLimit)) {
+ DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+ << "preloop exit limit. HighLimit = " << *(*SR.HighLimit)
+ << "\n");
+ return false;
+ }
+ ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS);
+ }
+
+ ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt);
+ ExitPreLoopAt->setName("exit.preloop.at");
+ }
+
+ if (NeedsPostLoop) {
+ const SCEV *ExitMainLoopAtSCEV = nullptr;
+
+ if (Increasing)
+ ExitMainLoopAtSCEV = *SR.HighLimit;
+ else {
+ if (CanBeSMin(SE, *SR.LowLimit)) {
+ DEBUG(dbgs() << "irce: could not prove no-overflow when computing "
+ << "mainloop exit limit. LowLimit = " << *(*SR.LowLimit)
+ << "\n");
+ return false;
+ }
+ ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS);
+ }
+
+ ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt);
+ ExitMainLoopAt->setName("exit.mainloop.at");
+ }
+
+ // We clone these ahead of time so that we don't have to deal with changing
+ // and temporarily invalid IR as we transform the loops.
+ if (NeedsPreLoop)
+ cloneLoop(PreLoop, "preloop");
+ if (NeedsPostLoop)
+ cloneLoop(PostLoop, "postloop");
+
+ RewrittenRangeInfo PreLoopRRI;
+
+ if (NeedsPreLoop) {
+ Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
+ PreLoop.Structure.Header);
+
+ MainLoopPreheader =
+ createPreheader(MainLoopStructure, Preheader, "mainloop");
+ PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader,
+ ExitPreLoopAt, MainLoopPreheader);
+ rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
+ PreLoopRRI);
+ }
+
+ BasicBlock *PostLoopPreheader = nullptr;
+ RewrittenRangeInfo PostLoopRRI;
+
+ if (NeedsPostLoop) {
+ PostLoopPreheader =
+ createPreheader(PostLoop.Structure, Preheader, "postloop");
+ PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
+ ExitMainLoopAt, PostLoopPreheader);
+ rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
+ PostLoopRRI);
+ }
+
+ BasicBlock *NewMainLoopPreheader =
+ MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr;
+ BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit,
+ PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit,
+ PostLoopRRI.ExitSelector, NewMainLoopPreheader};
+
+ // Some of the above may be nullptr, filter them out before passing to
+ // addToParentLoopIfNeeded.
+ auto NewBlocksEnd =
+ std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);
+
+ addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd));
+ addToParentLoopIfNeeded(PreLoop.Blocks);
+ addToParentLoopIfNeeded(PostLoop.Blocks);
+
+ return true;
+}
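// A minimal standalone sketch, not part of this patch: the net effect of
// run() on an increasing loop, in plain C++. The iteration space [Begin, End)
// is split so the main loop covers only iterations where the range checks
// provably pass, while the cloned pre/post loops keep the checks.
void constrainedLoopsSketch(int Begin, int End, int ExitPreLoopAt,
                            int ExitMainLoopAt, void Body(int, bool)) {
  int i = Begin;
  for (; i < ExitPreLoopAt && i < End; ++i)
    Body(i, /*ChecksNeeded=*/true);  // preloop
  for (; i < ExitMainLoopAt && i < End; ++i)
    Body(i, /*ChecksNeeded=*/false); // main loop: checks folded to constants
  for (; i < End; ++i)
    Body(i, /*ChecksNeeded=*/true);  // postloop
}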
+
+/// Computes and returns a range of values for the induction variable (IndVar)
+/// in which the range check can be safely elided. If it cannot compute such a
+/// range, returns None.
+Optional<InductiveRangeCheck::Range>
+InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
+ const SCEVAddRecExpr *IndVar,
+ IRBuilder<> &) const {
+ // IndVar is of the form "A + B * I" (where "I" is the canonical induction
+ // variable, which may or may not exist as a real llvm::Value in the loop) and
+ // this inductive range check is a range check on the "C + D * I" ("C" is
+ // getOffset() and "D" is getScale()). We rewrite the value being range
+ // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - N * A".
+ // Currently we support this only for "B" = "D" = { 1 or -1 }, but the code
+ // can be generalized as needed.
+ //
+ // The actual inequalities we solve are of the form
+ //
+ // 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1)
+ //
+ // The inequality is satisfied by -M <= IndVar < (L - M) [^1]. All additions
+ // and subtractions are two's complement wrapping and comparisons are signed.
+ //
+ // Proof:
+ //
+ // If there exists IndVar such that -M <= IndVar < (L - M) then it follows
+ // that -M <= (-M + L) [== Eq. 1]. Since L >= 0, if (-M + L) sign-overflows
+ // then (-M + L) < (-M). Hence by [Eq. 1], (-M + L) could not have
+ // overflowed.
+ //
+ // This means IndVar = t + (-M) for t in [0, L). Hence (IndVar + M) = t.
+ // Hence 0 <= (IndVar + M) < L
+
+ // [^1]: Note that the solution does _not_ apply if L < 0; consider values M =
+ // 127, IndVar = 126 and L = -2 in an i8 world.
+
+ if (!IndVar->isAffine())
+ return None;
+
+ const SCEV *A = IndVar->getStart();
+ const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE));
+ if (!B)
+ return None;
+
+ const SCEV *C = getOffset();
+ const SCEVConstant *D = dyn_cast<SCEVConstant>(getScale());
+ if (D != B)
+ return None;
+
+ ConstantInt *ConstD = D->getValue();
+ if (!(ConstD->isMinusOne() || ConstD->isOne()))
+ return None;
+
+ const SCEV *M = SE.getMinusSCEV(C, A);
+
+ const SCEV *Begin = SE.getNegativeSCEV(M);
+ const SCEV *UpperLimit = nullptr;
+
+ // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
+ // We can potentially do much better here.
+ if (Value *V = getLength()) {
+ UpperLimit = SE.getSCEV(V);
+ } else {
+ assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!");
+ unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+ UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
+ }
+
+ const SCEV *End = SE.getMinusSCEV(UpperLimit, M);
+ return InductiveRangeCheck::Range(Begin, End);
+}
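// A minimal standalone sketch, not part of this patch: the inequality solved
// above, checked with concrete 32-bit arithmetic. For a range check
// "0 <= M + I < L" with L >= 0, the safe values of I are exactly [-M, L - M)
// under wrapping adds/subs and signed comparisons -- the [Begin, End) pair
// returned above.
#include <cstdint>

bool rangeCheckPasses(int32_t M, int32_t I, int32_t L) {
  int32_t V = (int32_t)((uint32_t)M + (uint32_t)I); // wrapping add
  return 0 <= V && V < L;
}

bool inSafeIterationSpace(int32_t M, int32_t I, int32_t L) {
  int32_t Begin = (int32_t)(0u - (uint32_t)M);        // -M, wrapping
  int32_t End = (int32_t)((uint32_t)L - (uint32_t)M); // L - M, wrapping
  return Begin <= I && I < End; // implies rangeCheckPasses(M, I, L) for L >= 0
}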
+
+static Optional<InductiveRangeCheck::Range>
+IntersectRange(ScalarEvolution &SE,
+ const Optional<InductiveRangeCheck::Range> &R1,
+ const InductiveRangeCheck::Range &R2, IRBuilder<> &B) {
+ if (!R1.hasValue())
+ return R2;
+ auto &R1Value = R1.getValue();
+
+ // TODO: we could widen the smaller range and have this work; but for now we
+ // bail out to keep things simple.
+ if (R1Value.getType() != R2.getType())
+ return None;
+
+ const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin());
+ const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd());
+
+ return InductiveRangeCheck::Range(NewBegin, NewEnd);
+}
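// A minimal standalone sketch, not part of this patch: IntersectRange over
// plain ints. Ranges are half-open [Begin, End) pairs, so intersection is
// smax on the begins and smin on the ends, possibly yielding an empty
// (Begin >= End) range, which is safe -- the main loop then runs zero
// iterations.
#include <algorithm>
#include <utility>

std::pair<int, int> intersectSketch(std::pair<int, int> R1,
                                    std::pair<int, int> R2) {
  return {std::max(R1.first, R2.first), std::min(R1.second, R2.second)};
}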
+
+bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (L->getBlocks().size() >= LoopSizeCutoff) {
+ DEBUG(dbgs() << "irce: giving up constraining loop, too large\n");
+ return false;
+ }
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
+ return false;
+ }
+
+ LLVMContext &Context = Preheader->getContext();
+ InductiveRangeCheck::AllocatorTy IRCAlloc;
+ SmallVector<InductiveRangeCheck *, 16> RangeChecks;
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
+ BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+
+ for (auto BBI : L->getBlocks())
+ if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
+ if (InductiveRangeCheck *IRC =
+ InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI))
+ RangeChecks.push_back(IRC);
+
+ if (RangeChecks.empty())
+ return false;
+
+ auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
+ OS << "irce: looking at loop "; L->print(OS);
+ OS << "irce: loop has " << RangeChecks.size()
+ << " inductive range checks: \n";
+ for (InductiveRangeCheck *IRC : RangeChecks)
+ IRC->print(OS);
+ };
+
+ DEBUG(PrintRecognizedRangeChecks(dbgs()));
+
+ if (PrintRangeChecks)
+ PrintRecognizedRangeChecks(errs());
+
+ const char *FailureReason = nullptr;
+ Optional<LoopStructure> MaybeLoopStructure =
+ LoopStructure::parseLoopStructure(SE, BPI, *L, FailureReason);
+ if (!MaybeLoopStructure.hasValue()) {
+ DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason
+ << "\n");
+ return false;
+ }
+ LoopStructure LS = MaybeLoopStructure.getValue();
+ bool Increasing = LS.IndVarIncreasing;
+ const SCEV *MinusOne =
+ SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true);
+ const SCEVAddRecExpr *IndVar =
+ cast<SCEVAddRecExpr>(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne));
+
+ Optional<InductiveRangeCheck::Range> SafeIterRange;
+ Instruction *ExprInsertPt = Preheader->getTerminator();
+
+ SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate;
+
+ IRBuilder<> B(ExprInsertPt);
+ for (InductiveRangeCheck *IRC : RangeChecks) {
+ auto Result = IRC->computeSafeIterationSpace(SE, IndVar, B);
+ if (Result.hasValue()) {
+ auto MaybeSafeIterRange =
+ IntersectRange(SE, SafeIterRange, Result.getValue(), B);
+ if (MaybeSafeIterRange.hasValue()) {
+ RangeChecksToEliminate.push_back(IRC);
+ SafeIterRange = MaybeSafeIterRange.getValue();
+ }
+ }
+ }
+
+ if (!SafeIterRange.hasValue())
+ return false;
+
+ LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS,
+ SE, SafeIterRange.getValue());
+ bool Changed = LC.run();
+
+ if (Changed) {
+ auto PrintConstrainedLoopInfo = [L]() {
+ dbgs() << "irce: in function ";
+ dbgs() << L->getHeader()->getParent()->getName() << ": ";
+ dbgs() << "constrained ";
+ L->print(dbgs());
+ };
+
+ DEBUG(PrintConstrainedLoopInfo());
+
+ if (PrintChangedLoops)
+ PrintConstrainedLoopInfo();
+
+ // Optimize away the now-redundant range checks.
+
+ for (InductiveRangeCheck *IRC : RangeChecksToEliminate) {
+ ConstantInt *FoldedRangeCheck = IRC->getPassingDirection()
+ ? ConstantInt::getTrue(Context)
+ : ConstantInt::getFalse(Context);
+ IRC->getBranch()->setCondition(FoldedRangeCheck);
+ }
+ }
+
+ return Changed;
+}
+
+Pass *llvm::createInductiveRangeCheckEliminationPass() {
+ return new InductiveRangeCheckElimination;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 78beb3f..711df41 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -32,7 +33,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -78,7 +78,6 @@ namespace {
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- const DataLayout *DL;
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
@@ -115,7 +114,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
void FindLoopHeaders(Function &F);
@@ -145,7 +144,7 @@ char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -159,9 +158,7 @@ bool JumpThreading::runOnFunction(Function &F) {
return false;
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
LVI = &getAnalysis<LazyValueInfo>();
// Remove unreachable blocks from function as they may result in infinite
@@ -505,6 +502,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
assert(Preference == WantInteger && "Compares only produce integers");
PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
if (PN && PN->getParent() == BB) {
+ const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -709,7 +707,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI);
+ Value *SimpleVal =
+ ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -792,6 +791,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
CondBr->eraseFromParent();
+ if (CondCmp->use_empty())
+ CondCmp->eraseFromParent();
+ else if (CondCmp->getParent() == BB) {
+ // If the fact we just learned is true for all uses of the
+ // condition, replace it with a constant value.
+ auto *CI = Baseline == LazyValueInfo::True ?
+ ConstantInt::getTrue(CondCmp->getType()) :
+ ConstantInt::getFalse(CondCmp->getType());
+ CondCmp->replaceAllUsesWith(CI);
+ CondCmp->eraseFromParent();
+ }
return true;
}
}
@@ -993,7 +1003,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split");
}
// If the value isn't available in all predecessors, then there will be
@@ -1418,7 +1428,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
}
// And finally, do it!
@@ -1521,7 +1531,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, DL, TLI);
+ SimplifyInstructionsInBlock(NewBB, TLI);
// Threaded an edge!
++NumThreads;
@@ -1561,7 +1571,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm");
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
@@ -1575,7 +1585,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
- PredBB = SplitEdge(PredBB, BB, this);
+ PredBB = SplitEdge(PredBB, BB);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
@@ -1586,7 +1596,6 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
@@ -1603,7 +1612,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV = SimplifyInstruction(New, DL)) {
+ if (Value *IV =
+ SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
delete New;
ValueMapping[BI] = IV;
} else {
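// A minimal standalone sketch, not part of this patch: the recurring refactor
// in the JumpThreading hunks above (and the LICM/LoadCombine hunks below) is
// to drop the cached, possibly-null DataLayout pointer and fetch the layout
// from the owning module at each use site, so there is no "no target data"
// case left to handle. The types below are illustrative stand-ins, not LLVM's.
struct ModuleSketch { int PointerSizeInBits = 64; };
struct InstructionSketch {
  ModuleSketch *Parent;
  ModuleSketch *getModule() const { return Parent; }
};

int pointerSizeAt(const InstructionSketch &I) {
  return I.getModule()->PointerSizeInBits; // always valid, never null
}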
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index e145981..f0e6d64 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -38,6 +38,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -52,7 +53,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -71,6 +71,33 @@ static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden,
cl::desc("Disable memory promotion in LICM pass"));
+static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop);
+static bool hoist(Instruction &I, BasicBlock *Preheader);
+static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
+ const Loop *CurLoop, AliasSetTracker *CurAST);
+static bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo);
+static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo,
+ const Instruction *CtxI = nullptr);
+static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ AliasSetTracker *CurAST);
+static Instruction *CloneInstructionInExitBlock(const Instruction &I,
+ BasicBlock &ExitBlock,
+ PHINode &PN,
+ const LoopInfo *LI);
+static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ Loop *CurLoop, AliasSetTracker *CurAST,
+ LICMSafetyInfo *SafetyInfo);
+
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
@@ -86,7 +113,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -94,7 +121,7 @@ namespace {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
using llvm::Pass::doFinalization;
@@ -109,7 +136,6 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
- const DataLayout *DL; // DataLayout for constant folding.
TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
// State that is updated as we process loops.
@@ -117,10 +143,6 @@ namespace {
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
- bool MayThrow; // The current loop contains an instruction which
- // may throw, thus preventing code motion of
- // instructions with side effects.
- bool HeaderMayThrow; // Same as previous, but specific to loop header
DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
@@ -133,88 +155,17 @@ namespace {
/// Simple Analysis hook. Delete loop L from alias set map.
void deleteAnalysisLoop(Loop *L) override;
-
- /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
- /// dominated by the specified block, and that are in the current loop) in
- /// reverse depth first order w.r.t the DominatorTree. This allows us to
- /// visit uses before definitions, allowing us to sink a loop body in one
- /// pass without iteration.
- ///
- void SinkRegion(DomTreeNode *N);
-
- /// HoistRegion - Walk the specified region of the CFG (defined by all
- /// blocks dominated by the specified block, and that are in the current
- /// loop) in depth first order w.r.t the DominatorTree. This allows us to
- /// visit definitions before uses, allowing us to hoist a loop body in one
- /// pass without iteration.
- ///
- void HoistRegion(DomTreeNode *N);
-
- /// inSubLoop - Little predicate that returns true if the specified basic
- /// block is in a subloop of the current one, not the current one itself.
- ///
- bool inSubLoop(BasicBlock *BB) {
- assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
- return LI->getLoopFor(BB) != CurLoop;
- }
-
- /// sink - When an instruction is found to only be used outside of the loop,
- /// this function moves it to the exit blocks and patches up SSA form as
- /// needed.
- ///
- void sink(Instruction &I);
-
- /// hoist - When an instruction is found to only use loop invariant operands
- /// that is safe to hoist, this instruction is called to do the dirty work.
- ///
- void hoist(Instruction &I);
-
- /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it
- /// is not a trapping instruction or if it is a trapping instruction and is
- /// guaranteed to execute.
- ///
- bool isSafeToExecuteUnconditionally(Instruction &I);
-
- /// isGuaranteedToExecute - Check that the instruction is guaranteed to
- /// execute.
- ///
- bool isGuaranteedToExecute(Instruction &I);
-
- /// pointerInvalidatedByLoop - Return true if the body of this loop may
- /// store into the memory location pointed to by V.
- ///
- bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const AAMDNodes &AAInfo) {
- // Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
- }
-
- bool canSinkOrHoistInst(Instruction &I);
- bool isNotUsedInLoop(Instruction &I);
-
- void PromoteAliasSet(AliasSet &AS,
- SmallVectorImpl<BasicBlock*> &ExitBlocks,
- SmallVectorImpl<Instruction*> &InsertPts,
- PredIteratorCache &PIC);
-
- /// \brief Create a copy of the instruction in the exit block and patch up
- /// SSA.
- /// PN is a user of I in ExitBlock that can be used to get the number and
- /// list of predecessors fast.
- Instruction *CloneInstructionInExitBlock(Instruction &I,
- BasicBlock &ExitBlock,
- PHINode &PN);
};
}
char LICM::ID = 0;
INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -231,13 +182,11 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
Changed = false;
// Get our Loop and Alias Analysis information...
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -274,19 +223,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST->add(*BB); // Incorporate the specified basic block
}
- HeaderMayThrow = false;
- BasicBlock *Header = L->getHeader();
- for (BasicBlock::iterator I = Header->begin(), E = Header->end();
- (I != E) && !HeaderMayThrow; ++I)
- HeaderMayThrow |= I->mayThrow();
- MayThrow = HeaderMayThrow;
- // TODO: We've already searched for instructions which may throw in subloops.
- // We may want to reuse this information.
- for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
- (BB != BBE) && !MayThrow ; ++BB)
- for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
- (I != E) && !MayThrow; ++I)
- MayThrow |= I->mayThrow();
+ // Compute loop safety information.
+ LICMSafetyInfo SafetyInfo;
+ computeLICMSafetyInfo(&SafetyInfo, CurLoop);
// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of
@@ -299,9 +238,11 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// instructions, we perform another pass to hoist them out of the loop.
//
if (L->hasDedicatedExits())
- SinkRegion(DT->getNode(L->getHeader()));
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop,
+ CurAST, &SafetyInfo);
if (Preheader)
- HoistRegion(DT->getNode(L->getHeader()));
+ Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI,
+ CurLoop, CurAST, &SafetyInfo);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -313,7 +254,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
I != E; ++I)
- PromoteAliasSet(*I, ExitBlocks, InsertPts, PIC);
+ Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts,
+ PIC, LI, DT, CurLoop,
+ CurAST, &SafetyInfo);
// Once we have promoted values across the loop body we have to recursively
// reform LCSSA as any nested loop may now have values defined within the
@@ -346,27 +289,35 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
-/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
-/// dominated by the specified block, and that are in the current loop) in
-/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
-/// uses before definitions, allowing us to sink a loop body in one pass without
-/// iteration.
+/// Walk the specified region of the CFG (defined by all blocks dominated by
+/// the specified block, and that are in the current loop) in reverse depth
+/// first order w.r.t the DominatorTree. This allows us to visit uses before
+/// definitions, allowing us to sink a loop body in one pass without iteration.
///
-void LICM::SinkRegion(DomTreeNode *N) {
- assert(N != nullptr && "Null dominator tree node?");
+bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
+ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
+
+ // Verify inputs.
+ assert(N != nullptr && AA != nullptr && LI != nullptr &&
+ DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
+ SafetyInfo != nullptr && "Unexpected input to sinkRegion");
+
+ // Initialize Changed to false.
+ bool Changed = false;
+ // Get the basic block for this dominator tree node.
BasicBlock *BB = N->getBlock();
-
// If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return;
+ if (!CurLoop->contains(BB)) return Changed;
// We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
- SinkRegion(Children[i]);
-
+ Changed |=
+ sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
- if (inSubLoop(BB)) return;
+ if (inSubLoop(BB, CurLoop, LI)) return Changed;
for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
Instruction &I = *--II;
@@ -387,35 +338,43 @@ void LICM::SinkRegion(DomTreeNode *N) {
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) {
+ if (isNotUsedInLoop(I, CurLoop) &&
+ canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) {
++II;
- sink(I);
+ Changed |= sink(I, LI, DT, CurLoop, CurAST);
}
}
+ return Changed;
}
-/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
-/// dominated by the specified block, and that are in the current loop) in depth
-/// first order w.r.t the DominatorTree. This allows us to visit definitions
-/// before uses, allowing us to hoist a loop body in one pass without iteration.
+/// Walk the specified region of the CFG (defined by all blocks dominated by
+/// the specified block, and that are in the current loop) in depth first
+/// order w.r.t the DominatorTree. This allows us to visit definitions before
+/// uses, allowing us to hoist a loop body in one pass without iteration.
///
-void LICM::HoistRegion(DomTreeNode *N) {
- assert(N != nullptr && "Null dominator tree node?");
+bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
+ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
+ // Verify inputs.
+ assert(N != nullptr && AA != nullptr && LI != nullptr &&
+ DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
+ SafetyInfo != nullptr && "Unexpected input to hoistRegion");
+ // Initialize Changed to false.
+ bool Changed = false;
+ // Get the basic block for this dominator tree node.
BasicBlock *BB = N->getBlock();
-
// If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return;
-
+ if (!CurLoop->contains(BB)) return Changed;
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
- if (!inSubLoop(BB))
+ if (!inSubLoop(BB, CurLoop, LI))
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
Instruction &I = *II++;
-
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(
+ &I, I.getModule()->getDataLayout(), TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -428,20 +387,49 @@ void LICM::HoistRegion(DomTreeNode *N) {
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
//
- if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
- isSafeToExecuteUnconditionally(I))
- hoist(I);
+ if (CurLoop->hasLoopInvariantOperands(&I) &&
+ canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) &&
+ isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
+ CurLoop->getLoopPreheader()->getTerminator()))
+ Changed |= hoist(I, CurLoop->getLoopPreheader());
}
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
- HoistRegion(Children[i]);
+ Changed |=
+ hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ return Changed;
+}
+
+/// Computes loop safety information: checks the loop body and header
+/// for the possibility that they may throw an exception.
+///
+void llvm::computeLICMSafetyInfo(LICMSafetyInfo *SafetyInfo, Loop *CurLoop) {
+ assert(CurLoop != nullptr && "CurLoop can't be null");
+ BasicBlock *Header = CurLoop->getHeader();
+ // Setting default safety values.
+ SafetyInfo->MayThrow = false;
+ SafetyInfo->HeaderMayThrow = false;
+ // Iterate over the header and compute safety info.
+ for (BasicBlock::iterator I = Header->begin(), E = Header->end();
+ (I != E) && !SafetyInfo->HeaderMayThrow; ++I)
+ SafetyInfo->HeaderMayThrow |= I->mayThrow();
+
+ SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
+ // Iterate over loop instructions and compute safety info.
+ for (Loop::block_iterator BB = CurLoop->block_begin(),
+ BBE = CurLoop->block_end(); (BB != BBE) && !SafetyInfo->MayThrow ; ++BB)
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
+ (I != E) && !SafetyInfo->MayThrow; ++I)
+ SafetyInfo->MayThrow |= I->mayThrow();
}
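// A minimal standalone sketch, not part of this patch: the scan above in
// miniature. The header is tracked separately because an instruction in the
// header can still be guaranteed to execute (see isGuaranteedToExecute),
// while a throw anywhere in the loop blocks speculation of everything that
// might run after it.
#include <vector>

struct SafetyInfoSketch { bool HeaderMayThrow = false, MayThrow = false; };

SafetyInfoSketch computeSafetySketch(const std::vector<bool> &HeaderMayThrow,
                                     const std::vector<bool> &BodyMayThrow) {
  SafetyInfoSketch SI;
  for (bool T : HeaderMayThrow)
    SI.HeaderMayThrow |= T; // the real code stops at the first throwing inst
  SI.MayThrow = SI.HeaderMayThrow;
  for (bool T : BodyMayThrow)
    SI.MayThrow |= T;
  return SI;
}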
/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
/// instruction.
///
-bool LICM::canSinkOrHoistInst(Instruction &I) {
+bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
+ TargetLibraryInfo *TLI, Loop *CurLoop,
+ AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@@ -462,7 +450,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
- return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo);
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST);
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
// Don't sink or hoist dbg info; it's legal, but not useful.
if (isa<DbgInfoIntrinsic>(I))
@@ -501,30 +489,34 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
!isa<InsertValueInst>(I))
return false;
- return isSafeToExecuteUnconditionally(I);
+ // TODO: Plumb the context instruction through to make hoisting and sinking
+ // more powerful. Hoisting of loads already works due to the special casing
+ // above.
+ return isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo,
+ nullptr);
}
-/// \brief Returns true if a PHINode is a trivially replaceable with an
+/// Returns true if a PHINode is trivially replaceable with an
/// Instruction.
+/// This is true when all incoming values are that instruction.
+/// This pattern occurs most often with LCSSA PHI nodes.
///
-/// This is true when all incoming values are that instruction. This pattern
-/// occurs most often with LCSSA PHI nodes.
-static bool isTriviallyReplacablePHI(PHINode &PN, Instruction &I) {
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- if (PN.getIncomingValue(i) != &I)
+static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
+ for (const Value *IncValue : PN.incoming_values())
+ if (IncValue != &I)
return false;
return true;
}
-/// isNotUsedInLoop - Return true if the only users of this instruction are
-/// outside of the loop. If this is true, we can sink the instruction to the
-/// exit blocks of the loop.
+/// Return true if the only users of this instruction are outside of
+/// the loop. If this is true, we can sink the instruction to the exit
+/// blocks of the loop.
///
-bool LICM::isNotUsedInLoop(Instruction &I) {
- for (User *U : I.users()) {
- Instruction *UI = cast<Instruction>(U);
- if (PHINode *PN = dyn_cast<PHINode>(UI)) {
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop) {
+ for (const User *U : I.users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
// A PHI node where all of the incoming values are this instruction is
// special -- it can just be RAUW'ed with the instruction and thus
// doesn't require a use in the predecessor. This is a particularly important
@@ -552,9 +544,10 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
return true;
}
-Instruction *LICM::CloneInstructionInExitBlock(Instruction &I,
- BasicBlock &ExitBlock,
- PHINode &PN) {
+static Instruction *CloneInstructionInExitBlock(const Instruction &I,
+ BasicBlock &ExitBlock,
+ PHINode &PN,
+ const LoopInfo *LI) {
Instruction *New = I.clone();
ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
if (!I.getName().empty()) New->setName(I.getName() + ".le");
@@ -581,14 +574,15 @@ Instruction *LICM::CloneInstructionInExitBlock(Instruction &I,
return New;
}
-/// sink - When an instruction is found to only be used outside of the loop,
-/// this function moves it to the exit blocks and patches up SSA form as needed.
+/// When an instruction is found to only be used outside of the loop, this
+/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.
///
-void LICM::sink(Instruction &I) {
+static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT,
+ const Loop *CurLoop, AliasSetTracker *CurAST) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
-
+ bool Changed = false;
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumSunk;
@@ -597,7 +591,8 @@ void LICM::sink(Instruction &I) {
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
- SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
+ SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
#endif
// Clones of this instruction. Don't create more than one per exit block!
@@ -625,7 +620,7 @@ void LICM::sink(Instruction &I) {
New = It->second;
else
New = SunkCopies[ExitBlock] =
- CloneInstructionInExitBlock(I, *ExitBlock, *PN);
+ CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI);
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
@@ -633,37 +628,43 @@ void LICM::sink(Instruction &I) {
CurAST->deleteValue(&I);
I.eraseFromParent();
+ return Changed;
}
-/// hoist - When an instruction is found to only use loop invariant operands
-/// that is safe to hoist, this instruction is called to do the dirty work.
+/// When an instruction is found to use only loop-invariant operands and it
+/// is safe to hoist, this function is called to do the dirty work.
///
-void LICM::hoist(Instruction &I) {
+static bool hoist(Instruction &I, BasicBlock *Preheader) {
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
<< I << "\n");
-
// Move the new node to the Preheader, before its terminator.
I.moveBefore(Preheader->getTerminator());
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumHoisted;
- Changed = true;
+ return true;
}
-/// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is
-/// not a trapping instruction or if it is a trapping instruction and is
-/// guaranteed to execute.
-///
-bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
- // If it is not a trapping instruction, it is always safe to hoist.
- if (isSafeToSpeculativelyExecute(&Inst, DL))
+/// Only sink or hoist an instruction if it is not a trapping instruction,
+/// or if the instruction is known not to trap when moved to the preheader,
+/// or if it is a trapping instruction that is guaranteed to execute.
+static bool isSafeToExecuteUnconditionally(const Instruction &Inst,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo,
+ const Instruction *CtxI) {
+ if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
return true;
- return isGuaranteedToExecute(Inst);
+ return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
}
-bool LICM::isGuaranteedToExecute(Instruction &Inst) {
+static bool isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop,
+ const LICMSafetyInfo *SafetyInfo) {
// We have to check to make sure that the instruction dominates all
// of the exit blocks. If it doesn't, then there is a path out of the loop
@@ -675,11 +676,11 @@ bool LICM::isGuaranteedToExecute(Instruction &Inst) {
if (Inst.getParent() == CurLoop->getHeader())
// If there's a throw in the header block, we can't guarantee we'll reach
// Inst.
- return !HeaderMayThrow;
+ return !SafetyInfo->HeaderMayThrow;
// Somewhere in this loop there is an instruction which may throw and make us
// exit the loop.
- if (MayThrow)
+ if (SafetyInfo->MayThrow)
return false;
// Get the exit blocks for the current loop.
@@ -719,17 +720,18 @@ namespace {
// We need to create an LCSSA PHI node for the incoming value and
// store that.
PHINode *PN = PHINode::Create(
- I->getType(), PredCache.GetNumPreds(BB),
+ I->getType(), PredCache.size(BB),
I->getName() + ".lcssa", BB->begin());
- for (BasicBlock **PI = PredCache.GetPreds(BB); *PI; ++PI)
- PN->addIncoming(I, *PI);
+ for (BasicBlock *Pred : PredCache.get(BB))
+ PN->addIncoming(I, Pred);
return PN;
}
return V;
}
public:
- LoopPromoter(Value *SP, const SmallVectorImpl<Instruction *> &Insts,
+ LoopPromoter(Value *SP,
+ ArrayRef<const Instruction *> Insts,
SSAUpdater &S, SmallPtrSetImpl<Value *> &PMA,
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
@@ -777,25 +779,37 @@ namespace {
};
} // end anon namespace
-/// PromoteAliasSet - Try to promote memory values to scalars by sinking
-/// stores out of the loop and moving loads to before the loop. We do this by
-/// looping over the stores in the loop, looking for stores to Must pointers
-/// which are loop invariant.
+/// Try to promote memory values to scalars by sinking stores out of the
+/// loop and moving loads to before the loop. We do this by looping over
+/// the stores in the loop, looking for stores to Must pointers which are
+/// loop invariant.
///
-void LICM::PromoteAliasSet(AliasSet &AS,
- SmallVectorImpl<BasicBlock*> &ExitBlocks,
- SmallVectorImpl<Instruction*> &InsertPts,
- PredIteratorCache &PIC) {
+bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ SmallVectorImpl<Instruction *> &InsertPts,
+ PredIteratorCache &PIC, LoopInfo *LI,
+ DominatorTree *DT, Loop *CurLoop,
+ AliasSetTracker *CurAST,
+ LICMSafetyInfo *SafetyInfo) {
+ // Verify inputs.
+ assert(LI != nullptr && DT != nullptr &&
+ CurLoop != nullptr && CurAST != nullptr &&
+ SafetyInfo != nullptr &&
+ "Unexpected Input to promoteLoopAccessesToScalars");
+ // Initially set Changed status to false.
+ bool Changed = false;
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
- return;
+ return Changed;
assert(!AS.empty() &&
"Must alias set should have at least one pointer element in it!");
+
Value *SomePtr = AS.begin()->getValue();
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
// It isn't safe to promote a load/store from the loop if the load/store is
// conditional. For example, turning:
@@ -832,7 +846,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
if (SomePtr->getType() != ASIV->getType())
- return;
+ return Changed;
for (User *U : ASIV->users()) {
// Ignore instructions that are outside the loop.
@@ -842,25 +856,25 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// If there is a non-load/store instruction in the loop, we can't promote
// it.
- if (LoadInst *load = dyn_cast<LoadInst>(UI)) {
+ if (const LoadInst *load = dyn_cast<LoadInst>(UI)) {
assert(!load->isVolatile() && "AST broken");
if (!load->isSimple())
- return;
- } else if (StoreInst *store = dyn_cast<StoreInst>(UI)) {
+ return Changed;
+ } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (UI->getOperand(1) != ASIV)
continue;
assert(!store->isVolatile() && "AST broken");
if (!store->isSimple())
- return;
+ return Changed;
// Don't sink stores from loops without dedicated block exits. Exits
// containing indirect branches are not transformed by loop simplify, so
// make sure we catch that. An additional load may be generated in the
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!HasDedicatedExits || !Preheader)
- return;
+ return Changed;
// Note that we only check GuaranteedToExecute inside the store case
// so that we do not introduce stores where they did not exist before
@@ -872,16 +886,17 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// Larger is better, with the exception of 0 being the best alignment.
unsigned InstAlignment = store->getAlignment();
if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
- if (isGuaranteedToExecute(*UI)) {
+ if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
GuaranteedToExecute = true;
Alignment = InstAlignment;
}
if (!GuaranteedToExecute)
- GuaranteedToExecute = isGuaranteedToExecute(*UI);
+ GuaranteedToExecute = isGuaranteedToExecute(*UI, DT,
+ CurLoop, SafetyInfo);
} else
- return; // Not a load or store.
+ return Changed; // Not a load or store.
// Merge the AA tags.
if (LoopUses.empty()) {
@@ -897,7 +912,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// If there isn't a guaranteed-to-execute instruction, we can't promote.
if (!GuaranteedToExecute)
- return;
+ return Changed;
// Otherwise, this is safe to promote, lets do it!
DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
@@ -922,7 +937,8 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
- LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA,
+ PointerMustAliases, ExitBlocks,
InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
@@ -942,10 +958,12 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
PreheaderLoad->eraseFromParent();
-}
+ return Changed;
+}
-/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
+/// Simple Analysis hook. Clone alias set info.
+///
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
@@ -954,8 +972,8 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
AST->copyValue(From, To);
}
-/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
-/// set.
+/// Simple Analysis hook. Delete value V from alias set.
+///
void LICM::deleteAnalysisValue(Value *V, Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
@@ -965,6 +983,7 @@ void LICM::deleteAnalysisValue(Value *V, Loop *L) {
}
/// Simple Analysis hook. Delete value L from alias set map.
+///
void LICM::deleteAnalysisLoop(Loop *L) {
AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
@@ -973,3 +992,23 @@ void LICM::deleteAnalysisLoop(Loop *L) {
delete AST;
LoopToAliasSetMap.erase(L);
}
+
+
+/// Return true if the body of this loop may store into the memory
+/// location pointed to by V.
+///
+static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ AliasSetTracker *CurAST) {
+ // Check to see if any of the basic blocks in CurLoop invalidate *V.
+ return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
+}
+
+/// Little predicate that returns true if the specified basic block is in
+/// a subloop of the current one, not the current one itself.
+///
+static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
+ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
+ return LI->getLoopFor(BB) != CurLoop;
+}
+
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
index 11e4d76..c19cd19 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -12,17 +12,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/Pass.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -41,9 +41,9 @@ struct PointerOffsetPair {
};
struct LoadPOPPair {
+ LoadPOPPair() = default;
LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
: Load(L), POP(P), InsertOrder(O) {}
- LoadPOPPair() {}
LoadInst *Load;
PointerOffsetPair POP;
/// \brief The new load needs to be created before the first load in IR order.
@@ -52,13 +52,10 @@ struct LoadPOPPair {
class LoadCombine : public BasicBlockPass {
LLVMContext *C;
- const DataLayout *DL;
AliasAnalysis *AA;
public:
- LoadCombine()
- : BasicBlockPass(ID),
- C(nullptr), DL(nullptr), AA(nullptr) {
+ LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
@@ -85,12 +82,6 @@ private:
bool LoadCombine::doInitialization(Function &F) {
DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
C = &F.getContext();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP) {
- DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n");
- return false;
- }
- DL = &DLP->getDataLayout();
return true;
}
@@ -100,9 +91,10 @@ PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
POP.Offset = 0;
while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
- unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType());
+ auto &DL = LI.getModule()->getDataLayout();
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
APInt Offset(BitWidth, 0);
- if (GEP->accumulateConstantOffset(*DL, Offset))
+ if (GEP->accumulateConstantOffset(DL, Offset))
POP.Offset += Offset.getZExtValue();
else
// Can't handle GEPs with variable indices.
@@ -145,7 +137,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
if (PrevOffset == -1ull) {
BaseLoad = L.Load;
PrevOffset = L.POP.Offset;
- PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
+ L.Load->getType());
AggregateLoads.push_back(L);
continue;
}
@@ -164,7 +157,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
// FIXME: We may want to handle this case.
continue;
PrevOffset = L.POP.Offset;
- PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
+ L.Load->getType());
AggregateLoads.push_back(L);
}
if (combineLoads(AggregateLoads))
@@ -215,7 +209,8 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
for (const auto &L : Loads) {
Builder->SetInsertPoint(L.Load);
Value *V = Builder->CreateExtractInteger(
- *DL, NewLoad, cast<IntegerType>(L.Load->getType()),
+ L.Load->getModule()->getDataLayout(), NewLoad,
+ cast<IntegerType>(L.Load->getType()),
L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
L.Load->replaceAllUsesWith(V);
}
@@ -225,13 +220,13 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
}
bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
- if (skipOptnoneFunction(BB) || !DL)
+ if (skipOptnoneFunction(BB))
return false;
AA = &getAnalysis<AliasAnalysis>();
- IRBuilder<true, TargetFolder>
- TheBuilder(BB.getContext(), TargetFolder(DL));
+ IRBuilder<true, TargetFolder> TheBuilder(
+ BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
Builder = &TheBuilder;
DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
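
A hedged sketch of the DataLayout access pattern the LoadCombine hunks switch to: instead of caching a pointer obtained from DataLayoutPass, the layout is queried on demand through the instruction's module. The calls used below (getModule(), getDataLayout(), getTypeStoreSize()) all appear verbatim in the hunks above; the helper itself is purely illustrative.

#include <cstdint>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Illustrative helper (not part of the patch): the store size of a load's
// type, fetched through the module rather than through a cached DataLayout.
static uint64_t loadStoreSize(LoadInst &LI) {
  const DataLayout &DL = LI.getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI.getType());
}
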
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 1d1f33a..98b068e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -39,14 +39,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
@@ -63,7 +63,7 @@ char LoopDeletion::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -236,7 +236,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// Finally, the blocks from loopinfo. This has to happen late because
// otherwise our loop iterators won't work.
- LoopInfo &loopInfo = getAnalysis<LoopInfo>();
+ LoopInfo &loopInfo = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SmallPtrSet<BasicBlock*, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (BasicBlock *BB : blocks)
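
The LoopDeletion hunks show the mechanical LoopInfo to LoopInfoWrapperPass migration that recurs throughout this patch. Below is a hedged skeleton of the new idiom; the pass name is hypothetical, while the addRequired/addPreserved and getAnalysis calls are taken from the hunks above.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"

using namespace llvm;

namespace {
// Hypothetical pass, used only to illustrate the wrapper-pass idiom.
struct ExampleLoopPass : public LoopPass {
  static char ID;
  ExampleLoopPass() : LoopPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LoopInfoWrapperPass>();   // was: AU.addRequired<LoopInfo>()
    AU.addPreserved<LoopInfoWrapperPass>();  // was: AU.addPreserved<LoopInfo>()
  }

  bool runOnLoop(Loop *L, LPPassManager &) override {
    // was: getAnalysis<LoopInfo>()
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    (void)LI; // use LI exactly as the old LoopInfo analysis was used
    return false;
  }
};
char ExampleLoopPass::ID = 0;
}
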
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
new file mode 100644
index 0000000..a907d59
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -0,0 +1,976 @@
+//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Loop Distribution Pass. Its main focus is to
+// distribute loops that cannot be vectorized due to dependence cycles. It
+// tries to isolate the offending dependences into a new loop allowing
+// vectorization of the remaining parts.
+//
+// For dependence analysis, the pass uses the LoopVectorizer's
+// LoopAccessAnalysis. Because this analysis presumes no change in the order of
+// memory operations, special care is taken to preserve the lexical order of
+// these operations.
+//
+// Similarly to the Vectorizer, the pass also supports loop versioning to
+// run-time disambiguate potentially overlapping arrays.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <list>
+
+#define LDIST_NAME "loop-distribute"
+#define DEBUG_TYPE LDIST_NAME
+
+using namespace llvm;
+
+static cl::opt<bool>
+ LDistVerify("loop-distribute-verify", cl::Hidden,
+ cl::desc("Turn on DominatorTree and LoopInfo verification "
+ "after Loop Distribution"),
+ cl::init(false));
+
+static cl::opt<bool> DistributeNonIfConvertible(
+ "loop-distribute-non-if-convertible", cl::Hidden,
+ cl::desc("Whether to distribute into a loop that may not be "
+ "if-convertible by the loop vectorizer"),
+ cl::init(false));
+
+STATISTIC(NumLoopsDistributed, "Number of loops distributed");
+
+/// \brief Remaps instructions in a loop including the preheader.
+static void remapInstructionsInLoop(const SmallVectorImpl<BasicBlock *> &Blocks,
+ ValueToValueMapTy &VMap) {
+ // Rewrite the code to refer to itself.
+ for (auto *BB : Blocks)
+ for (auto &Inst : *BB)
+ RemapInstruction(&Inst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+}
+
+/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
+/// Blocks.
+///
+/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
+/// \p LoopDomBB. Insert the new blocks before the block specified in \p Before.
+static Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
+ Loop *OrigLoop, ValueToValueMapTy &VMap,
+ const Twine &NameSuffix, LoopInfo *LI,
+ DominatorTree *DT,
+ SmallVectorImpl<BasicBlock *> &Blocks) {
+ Function *F = OrigLoop->getHeader()->getParent();
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+
+ Loop *NewLoop = new Loop();
+ if (ParentLoop)
+ ParentLoop->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
+ BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
+ BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
+ // To rename the loop PHIs.
+ VMap[OrigPH] = NewPH;
+ Blocks.push_back(NewPH);
+
+ // Update LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewPH, *LI);
+
+ // Update DominatorTree.
+ DT->addNewBlock(NewPH, LoopDomBB);
+
+ for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
+ VMap[BB] = NewBB;
+
+ // Update LoopInfo.
+ NewLoop->addBasicBlockToLoop(NewBB, *LI);
+
+ // Update DominatorTree.
+ BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+
+ Blocks.push_back(NewBB);
+ }
+
+ // Move them physically from the end of the block list.
+ F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH);
+ F->getBasicBlockList().splice(Before, F->getBasicBlockList(),
+ NewLoop->getHeader(), F->end());
+
+ return NewLoop;
+}
+
+namespace {
+/// \brief Maintains the set of instructions of the loop for a partition before
+/// cloning. After cloning, it hosts the new loop.
+class InstPartition {
+ typedef SmallPtrSet<Instruction *, 8> InstructionSet;
+
+public:
+ InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
+ : DepCycle(DepCycle), OrigLoop(L), ClonedLoop(nullptr) {
+ Set.insert(I);
+ }
+
+ /// \brief Returns whether this partition contains a dependence cycle.
+ bool hasDepCycle() const { return DepCycle; }
+
+ /// \brief Adds an instruction to this partition.
+ void add(Instruction *I) { Set.insert(I); }
+
+ /// \brief Collection accessors.
+ InstructionSet::iterator begin() { return Set.begin(); }
+ InstructionSet::iterator end() { return Set.end(); }
+ InstructionSet::const_iterator begin() const { return Set.begin(); }
+ InstructionSet::const_iterator end() const { return Set.end(); }
+ bool empty() const { return Set.empty(); }
+
+ /// \brief Moves this partition into \p Other. This partition becomes empty
+ /// after this.
+ void moveTo(InstPartition &Other) {
+ Other.Set.insert(Set.begin(), Set.end());
+ Set.clear();
+ Other.DepCycle |= DepCycle;
+ }
+
+ /// \brief Populates the partition with a transitive closure of all the
+  /// instructions that the seeded instructions depend on.
+ void populateUsedSet() {
+ // FIXME: We currently don't use control-dependence but simply include all
+ // blocks (possibly empty at the end) and let simplifycfg mostly clean this
+ // up.
+ for (auto *B : OrigLoop->getBlocks())
+ Set.insert(B->getTerminator());
+
+ // Follow the use-def chains to form a transitive closure of all the
+ // instructions that the originally seeded instructions depend on.
+ SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ // Insert instructions from the loop that we depend on.
+ for (Value *V : I->operand_values()) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
+ Worklist.push_back(I);
+ }
+ }
+ }
+
+ /// \brief Clones the original loop.
+ ///
+ /// Updates LoopInfo and DominatorTree using the information that block \p
+ /// LoopDomBB dominates the loop.
+ Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
+ unsigned Index, LoopInfo *LI,
+ DominatorTree *DT) {
+ ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
+ VMap, Twine(".ldist") + Twine(Index),
+ LI, DT, ClonedLoopBlocks);
+ return ClonedLoop;
+ }
+
+ /// \brief The cloned loop. If this partition is mapped to the original loop,
+ /// this is null.
+ const Loop *getClonedLoop() const { return ClonedLoop; }
+
+ /// \brief Returns the loop where this partition ends up after distribution.
+ /// If this partition is mapped to the original loop then use the block from
+ /// the loop.
+ const Loop *getDistributedLoop() const {
+ return ClonedLoop ? ClonedLoop : OrigLoop;
+ }
+
+ /// \brief The VMap that is populated by cloning and then used in
+  /// remapInstructions to remap the cloned instructions.
+ ValueToValueMapTy &getVMap() { return VMap; }
+
+ /// \brief Remaps the cloned instructions using VMap.
+ void remapInstructions() { remapInstructionsInLoop(ClonedLoopBlocks, VMap); }
+
+ /// \brief Based on the set of instructions selected for this partition,
+ /// removes the unnecessary ones.
+ void removeUnusedInsts() {
+ SmallVector<Instruction *, 8> Unused;
+
+ for (auto *Block : OrigLoop->getBlocks())
+ for (auto &Inst : *Block)
+ if (!Set.count(&Inst)) {
+ Instruction *NewInst = &Inst;
+ if (!VMap.empty())
+ NewInst = cast<Instruction>(VMap[NewInst]);
+
+ assert(!isa<BranchInst>(NewInst) &&
+ "Branches are marked used early on");
+ Unused.push_back(NewInst);
+ }
+
+    // Delete the instructions backwards, as doing so reduces the likelihood
+    // of having to update as many def-use and use-def chains.
+ for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) {
+ auto *Inst = *I;
+
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ Inst->eraseFromParent();
+ }
+ }
+
+ void print() const {
+ if (DepCycle)
+ dbgs() << " (cycle)\n";
+ for (auto *I : Set)
+ // Prefix with the block name.
+ dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
+ }
+
+ void printBlocks() const {
+ for (auto *BB : getDistributedLoop()->getBlocks())
+ dbgs() << *BB;
+ }
+
+private:
+ /// \brief Instructions from OrigLoop selected for this partition.
+ InstructionSet Set;
+
+ /// \brief Whether this partition contains a dependence cycle.
+ bool DepCycle;
+
+ /// \brief The original loop.
+ Loop *OrigLoop;
+
+ /// \brief The cloned loop. If this partition is mapped to the original loop,
+ /// this is null.
+ Loop *ClonedLoop;
+
+ /// \brief The blocks of ClonedLoop including the preheader. If this
+ /// partition is mapped to the original loop, this is empty.
+ SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
+
+  /// \brief This gets populated once the set of instructions has been
+  /// finalized. If this partition is mapped to the original loop, this is not
+  /// set.
+ ValueToValueMapTy VMap;
+};
+
+/// \brief Holds the set of Partitions. It populates them, merges them and then
+/// clones the loops.
+class InstPartitionContainer {
+ typedef DenseMap<Instruction *, int> InstToPartitionIdT;
+
+public:
+ InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
+ : L(L), LI(LI), DT(DT) {}
+
+ /// \brief Returns the number of partitions.
+ unsigned getSize() const { return PartitionContainer.size(); }
+
+ /// \brief Adds \p Inst into the current partition if that is marked to
+ /// contain cycles. Otherwise start a new partition for it.
+ void addToCyclicPartition(Instruction *Inst) {
+    // If the current partition is non-cyclic, start a new one.
+ if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
+ PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
+ else
+ PartitionContainer.back().add(Inst);
+ }
+
+ /// \brief Adds \p Inst into a partition that is not marked to contain
+ /// dependence cycles.
+ ///
+ // Initially we isolate memory instructions into as many partitions as
+ // possible, then later we may merge them back together.
+ void addToNewNonCyclicPartition(Instruction *Inst) {
+ PartitionContainer.emplace_back(Inst, L);
+ }
+
+ /// \brief Merges adjacent non-cyclic partitions.
+ ///
+ /// The idea is that we currently only want to isolate the non-vectorizable
+  /// partition. We could later allow more distribution among these partitions
+ /// too.
+ void mergeAdjacentNonCyclic() {
+ mergeAdjacentPartitionsIf(
+ [](const InstPartition *P) { return !P->hasDepCycle(); });
+ }
+
+ /// \brief If a partition contains only conditional stores, we won't vectorize
+ /// it. Try to merge it with a previous cyclic partition.
+ void mergeNonIfConvertible() {
+ mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
+ if (Partition->hasDepCycle())
+ return true;
+
+ // Now, check if all stores are conditional in this partition.
+ bool seenStore = false;
+
+ for (auto *Inst : *Partition)
+ if (isa<StoreInst>(Inst)) {
+ seenStore = true;
+ if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
+ return false;
+ }
+ return seenStore;
+ });
+ }
+
+ /// \brief Merges the partitions according to various heuristics.
+ void mergeBeforePopulating() {
+ mergeAdjacentNonCyclic();
+ if (!DistributeNonIfConvertible)
+ mergeNonIfConvertible();
+ }
+
+ /// \brief Merges partitions in order to ensure that no loads are duplicated.
+ ///
+ /// We can't duplicate loads because that could potentially reorder them.
+ /// LoopAccessAnalysis provides dependency information with the context that
+  /// the order of memory operations is preserved.
+  ///
+  /// Returns true if any partitions were merged.
+ bool mergeToAvoidDuplicatedLoads() {
+ typedef DenseMap<Instruction *, InstPartition *> LoadToPartitionT;
+ typedef EquivalenceClasses<InstPartition *> ToBeMergedT;
+
+ LoadToPartitionT LoadToPartition;
+ ToBeMergedT ToBeMerged;
+
+ // Step through the partitions and create equivalence between partitions
+ // that contain the same load. Also put partitions in between them in the
+ // same equivalence class to avoid reordering of memory operations.
+ for (PartitionContainerT::iterator I = PartitionContainer.begin(),
+ E = PartitionContainer.end();
+ I != E; ++I) {
+ auto *PartI = &*I;
+
+ // If a load occurs in two partitions PartI and PartJ, merge all
+ // partitions (PartI, PartJ] into PartI.
+ for (Instruction *Inst : *PartI)
+ if (isa<LoadInst>(Inst)) {
+ bool NewElt;
+ LoadToPartitionT::iterator LoadToPart;
+
+ std::tie(LoadToPart, NewElt) =
+ LoadToPartition.insert(std::make_pair(Inst, PartI));
+ if (!NewElt) {
+ DEBUG(dbgs() << "Merging partitions due to this load in multiple "
+ << "partitions: " << PartI << ", "
+ << LoadToPart->second << "\n" << *Inst << "\n");
+
+ auto PartJ = I;
+ do {
+ --PartJ;
+ ToBeMerged.unionSets(PartI, &*PartJ);
+ } while (&*PartJ != LoadToPart->second);
+ }
+ }
+ }
+ if (ToBeMerged.empty())
+ return false;
+
+    // Merge the members of an equivalence class into its class leader. This
+ // makes the members empty.
+ for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
+ I != E; ++I) {
+ if (!I->isLeader())
+ continue;
+
+ auto PartI = I->getData();
+ for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
+ ToBeMerged.member_end())) {
+ PartJ->moveTo(*PartI);
+ }
+ }
+
+ // Remove the empty partitions.
+ PartitionContainer.remove_if(
+ [](const InstPartition &P) { return P.empty(); });
+
+ return true;
+ }
+
+  /// \brief Sets up the mapping from instructions to partitions. If the
+ /// instruction is duplicated across multiple partitions, set the entry to -1.
+ void setupPartitionIdOnInstructions() {
+ int PartitionID = 0;
+ for (const auto &Partition : PartitionContainer) {
+ for (Instruction *Inst : Partition) {
+ bool NewElt;
+ InstToPartitionIdT::iterator Iter;
+
+ std::tie(Iter, NewElt) =
+ InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
+ if (!NewElt)
+ Iter->second = -1;
+ }
+ ++PartitionID;
+ }
+ }
+
+ /// \brief Populates the partition with everything that the seeding
+ /// instructions require.
+ void populateUsedSet() {
+ for (auto &P : PartitionContainer)
+ P.populateUsedSet();
+ }
+
+ /// \brief This performs the main chunk of the work of cloning the loops for
+ /// the partitions.
+ void cloneLoops(Pass *P) {
+ BasicBlock *OrigPH = L->getLoopPreheader();
+ // At this point the predecessor of the preheader is either the memcheck
+ // block or the top part of the original preheader.
+ BasicBlock *Pred = OrigPH->getSinglePredecessor();
+ assert(Pred && "Preheader does not have a single predecessor");
+ BasicBlock *ExitBlock = L->getExitBlock();
+ assert(ExitBlock && "No single exit block");
+ Loop *NewLoop;
+
+ assert(!PartitionContainer.empty() && "at least two partitions expected");
+ // We're cloning the preheader along with the loop so we already made sure
+ // it was empty.
+ assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
+ "preheader not empty");
+
+ // Create a loop for each partition except the last. Clone the original
+ // loop before PH along with adding a preheader for the cloned loop. Then
+ // update PH to point to the newly added preheader.
+ BasicBlock *TopPH = OrigPH;
+ unsigned Index = getSize() - 1;
+ for (auto I = std::next(PartitionContainer.rbegin()),
+ E = PartitionContainer.rend();
+ I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
+ auto *Part = &*I;
+
+ NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
+
+ Part->getVMap()[ExitBlock] = TopPH;
+ Part->remapInstructions();
+ }
+ Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
+
+ // Now go in forward order and update the immediate dominator for the
+ // preheaders with the exiting block of the previous loop. Dominance
+ // within the loop is updated in cloneLoopWithPreheader.
+ for (auto Curr = PartitionContainer.cbegin(),
+ Next = std::next(PartitionContainer.cbegin()),
+ E = PartitionContainer.cend();
+ Next != E; ++Curr, ++Next)
+ DT->changeImmediateDominator(
+ Next->getDistributedLoop()->getLoopPreheader(),
+ Curr->getDistributedLoop()->getExitingBlock());
+ }
+
+ /// \brief Removes the dead instructions from the cloned loops.
+ void removeUnusedInsts() {
+ for (auto &Partition : PartitionContainer)
+ Partition.removeUnusedInsts();
+ }
+
+ /// \brief For each memory pointer, it computes the partitionId the pointer is
+ /// used in.
+ ///
+  /// This returns an array of int where the I-th entry corresponds to the
+  /// I-th entry in LAI.getRuntimePointerCheck(). If the pointer is used in
+  /// multiple partitions, its entry is set to -1.
+ SmallVector<int, 8>
+ computePartitionSetForPointers(const LoopAccessInfo &LAI) {
+ const LoopAccessInfo::RuntimePointerCheck *RtPtrCheck =
+ LAI.getRuntimePointerCheck();
+
+ unsigned N = RtPtrCheck->Pointers.size();
+ SmallVector<int, 8> PtrToPartitions(N);
+ for (unsigned I = 0; I < N; ++I) {
+ Value *Ptr = RtPtrCheck->Pointers[I];
+ auto Instructions =
+ LAI.getInstructionsForAccess(Ptr, RtPtrCheck->IsWritePtr[I]);
+
+ int &Partition = PtrToPartitions[I];
+ // First set it to uninitialized.
+ Partition = -2;
+ for (Instruction *Inst : Instructions) {
+ // Note that this could be -1 if Inst is duplicated across multiple
+ // partitions.
+ int ThisPartition = this->InstToPartitionId[Inst];
+ if (Partition == -2)
+ Partition = ThisPartition;
+ // -1 means belonging to multiple partitions.
+ else if (Partition == -1)
+ break;
+ else if (Partition != (int)ThisPartition)
+ Partition = -1;
+ }
+ assert(Partition != -2 && "Pointer not belonging to any partition");
+ }
+
+ return PtrToPartitions;
+ }
+
+ void print(raw_ostream &OS) const {
+ unsigned Index = 0;
+ for (const auto &P : PartitionContainer) {
+ OS << "Partition " << Index++ << " (" << &P << "):\n";
+ P.print();
+ }
+ }
+
+ void dump() const { print(dbgs()); }
+
+#ifndef NDEBUG
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const InstPartitionContainer &Partitions) {
+ Partitions.print(OS);
+ return OS;
+ }
+#endif
+
+ void printBlocks() const {
+ unsigned Index = 0;
+ for (const auto &P : PartitionContainer) {
+ dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
+ P.printBlocks();
+ }
+ }
+
+private:
+ typedef std::list<InstPartition> PartitionContainerT;
+
+ /// \brief List of partitions.
+ PartitionContainerT PartitionContainer;
+
+ /// \brief Mapping from Instruction to partition Id. If the instruction
+ /// belongs to multiple partitions the entry contains -1.
+ InstToPartitionIdT InstToPartitionId;
+
+ Loop *L;
+ LoopInfo *LI;
+ DominatorTree *DT;
+
+ /// \brief The control structure to merge adjacent partitions if both satisfy
+ /// the \p Predicate.
+ template <class UnaryPredicate>
+ void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
+ InstPartition *PrevMatch = nullptr;
+ for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
+ auto DoesMatch = Predicate(&*I);
+ if (PrevMatch == nullptr && DoesMatch) {
+ PrevMatch = &*I;
+ ++I;
+ } else if (PrevMatch != nullptr && DoesMatch) {
+ I->moveTo(*PrevMatch);
+ I = PartitionContainer.erase(I);
+ } else {
+ PrevMatch = nullptr;
+ ++I;
+ }
+ }
+ }
+};
+
+/// \brief For each memory instruction, this class maintains the difference
+/// between the number of unsafe dependences that start at this instruction
+/// and those that end here.
+///
+/// By traversing the memory instructions in program order and accumulating this
+/// number, we know whether any unsafe dependence crosses over a program point.
+class MemoryInstructionDependences {
+ typedef MemoryDepChecker::Dependence Dependence;
+
+public:
+ struct Entry {
+ Instruction *Inst;
+ unsigned NumUnsafeDependencesStartOrEnd;
+
+ Entry(Instruction *Inst) : Inst(Inst), NumUnsafeDependencesStartOrEnd(0) {}
+ };
+
+ typedef SmallVector<Entry, 8> AccessesType;
+
+ AccessesType::const_iterator begin() const { return Accesses.begin(); }
+ AccessesType::const_iterator end() const { return Accesses.end(); }
+
+ MemoryInstructionDependences(
+ const SmallVectorImpl<Instruction *> &Instructions,
+ const SmallVectorImpl<Dependence> &InterestingDependences) {
+ Accesses.append(Instructions.begin(), Instructions.end());
+
+ DEBUG(dbgs() << "Backward dependences:\n");
+ for (auto &Dep : InterestingDependences)
+ if (Dep.isPossiblyBackward()) {
+ // Note that the designations source and destination follow the program
+ // order, i.e. source is always first. (The direction is given by the
+ // DepType.)
+ ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
+ --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
+
+ DEBUG(Dep.print(dbgs(), 2, Instructions));
+ }
+ }
+
+private:
+ AccessesType Accesses;
+};
+
+/// \brief Handles the loop versioning based on memchecks.
+class RuntimeCheckEmitter {
+public:
+ RuntimeCheckEmitter(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
+ DominatorTree *DT)
+ : OrigLoop(L), NonDistributedLoop(nullptr), LAI(LAI), LI(LI), DT(DT) {}
+
+ /// \brief Given the \p Partitions formed by Loop Distribution, it determines
+ /// in which partition each pointer is used.
+ void partitionPointers(InstPartitionContainer &Partitions) {
+    // Set up partition id in PtrRtChecks. Ptr -> Access -> Instruction ->
+ // Partition.
+ PtrToPartition = Partitions.computePartitionSetForPointers(LAI);
+
+ DEBUG(dbgs() << "\nPointers:\n");
+ DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition));
+ }
+
+ /// \brief Returns true if we need memchecks to distribute the loop.
+ bool needsRuntimeChecks() const {
+ return LAI.getRuntimePointerCheck()->needsAnyChecking(&PtrToPartition);
+ }
+
+ /// \brief Performs the CFG manipulation part of versioning the loop including
+ /// the DominatorTree and LoopInfo updates.
+ void versionLoop(Pass *P) {
+ Instruction *FirstCheckInst;
+ Instruction *MemRuntimeCheck;
+ // Add the memcheck in the original preheader (this is empty initially).
+ BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader();
+ std::tie(FirstCheckInst, MemRuntimeCheck) =
+ LAI.addRuntimeCheck(MemCheckBB->getTerminator(), &PtrToPartition);
+ assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
+
+ // Rename the block to make the IR more readable.
+ MemCheckBB->setName(OrigLoop->getHeader()->getName() + ".ldist.memcheck");
+
+ // Create empty preheader for the loop (and after cloning for the
+ // original/nondist loop).
+ BasicBlock *PH =
+ SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
+ PH->setName(OrigLoop->getHeader()->getName() + ".ph");
+
+ // Clone the loop including the preheader.
+ //
+ // FIXME: This does not currently preserve SimplifyLoop because the exit
+ // block is a join between the two loops.
+ SmallVector<BasicBlock *, 8> NonDistributedLoopBlocks;
+ NonDistributedLoop =
+ cloneLoopWithPreheader(PH, MemCheckBB, OrigLoop, VMap, ".ldist.nondist",
+ LI, DT, NonDistributedLoopBlocks);
+ remapInstructionsInLoop(NonDistributedLoopBlocks, VMap);
+
+ // Insert the conditional branch based on the result of the memchecks.
+ Instruction *OrigTerm = MemCheckBB->getTerminator();
+ BranchInst::Create(NonDistributedLoop->getLoopPreheader(),
+ OrigLoop->getLoopPreheader(), MemRuntimeCheck, OrigTerm);
+ OrigTerm->eraseFromParent();
+
+ // The loops merge in the original exit block. This is now dominated by the
+ // memchecking block.
+ DT->changeImmediateDominator(OrigLoop->getExitBlock(), MemCheckBB);
+ }
+
+ /// \brief Adds the necessary PHI nodes for the versioned loops based on the
+ /// loop-defined values used outside of the loop.
+ void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
+ BasicBlock *PHIBlock = OrigLoop->getExitBlock();
+ assert(PHIBlock && "No single successor to loop exit block");
+
+ for (auto *Inst : DefsUsedOutside) {
+ auto *NonDistInst = cast<Instruction>(VMap[Inst]);
+ PHINode *PN;
+
+ // First see if we have a single-operand PHI with the value defined by the
+ // original loop.
+ for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) {
+ assert(PN->getNumOperands() == 1 &&
+               "Exit block should only have one predecessor");
+ if (PN->getIncomingValue(0) == Inst)
+ break;
+ }
+ // If not create it.
+ if (!PN) {
+ PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".ldist",
+ PHIBlock->begin());
+ for (auto *User : Inst->users())
+ if (!OrigLoop->contains(cast<Instruction>(User)->getParent()))
+ User->replaceUsesOfWith(Inst, PN);
+ PN->addIncoming(Inst, OrigLoop->getExitingBlock());
+ }
+ // Add the new incoming value from the non-distributed loop.
+ PN->addIncoming(NonDistInst, NonDistributedLoop->getExitingBlock());
+ }
+ }
+
+private:
+ /// \brief The original loop. This becomes the "versioned" one, i.e. control
+  /// goes here if the memchecks all pass.
+ Loop *OrigLoop;
+ /// \brief The fall-back loop, i.e. if any of the memchecks fail.
+ Loop *NonDistributedLoop;
+
+ /// \brief For each memory pointer it contains the partitionId it is used in.
+ ///
+ /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck().
+ /// If the pointer is used in multiple partitions the entry is set to -1.
+ SmallVector<int, 8> PtrToPartition;
+
+ /// \brief This maps the instructions from OrigLoop to their counterpart in
+ /// NonDistributedLoop.
+ ValueToValueMapTy VMap;
+
+ /// \brief Analyses used.
+ const LoopAccessInfo &LAI;
+ LoopInfo *LI;
+ DominatorTree *DT;
+};
+
+/// \brief Returns the instructions that use values defined in the loop.
+static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) {
+ SmallVector<Instruction *, 8> UsedOutside;
+
+ for (auto *Block : L->getBlocks())
+ // FIXME: I believe that this could use copy_if if the Inst reference could
+ // be adapted into a pointer.
+ for (auto &Inst : *Block) {
+ auto Users = Inst.users();
+ if (std::any_of(Users.begin(), Users.end(), [&](User *U) {
+ auto *Use = cast<Instruction>(U);
+ return !L->contains(Use->getParent());
+ }))
+ UsedOutside.push_back(&Inst);
+ }
+
+ return UsedOutside;
+}
+
+/// \brief The pass class.
+class LoopDistribute : public FunctionPass {
+public:
+ LoopDistribute() : FunctionPass(ID) {
+ initializeLoopDistributePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LAA = &getAnalysis<LoopAccessAnalysis>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+    // Build up a worklist of inner-loops to distribute. This is necessary as the
+ // act of distributing a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist)
+ Changed |= processLoop(L);
+
+ // Process each loop nest in the function.
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopAccessAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+ static char ID;
+
+private:
+ /// \brief Try to distribute an inner-most loop.
+ bool processLoop(Loop *L) {
+ assert(L->empty() && "Only process inner loops.");
+
+ DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
+ << "\" checking " << *L << "\n");
+
+ BasicBlock *PH = L->getLoopPreheader();
+ if (!PH) {
+ DEBUG(dbgs() << "Skipping; no preheader");
+ return false;
+ }
+ if (!L->getExitBlock()) {
+ DEBUG(dbgs() << "Skipping; multiple exit blocks");
+ return false;
+ }
+ // LAA will check that we only have a single exiting block.
+
+ const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+
+ // Currently, we only distribute to isolate the part of the loop with
+ // dependence cycles to enable partial vectorization.
+ if (LAI.canVectorizeMemory()) {
+ DEBUG(dbgs() << "Skipping; memory operations are safe for vectorization");
+ return false;
+ }
+ auto *InterestingDependences =
+ LAI.getDepChecker().getInterestingDependences();
+ if (!InterestingDependences || InterestingDependences->empty()) {
+ DEBUG(dbgs() << "Skipping; No unsafe dependences to isolate");
+ return false;
+ }
+
+ InstPartitionContainer Partitions(L, LI, DT);
+
+    // First, go through the memory operations and assign them to consecutive
+    // partitions (the order of partitions follows program order). Put those
+    // with unsafe dependences into a "cyclic" partition; otherwise put each
+    // store in its own "non-cyclic" partition (we'll merge these later).
+ //
+ // Note that a memory operation (e.g. Load2 below) at a program point that
+ // has an unsafe dependence (Store3->Load1) spanning over it must be
+ // included in the same cyclic partition as the dependent operations. This
+ // is to preserve the original program order after distribution. E.g.:
+ //
+ // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
+ // Load1 -. 1 0->1
+ // Load2 | /Unsafe/ 0 1
+ // Store3 -' -1 1->0
+ // Load4 0 0
+ //
+ // NumUnsafeDependencesActive > 0 indicates this situation and in this case
+ // we just keep assigning to the same cyclic partition until
+ // NumUnsafeDependencesActive reaches 0.
+ const MemoryDepChecker &DepChecker = LAI.getDepChecker();
+ MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
+ *InterestingDependences);
+
+ int NumUnsafeDependencesActive = 0;
+ for (auto &InstDep : MID) {
+ Instruction *I = InstDep.Inst;
+      // We update NumUnsafeDependencesActive post-instruction, so catch the
+      // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
+ if (NumUnsafeDependencesActive ||
+ InstDep.NumUnsafeDependencesStartOrEnd > 0)
+ Partitions.addToCyclicPartition(I);
+ else
+ Partitions.addToNewNonCyclicPartition(I);
+ NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
+ assert(NumUnsafeDependencesActive >= 0 &&
+ "Negative number of dependences active");
+ }
+
+ // Add partitions for values used outside. These partitions can be out of
+ // order from the original program order. This is OK because if the
+ // partition uses a load we will merge this partition with the original
+ // partition of the load that we set up in the previous loop (see
+ // mergeToAvoidDuplicatedLoads).
+ auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
+ for (auto *Inst : DefsUsedOutside)
+ Partitions.addToNewNonCyclicPartition(Inst);
+
+ DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
+ if (Partitions.getSize() < 2)
+ return false;
+
+ // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
+ // should be able to vectorize these together.
+ Partitions.mergeBeforePopulating();
+ DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
+ if (Partitions.getSize() < 2)
+ return false;
+
+ // Now, populate the partitions with non-memory operations.
+ Partitions.populateUsedSet();
+ DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
+
+ // In order to preserve original lexical order for loads, keep them in the
+ // partition that we set up in the MemoryInstructionDependences loop.
+ if (Partitions.mergeToAvoidDuplicatedLoads()) {
+ DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
+ << Partitions);
+ if (Partitions.getSize() < 2)
+ return false;
+ }
+
+ DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
+    // We're done forming the partitions; set up the reverse mapping from
+ // instructions to partitions.
+ Partitions.setupPartitionIdOnInstructions();
+
+    // To keep things simple, have an empty preheader before we version or clone
+ // the loop. (Also split if this has no predecessor, i.e. entry, because we
+ // rely on PH having a predecessor.)
+ if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
+ SplitBlock(PH, PH->getTerminator(), DT, LI);
+
+    // If we need run-time checks to disambiguate the pointers, version the
+    // loop now.
+ RuntimeCheckEmitter RtCheckEmitter(LAI, L, LI, DT);
+ RtCheckEmitter.partitionPointers(Partitions);
+ if (RtCheckEmitter.needsRuntimeChecks()) {
+ RtCheckEmitter.versionLoop(this);
+ RtCheckEmitter.addPHINodes(DefsUsedOutside);
+ }
+
+ // Create identical copies of the original loop for each partition and hook
+ // them up sequentially.
+ Partitions.cloneLoops(this);
+
+    // Now, we remove from each loop the instructions that don't belong to that
+ // partition.
+ Partitions.removeUnusedInsts();
+ DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
+ DEBUG(Partitions.printBlocks());
+
+ if (LDistVerify) {
+ LI->verify();
+ DT->verifyDomTree();
+ }
+
+ ++NumLoopsDistributed;
+ return true;
+ }
+
+ // Analyses used.
+ LoopInfo *LI;
+ LoopAccessAnalysis *LAA;
+ DominatorTree *DT;
+};
+} // anonymous namespace
+
+char LoopDistribute::ID;
+static const char ldist_name[] = "Loop Distribution";
+
+INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopDistributePass() { return new LoopDistribute(); }
+}
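
A standalone sketch, in plain C++ with no LLVM types, of the partition-seeding rule described in the NumUnsafeDependencesStartOrEnd comment inside processLoop above: while the running count of active unsafe dependences is positive, memory operations keep joining the current cyclic partition; otherwise each one seeds a new non-cyclic partition.

#include <cassert>
#include <vector>

struct MemOp {
  int StartOrEnd; // +1 when an unsafe dependence starts here, -1 when one ends.
};

// For each memory operation, returns true if it lands in a cyclic partition.
std::vector<bool> seedPartitions(const std::vector<MemOp> &Ops) {
  std::vector<bool> Cyclic;
  int Active = 0;
  for (const MemOp &Op : Ops) {
    Cyclic.push_back(Active > 0 || Op.StartOrEnd > 0);
    Active += Op.StartOrEnd;
    assert(Active >= 0 && "Negative number of dependences active");
  }
  return Cyclic;
}

// The Load1/Load2/Store3/Load4 example from the comment corresponds to
// {+1, 0, -1, 0} and yields {cyclic, cyclic, cyclic, non-cyclic}.
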
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a12f5a7..f92ecd4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -47,6 +47,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
@@ -56,7 +57,6 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -130,7 +130,6 @@ namespace {
class LoopIdiomRecognize : public LoopPass {
Loop *CurLoop;
- const DataLayout *DL;
DominatorTree *DT;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
@@ -139,7 +138,10 @@ namespace {
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr;
+ DT = nullptr;
+ SE = nullptr;
+ TLI = nullptr;
+ TTI = nullptr;
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -163,8 +165,8 @@ namespace {
/// loop preheaders be inserted into the CFG.
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -175,16 +177,8 @@ namespace {
AU.addPreserved<ScalarEvolution>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<TargetTransformInfo>();
- }
-
- const DataLayout *getDataLayout() {
- if (DL)
- return DL;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- return DL;
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
DominatorTree *getDominatorTree() {
@@ -197,11 +191,16 @@ namespace {
}
TargetLibraryInfo *getTargetLibraryInfo() {
- return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
+ if (!TLI)
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+
+ return TLI;
}
const TargetTransformInfo *getTargetTransformInfo() {
- return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>());
+ return TTI ? TTI
+ : (TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *CurLoop->getHeader()->getParent()));
}
Loop *getLoop() const { return CurLoop; }
@@ -215,14 +214,14 @@ namespace {
char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
@@ -232,44 +231,13 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
-static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
+static void deleteDeadInstruction(Instruction *I,
const TargetLibraryInfo *TLI) {
- SmallVector<Instruction*, 32> NowDeadInsts;
-
- NowDeadInsts.push_back(I);
-
- // Before we touch this instruction, remove it from SE!
- do {
- Instruction *DeadInst = NowDeadInsts.pop_back_val();
-
- // This instruction is dead, zap it, in stages. Start by removing it from
- // SCEV.
- SE.forgetValue(DeadInst);
-
- for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
- Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, nullptr);
-
- // If this operand just became dead, add it to the NowDeadInsts list.
- if (!Op->use_empty()) continue;
-
- if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI, TLI))
- NowDeadInsts.push_back(OpI);
- }
-
- DeadInst->eraseFromParent();
-
- } while (!NowDeadInsts.empty());
-}
-
-/// deleteIfDeadInstruction - If the specified value is a dead instruction,
-/// delete it and any recursively used instructions.
-static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
- const TargetLibraryInfo *TLI) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (isInstructionTriviallyDead(I, TLI))
- deleteDeadInstruction(I, SE, TLI);
+ SmallVector<Value *, 16> Operands(I->value_op_begin(), I->value_op_end());
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ for (Value *Op : Operands)
+ RecursivelyDeleteTriviallyDeadInstructions(Op, TLI);
}
//===----------------------------------------------------------------------===//
@@ -285,7 +253,7 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
// the concern of breaking data dependence.
bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
if (BranchInst *Br = getBranch(BB)) {
- return Br->isUnconditional() && BB->size() == 1;
+ return Br->isUnconditional() && Br == BB->begin();
}
return false;
}
@@ -542,7 +510,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
PreCond->replaceAllUsesWith(NewPreCond);
- deleteDeadInstruction(PreCond, *SE, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
}
// Step 3: Note that the population count is exactly the trip count of the
@@ -592,15 +560,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
// Step 4: All the references to the original population counter outside
// the loop are replaced with the NewCount -- the value returned from
// __builtin_ctpop().
- {
- SmallVector<Value *, 4> CntUses;
- for (User *U : CntInst->users())
- if (cast<Instruction>(U)->getParent() != Body)
- CntUses.push_back(U);
- for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
- (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
- }
- }
+ CntInst->replaceUsesOutsideBlock(NewCount, Body);
// step 5: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
@@ -651,7 +611,9 @@ bool NclPopcountRecognize::recognize() {
bool LoopIdiomRecognize::runOnCountableLoop() {
const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
- if (isa<SCEVCouldNotCompute>(BECount)) return false;
+ assert(!isa<SCEVCouldNotCompute>(BECount) &&
+         "runOnCountableLoop() called on a loop without a predictable "
+ "backedge-taken count");
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
@@ -659,15 +621,11 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
if (BECst->getValue()->getValue() == 0)
return false;
- // We require target data for now.
- if (!getDataLayout())
- return false;
-
// set DT
(void)getDominatorTree();
- LoopInfo &LI = getAnalysis<LoopInfo>();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// set TLI
(void)getTargetLibraryInfo();
@@ -681,13 +639,12 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
- for (Loop::block_iterator BI = CurLoop->block_begin(),
- E = CurLoop->block_end(); BI != E; ++BI) {
+ for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
- if (LI.getLoopFor(*BI) != CurLoop)
+ if (LI.getLoopFor(BB) != CurLoop)
continue;
- MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+ MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
}
return MadeChange;
}
@@ -776,7 +733,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
Value *StorePtr = SI->getPointerOperand();
// Reject stores that are so large that they overflow an unsigned.
- uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
+ uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
@@ -951,7 +909,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
-
+ auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
// If we're allowed to form a memset, and the stored value would be acceptable
@@ -962,9 +920,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
PatternValue = nullptr;
- } else if (DestAS == 0 &&
- TLI->has(LibFunc::memset_pattern16) &&
- (PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
+ } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
// Don't create memset_pattern16s with address spaces.
// It looks like we can use PatternValue!
SplatValue = nullptr;
@@ -979,7 +936,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, "loop-idiom");
+ SCEVExpander Expander(*SE, DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
@@ -997,7 +954,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(BasePtr, *SE, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
return false;
}
@@ -1039,12 +996,12 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
- GlobalValue::InternalLinkage,
+ GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(16);
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
- NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
}
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
@@ -1053,7 +1010,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(TheStore, *SE, TLI);
+ deleteDeadInstruction(TheStore, TLI);
++NumMemSet;
return true;
}
@@ -1076,7 +1033,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, "loop-idiom");
+ const DataLayout &DL = Preheader->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "loop-idiom");
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1094,7 +1052,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
return false;
}
@@ -1109,8 +1067,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
- deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
- deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
return false;
}
@@ -1143,7 +1101,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
- deleteDeadInstruction(SI, *SE, TLI);
+ deleteDeadInstruction(SI, TLI);
++NumMemCpy;
return true;
}
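
As a usage note on the popcount hunk above, which folds a hand-rolled replacement loop into a single CntInst->replaceUsesOutsideBlock(NewCount, Body) call: the sketch below reconstructs the equivalent behavior from the block the patch removes, so it only restates code already visible in this patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Equivalent of CntInst->replaceUsesOutsideBlock(NewCount, Body): rewrite
// every use of CntInst that lives outside the basic block Body to use
// NewCount instead.
static void replaceUsesOutsideBlockByHand(Instruction *CntInst,
                                          Value *NewCount, BasicBlock *Body) {
  SmallVector<User *, 4> CntUses;
  for (User *U : CntInst->users())
    if (cast<Instruction>(U)->getParent() != Body)
      CntUses.push_back(U);
  for (User *U : CntUses)
    cast<Instruction>(U)->replaceUsesOfWith(CntInst, NewCount);
}
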
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index d664f85..e125026 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -23,7 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -44,12 +44,12 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
}
@@ -58,9 +58,9 @@ char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
@@ -76,10 +76,9 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = &getAnalysis<LoopInfo>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
*L->getHeader()->getParent());
@@ -109,6 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
WorklistItem Item = VisitStack.pop_back_val();
BasicBlock *BB = Item.getPointer();
bool IsSubloopHeader = Item.getInt();
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
// Simplify instructions in the current basic block.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
new file mode 100644
index 0000000..f584018
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -0,0 +1,1300 @@
+//===- LoopInterchange.cpp - Loop interchange pass------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass handles the loop interchange transform.
+// It interchanges loops to provide more cache-friendly memory access
+// patterns.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-interchange"
+
+namespace {
+
+typedef SmallVector<Loop *, 8> LoopVector;
+
+// TODO: Check if we can use a sparse matrix here.
+typedef std::vector<std::vector<char>> CharMatrix;
+
+// Maximum number of dependencies that can be handled in the dependency matrix.
+static const unsigned MaxMemInstrCount = 100;
+
+// Maximum loop depth supported.
+static const unsigned MaxLoopNestDepth = 10;
+
+struct LoopInterchange;
+
+#ifdef DUMP_DEP_MATRICIES
+void printDepMatrix(CharMatrix &DepMatrix) {
+ for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) {
+ std::vector<char> Vec = *I;
+ for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II)
+ DEBUG(dbgs() << *II << " ");
+ DEBUG(dbgs() << "\n");
+ }
+}
+#endif
+
+static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
+ Loop *L, DependenceAnalysis *DA) {
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector MemInstr;
+
+ if (Level > MaxLoopNestDepth) {
+ DEBUG(dbgs() << "Cannot handle loops of depth greater than "
+ << MaxLoopNestDepth << "\n");
+ return false;
+ }
+
+ // For each block.
+ for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
+ BB != BE; ++BB) {
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E;
+ ++I) {
+ Instruction *Ins = dyn_cast<Instruction>(I);
+ if (!Ins)
+ return false;
+ LoadInst *Ld = dyn_cast<LoadInst>(I);
+ StoreInst *St = dyn_cast<StoreInst>(I);
+ if (!St && !Ld)
+ continue;
+ if (Ld && !Ld->isSimple())
+ return false;
+ if (St && !St->isSimple())
+ return false;
+ MemInstr.push_back(I);
+ }
+ }
+
+ DEBUG(dbgs() << "Found " << MemInstr.size()
+ << " Loads and Stores to analyze\n");
+
+ ValueVector::iterator I, IE, J, JE;
+
+ for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
+ for (J = I, JE = MemInstr.end(); J != JE; ++J) {
+ std::vector<char> Dep;
+ Instruction *Src = dyn_cast<Instruction>(*I);
+ Instruction *Des = dyn_cast<Instruction>(*J);
+ if (Src == Des)
+ continue;
+ if (isa<LoadInst>(Src) && isa<LoadInst>(Des))
+ continue;
+ if (auto D = DA->depends(Src, Des, true)) {
+ DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des
+ << "\n");
+ if (D->isFlow()) {
+ // TODO: Handle flow dependence. Check if it is sufficient to populate
+ // the dependence matrix with the direction reversed.
+ DEBUG(dbgs() << "Flow dependence not handled");
+ return false;
+ }
+ if (D->isAnti()) {
+ DEBUG(dbgs() << "Found Anti dependence \n");
+ unsigned Levels = D->getLevels();
+ char Direction;
+ for (unsigned II = 1; II <= Levels; ++II) {
+ const SCEV *Distance = D->getDistance(II);
+ const SCEVConstant *SCEVConst =
+ dyn_cast_or_null<SCEVConstant>(Distance);
+ if (SCEVConst) {
+ const ConstantInt *CI = SCEVConst->getValue();
+ if (CI->isNegative())
+ Direction = '<';
+ else if (CI->isZero())
+ Direction = '=';
+ else
+ Direction = '>';
+ Dep.push_back(Direction);
+ } else if (D->isScalar(II)) {
+ Direction = 'S';
+ Dep.push_back(Direction);
+ } else {
+ unsigned Dir = D->getDirection(II);
+ if (Dir == Dependence::DVEntry::LT ||
+ Dir == Dependence::DVEntry::LE)
+ Direction = '<';
+ else if (Dir == Dependence::DVEntry::GT ||
+ Dir == Dependence::DVEntry::GE)
+ Direction = '>';
+ else if (Dir == Dependence::DVEntry::EQ)
+ Direction = '=';
+ else
+ Direction = '*';
+ Dep.push_back(Direction);
+ }
+ }
+ while (Dep.size() != Level) {
+ Dep.push_back('I');
+ }
+
+ DepMatrix.push_back(Dep);
+ if (DepMatrix.size() > MaxMemInstrCount) {
+ DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
+ << " dependencies inside loop\n");
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // We don't have a DepMatrix to check legality; return false.
+ if (DepMatrix.size() == 0)
+ return false;
+ return true;
+}
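+
+// For a hypothetical two-deep nest such as
+//
+//   for (i = 1; i < N; ++i)
+//     for (j = 1; j < M; ++j)
+//       A[i-1][j-1] = A[i][j] + k;
+//
+// each row of the matrix describes one dependence, with one direction
+// character per loop level ('<', '>', '=', 'S', 'I' or '*'); the anti
+// dependence above would contribute a single row such as "< <" (the exact
+// directions are whatever DependenceAnalysis reports).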
+
+// A loop is moved from index 'from' to index 'to'. Update the dependence
+// matrix by exchanging the two columns.
+static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
+ unsigned ToIndx) {
+ unsigned numRows = DepMatrix.size();
+ for (unsigned i = 0; i < numRows; ++i) {
+ char TmpVal = DepMatrix[i][ToIndx];
+ DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
+ DepMatrix[i][FromIndx] = TmpVal;
+ }
+}
+
+// Checks if the outermost non-'=', non-'S', non-'I' dependence direction in
+// the given row of the dependence matrix is '>'.
+static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
+ unsigned Column) {
+ for (unsigned i = 0; i <= Column; ++i) {
+ if (DepMatrix[Row][i] == '<')
+ return false;
+ if (DepMatrix[Row][i] == '>')
+ return true;
+ }
+ // All dependencies were '=','S' or 'I'
+ return false;
+}
+
+// Checks that no dependence exists in the given row of the dependency matrix
+// before Column, i.e. every entry before Column is '=', 'S' or 'I'.
+static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
+ unsigned Column) {
+ for (unsigned i = 0; i < Column; ++i) {
+ if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
+ DepMatrix[Row][i] != 'I')
+ return false;
+ }
+ return true;
+}
+
+static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
+ unsigned OuterLoopId, char InnerDep,
+ char OuterDep) {
+
+ if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
+ return false;
+
+ if (InnerDep == OuterDep)
+ return true;
+
+ // It is legal to interchange if and only if after interchange no row has a
+ // '>' direction as the leftmost non-'='.
+
+ if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
+ return true;
+
+ if (InnerDep == '<')
+ return true;
+
+ if (InnerDep == '>') {
+ // If OuterLoopId is the outermost loop, interchanging would make the first
+ // dependence direction '>'.
+ if (OuterLoopId == 0)
+ return false;
+
+ // If all dependencies before OuterLoopId are '=', 'S' or 'I', then
+ // interchanging would make '>' the outermost non-'=' direction in this row,
+ // which is illegal; otherwise an earlier '<' already satisfies the row.
+ if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
+ return true;
+ }
+
+ return false;
+}
+
+// Checks if it is legal to interchange 2 loops.
+// [Theorem] A permutation of the loops in a perfect nest is legal if and
+// only if the direction matrix, after the same permutation is applied to its
+// columns, has no ">" direction as the leftmost non-"=" direction in any row.
+static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
+ unsigned InnerLoopId,
+ unsigned OuterLoopId) {
+
+ unsigned NumRows = DepMatrix.size();
+ // For each row check if it is valid to interchange.
+ for (unsigned Row = 0; Row < NumRows; ++Row) {
+ char InnerDep = DepMatrix[Row][InnerLoopId];
+ char OuterDep = DepMatrix[Row][OuterLoopId];
+ if (InnerDep == '*' || OuterDep == '*')
+ return false;
+ else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep,
+ OuterDep))
+ return false;
+ }
+ return true;
+}
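+
+// As a small worked example (hypothetical 2-column matrix, outer level
+// first): a row "< <" stays legal after the columns are swapped, since the
+// leftmost non-'=' direction is still '<', whereas a row "= >" becomes "> ="
+// after the swap and is rejected because '>' would then be the leftmost
+// non-'=' direction.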
+
+static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
+
+ DEBUG(dbgs() << "Calling populateWorklist called\n");
+ LoopVector LoopList;
+ Loop *CurrentLoop = &L;
+ std::vector<Loop *> vec = CurrentLoop->getSubLoopsVector();
+ while (vec.size() != 0) {
+ // The current loop has multiple subloops in it, so it is not tightly
+ // nested. Discard all the loops collected so far; this nest is not added
+ // to the worklist.
+ if (vec.size() != 1) {
+ LoopList.clear();
+ return;
+ }
+ LoopList.push_back(CurrentLoop);
+ CurrentLoop = *(vec.begin());
+ vec = CurrentLoop->getSubLoopsVector();
+ }
+ LoopList.push_back(CurrentLoop);
+ V.push_back(LoopList);
+}
+
+static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
+ PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
+ if (InnerIndexVar)
+ return InnerIndexVar;
+ if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
+ return nullptr;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PhiVar = cast<PHINode>(I);
+ Type *PhiTy = PhiVar->getType();
+ if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy())
+ return nullptr;
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
+ if (!AddRec || !AddRec->isAffine())
+ continue;
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ continue;
+ // Found the induction variable.
+ // FIXME: Handle loops with more than one induction variable. Note that,
+ // currently, legality makes sure we have only one induction variable.
+ return PhiVar;
+ }
+ return nullptr;
+}
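+
+// For example (hypothetical IR), a header PHI such as
+//   %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
+//   %i.next = add nsw i32 %i, 1
+// has an affine add-recurrence with a constant step and is accepted as the
+// induction variable, while a PHI whose step recurrence is not a constant is
+// skipped.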
+
+/// LoopInterchangeLegality checks if it is legal to interchange the loop.
+class LoopInterchangeLegality {
+public:
+ LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
+ LoopInterchange *Pass)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass),
+ InnerLoopHasReduction(false) {}
+
+ /// Check if the loops can be interchanged.
+ bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix);
+ /// Check if the loop structure is understood. We do not handle triangular
+ /// loops for now.
+ bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
+
+ bool currentLimitations();
+
+ bool hasInnerLoopReduction() { return InnerLoopHasReduction; }
+
+private:
+ bool tightlyNested(Loop *Outer, Loop *Inner);
+ bool containsUnsafeInstructionsInHeader(BasicBlock *BB);
+ bool areAllUsesReductions(Instruction *Ins, Loop *L);
+ bool containsUnsafeInstructionsInLatch(BasicBlock *BB);
+ bool findInductionAndReductions(Loop *L,
+ SmallVector<PHINode *, 8> &Inductions,
+ SmallVector<PHINode *, 8> &Reductions);
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ LoopInterchange *CurrentPass;
+
+ bool InnerLoopHasReduction;
+};
+
+/// LoopInterchangeProfitability checks if it is profitable to interchange the
+/// loop.
+class LoopInterchangeProfitability {
+public:
+ LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
+
+ /// Check if the loop interchange is profitable
+ bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix);
+
+private:
+ int getInstrOrderCost();
+
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// Scev analysis.
+ ScalarEvolution *SE;
+};
+
+/// LoopInterchangeTransform interchanges the loop
+class LoopInterchangeTransform {
+public:
+ LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT,
+ LoopInterchange *Pass, BasicBlock *LoopNestExit,
+ bool InnerLoopContainsReductions)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
+ LoopExit(LoopNestExit),
+ InnerLoopHasReduction(InnerLoopContainsReductions) {}
+
+ /// Interchange OuterLoop and InnerLoop.
+ bool transform();
+ void restructureLoops(Loop *InnerLoop, Loop *OuterLoop);
+ void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
+
+private:
+ void splitInnerLoopLatch(Instruction *);
+ void splitOuterLoopLatch();
+ void splitInnerLoopHeader();
+ bool adjustLoopLinks();
+ void adjustLoopPreheaders();
+ void adjustOuterLoopPreheader();
+ void adjustInnerLoopPreheader();
+ bool adjustLoopBranches();
+ void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
+ BasicBlock *NewPred);
+
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ BasicBlock *LoopExit;
+ bool InnerLoopHasReduction;
+};
+
+// Main LoopInterchange Pass
+struct LoopInterchange : public FunctionPass {
+ static char ID;
+ ScalarEvolution *SE;
+ LoopInfo *LI;
+ DependenceAnalysis *DA;
+ DominatorTree *DT;
+ LoopInterchange()
+ : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) {
+ initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DependenceAnalysis>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ }
+
+ bool runOnFunction(Function &F) override {
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DA = &getAnalysis<DependenceAnalysis>();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ // Build up a worklist of loop pairs to analyze.
+ SmallVector<LoopVector, 8> Worklist;
+
+ for (Loop *L : *LI)
+ populateWorklist(*L, Worklist);
+
+ DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ LoopVector LoopList = Worklist.pop_back_val();
+ Changed |= processLoopList(LoopList, F);
+ }
+ return Changed;
+ }
+
+ bool isComputableLoopNest(LoopVector LoopList) {
+ for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) {
+ Loop *L = *I;
+ const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
+ if (ExitCountOuter == SE->getCouldNotCompute()) {
+ DEBUG(dbgs() << "Couldn't compute Backedge count\n");
+ return false;
+ }
+ if (L->getNumBackEdges() != 1) {
+ DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
+ return false;
+ }
+ if (!L->getExitingBlock()) {
+ DEBUG(dbgs() << "Loop Doesn't have unique exit block\n");
+ return false;
+ }
+ }
+ return true;
+ }
+
+ unsigned selectLoopForInterchange(LoopVector LoopList) {
+ // TODO: Add a better heuristic to select the loop to be interchanged based
+ // on the dependence matrix. Currently we select the innermost loop.
+ return LoopList.size() - 1;
+ }
+
+ bool processLoopList(LoopVector LoopList, Function &F) {
+
+ bool Changed = false;
+ CharMatrix DependencyMatrix;
+ if (LoopList.size() < 2) {
+ DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
+ return false;
+ }
+ if (!isComputableLoopNest(LoopList)) {
+ DEBUG(dbgs() << "Not vaild loop candidate for interchange\n");
+ return false;
+ }
+ Loop *OuterMostLoop = *(LoopList.begin());
+
+ DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size()
+ << "\n");
+
+ if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(),
+ OuterMostLoop, DA)) {
+ DEBUG(dbgs() << "Populating Dependency matrix failed\n");
+ return false;
+ }
+#ifdef DUMP_DEP_MATRICIES
+ DEBUG(dbgs() << "Dependence before inter change \n");
+ printDepMatrix(DependencyMatrix);
+#endif
+
+ BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch();
+ BranchInst *OuterMostLoopLatchBI =
+ dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator());
+ if (!OuterMostLoopLatchBI)
+ return false;
+
+ // Since we currently do not handle LCSSA PHIs, any failure in the loop
+ // condition will now branch to LoopNestExit.
+ // TODO: This should be removed once we handle LCSSA PHI nodes.
+
+ // Get the Outermost loop exit.
+ BasicBlock *LoopNestExit;
+ if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader())
+ LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1);
+ else
+ LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0);
+
+ if (isa<PHINode>(LoopNestExit->begin())) {
+ DEBUG(dbgs() << "PHI Nodes in loop nest exit is not handled for now "
+ "since on failure all loops branch to loop nest exit.\n");
+ return false;
+ }
+
+ unsigned SelectedLoopId = selectLoopForInterchange(LoopList);
+ // Move the selected loop outwards to the best possible position.
+ for (unsigned i = SelectedLoopId; i > 0; i--) {
+ bool Interchanged =
+ processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
+ if (!Interchanged)
+ return Changed;
+ // The loops have been interchanged; reflect the same in LoopList.
+ std::swap(LoopList[i - 1], LoopList[i]);
+
+ // Update the dependency matrix.
+ interChangeDependencies(DependencyMatrix, i, i - 1);
+ DT->recalculate(F);
+#ifdef DUMP_DEP_MATRICIES
+ DEBUG(dbgs() << "Dependence after inter change \n");
+ printDepMatrix(DependencyMatrix);
+#endif
+ Changed |= Interchanged;
+ }
+ return Changed;
+ }
+
+ bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
+ unsigned OuterLoopId, BasicBlock *LoopNestExit,
+ std::vector<std::vector<char>> &DependencyMatrix) {
+
+ DEBUG(dbgs() << "Processing Innder Loop Id = " << InnerLoopId
+ << " and OuterLoopId = " << OuterLoopId << "\n");
+ Loop *InnerLoop = LoopList[InnerLoopId];
+ Loop *OuterLoop = LoopList[OuterLoopId];
+
+ LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this);
+ if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
+ return false;
+ }
+ DEBUG(dbgs() << "Loops are legal to interchange\n");
+ LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
+ if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ DEBUG(dbgs() << "Interchanging Loops not profitable\n");
+ return false;
+ }
+
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this,
+ LoopNestExit, LIL.hasInnerLoopReduction());
+ LIT.transform();
+ DEBUG(dbgs() << "Loops interchanged\n");
+ return true;
+ }
+};
+
+} // end of namespace
+bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
+ return !std::any_of(Ins->user_begin(), Ins->user_end(), [=](User *U) -> bool {
+ PHINode *UserIns = dyn_cast<PHINode>(U);
+ ReductionDescriptor RD;
+ return !UserIns || !ReductionDescriptor::isReductionPHI(UserIns, L, RD);
+ });
+}
+
+bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader(
+ BasicBlock *BB) {
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Loads corresponding to reduction PHIs are safe while concluding whether
+ // the loops are tightly nested.
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ if (!areAllUsesReductions(L, InnerLoop))
+ return true;
+ } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ return true;
+ }
+ return false;
+}
+
+bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
+ BasicBlock *BB) {
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Stores corresponding to reductions are safe while concluding whether the
+ // loops are tightly nested.
+ if (StoreInst *L = dyn_cast<StoreInst>(I)) {
+ PHINode *PHI = dyn_cast<PHINode>(L->getOperand(0));
+ if (!PHI)
+ return true;
+ } else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ return true;
+ }
+ return false;
+}
+
+bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+
+ DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n");
+
+ // A perfectly nested loop will not have any branch in between the outer and
+ // inner blocks, i.e. the outer header will branch only to either the inner
+ // preheader or the outer loop latch.
+ BranchInst *outerLoopHeaderBI =
+ dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
+ if (!outerLoopHeaderBI)
+ return false;
+ unsigned num = outerLoopHeaderBI->getNumSuccessors();
+ for (unsigned i = 0; i < num; i++) {
+ if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
+ outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
+ return false;
+ }
+
+ DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n");
+ // There is no basic block in between; now make sure the outer header and
+ // outer loop latch don't contain any unsafe instructions.
+ if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
+ containsUnsafeInstructionsInLatch(OuterLoopLatch))
+ return false;
+
+ DEBUG(dbgs() << "Loops are perfectly nested \n");
+ // We have a perfect loop nest.
+ return true;
+}
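+
+// For intuition (hypothetical source-level nests): a nest like
+//
+//   for (i = 0; i < N; ++i)
+//     for (j = 0; j < M; ++j)
+//       ...
+//
+// is tightly nested, while
+//
+//   for (i = 0; i < N; ++i) {
+//     s += f(i);                 // extra work between the two loops
+//     for (j = 0; j < M; ++j)
+//       ...
+//   }
+//
+// is not, since the extra work between the two loops means the outer header
+// either no longer branches straight to the inner preheader or contains
+// instructions the checks above treat as unsafe.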
+
+
+bool LoopInterchangeLegality::isLoopStructureUnderstood(
+ PHINode *InnerInduction) {
+
+ unsigned Num = InnerInduction->getNumOperands();
+ BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
+ for (unsigned i = 0; i < Num; ++i) {
+ Value *Val = InnerInduction->getOperand(i);
+ if (isa<Constant>(Val))
+ continue;
+ Instruction *I = dyn_cast<Instruction>(Val);
+ if (!I)
+ return false;
+ // TODO: Handle triangular loops.
+ // e.g. for(int i=0;i<N;i++)
+ // for(int j=i;j<N;j++)
+ unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
+ if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
+ InnerLoopPreheader &&
+ !OuterLoop->isLoopInvariant(I)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool LoopInterchangeLegality::findInductionAndReductions(
+ Loop *L, SmallVector<PHINode *, 8> &Inductions,
+ SmallVector<PHINode *, 8> &Reductions) {
+ if (!L->getLoopLatch() || !L->getLoopPredecessor())
+ return false;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ ReductionDescriptor RD;
+ PHINode *PHI = cast<PHINode>(I);
+ ConstantInt *StepValue = nullptr;
+ if (isInductionPHI(PHI, SE, StepValue))
+ Inductions.push_back(PHI);
+ else if (ReductionDescriptor::isReductionPHI(PHI, L, RD))
+ Reductions.push_back(PHI);
+ else {
+ DEBUG(
+ dbgs() << "Failed to recognize PHI as an induction or reduction.\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
+ for (auto I = Block->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ // A reduction LCSSA PHI will have only one incoming block, the loop latch.
+ if (PHI->getNumIncomingValues() > 1)
+ return false;
+ Instruction *Ins = dyn_cast<Instruction>(PHI->getIncomingValue(0));
+ if (!Ins)
+ return false;
+ // The incoming value of an LCSSA PHI in the outer loop exit can only be the
+ // inner loop exit's LCSSA PHI; otherwise the loops would not be tightly
+ // nested.
+ if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
+ return false;
+ }
+ return true;
+}
+
+static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock,
+ BasicBlock *LoopHeader) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator())) {
+ unsigned Num = BI->getNumSuccessors();
+ assert(Num == 2);
+ for (unsigned i = 0; i < Num; ++i) {
+ if (BI->getSuccessor(i) == LoopHeader)
+ continue;
+ return BI->getSuccessor(i);
+ }
+ }
+ return nullptr;
+}
+
+// Returns true if one of the transform's current limitations applies, in
+// which case we do not proceed.
+bool LoopInterchangeLegality::currentLimitations() {
+
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+
+ PHINode *InnerInductionVar;
+ SmallVector<PHINode *, 8> Inductions;
+ SmallVector<PHINode *, 8> Reductions;
+ if (!findInductionAndReductions(InnerLoop, Inductions, Reductions))
+ return true;
+
+ // TODO: Currently we handle only loops with 1 induction variable.
+ if (Inductions.size() != 1) {
+ DEBUG(dbgs() << "We currently only support loops with 1 induction variable."
+ << "Failed to interchange due to current limitation\n");
+ return true;
+ }
+ if (Reductions.size() > 0)
+ InnerLoopHasReduction = true;
+
+ InnerInductionVar = Inductions.pop_back_val();
+ Reductions.clear();
+ if (!findInductionAndReductions(OuterLoop, Inductions, Reductions))
+ return true;
+
+ // The outer loop cannot have reductions because then the loops would not be
+ // tightly nested.
+ if (!Reductions.empty())
+ return true;
+ // TODO: Currently we handle only loops with 1 induction variable.
+ if (Inductions.size() != 1)
+ return true;
+
+ // TODO: Triangular loops are not handled for now.
+ if (!isLoopStructureUnderstood(InnerInductionVar)) {
+ DEBUG(dbgs() << "Loop structure not understood by pass\n");
+ return true;
+ }
+
+ // TODO: We only handle LCSSA PHIs corresponding to reductions for now.
+ BasicBlock *LoopExitBlock =
+ getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader);
+ if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true))
+ return true;
+
+ LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader);
+ if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false))
+ return true;
+
+ // TODO: Current limitation: since we split the inner loop latch at the point
+ // where the induction variable is incremented (induction.next), we cannot
+ // have more than one user of induction.next, as that would result in broken
+ // code after the split.
+ // e.g.
+ // for(i=0;i<N;i++) {
+ // for(j = 0;j<M;j++) {
+ // A[j+1][i+2] = A[j][i]+k;
+ // }
+ // }
+ bool FoundInduction = false;
+ Instruction *InnerIndexVarInc = nullptr;
+ if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
+ InnerIndexVarInc =
+ dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
+ else
+ InnerIndexVarInc =
+ dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
+
+ if (!InnerIndexVarInc)
+ return true;
+
+ // Since we split the inner loop latch on this induction variable, make sure
+ // there is no instruction between the induction variable and the branch
+ // instruction.
+
+ for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend();
+ I != E && !FoundInduction; ++I) {
+ if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I))
+ continue;
+ const Instruction &Ins = *I;
+ // We found an instruction. If it is not the induction variable increment,
+ // it is not safe to split this loop latch.
+ if (!Ins.isIdenticalTo(InnerIndexVarInc))
+ return true;
+ else
+ FoundInduction = true;
+ }
+ // We reached the end of the loop latch without finding the induction
+ // variable increment; report this as a current limitation.
+ if (!FoundInduction)
+ return true;
+
+ return false;
+}
+
+bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
+
+ if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
+ DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
+ << "and OuterLoopId = " << OuterLoopId
+ << "due to dependence\n");
+ return false;
+ }
+
+ // Create unique preheaders if we do not already have them.
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+
+ // Create a unique outer preheader -
+ // 1) If OuterLoop preheader is not present.
+ // 2) If OuterLoop Preheader is same as OuterLoop Header
+ // 3) If OuterLoop Preheader is same as Header of the previous loop.
+ // 4) If OuterLoop Preheader is Entry node.
+ if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
+ isa<PHINode>(OuterLoopPreHeader->begin()) ||
+ !OuterLoopPreHeader->getUniquePredecessor()) {
+ OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass);
+ }
+
+ if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
+ InnerLoopPreHeader == OuterLoop->getHeader()) {
+ InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass);
+ }
+
+ // TODO: The loops could not be interchanged due to current limitations in the
+ // transform module.
+ if (currentLimitations()) {
+ DEBUG(dbgs() << "Not legal because of current transform limitation\n");
+ return false;
+ }
+
+ // Check if the loops are tightly nested.
+ if (!tightlyNested(OuterLoop, InnerLoop)) {
+ DEBUG(dbgs() << "Loops not tightly nested\n");
+ return false;
+ }
+
+ return true;
+}
+
+int LoopInterchangeProfitability::getInstrOrderCost() {
+ unsigned GoodOrder, BadOrder;
+ BadOrder = GoodOrder = 0;
+ for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end();
+ BI != BE; ++BI) {
+ for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) {
+ const Instruction &Ins = *I;
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
+ unsigned NumOp = GEP->getNumOperands();
+ bool FoundInnerInduction = false;
+ bool FoundOuterInduction = false;
+ for (unsigned i = 0; i < NumOp; ++i) {
+ const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
+ if (!AR)
+ continue;
+
+ // If we find the inner induction after an outer induction e.g.
+ // for(int i=0;i<N;i++)
+ // for(int j=0;j<N;j++)
+ // A[i][j] = A[i-1][j-1]+k;
+ // then it is a good order.
+ if (AR->getLoop() == InnerLoop) {
+ // We found an InnerLoop induction after OuterLoop induction. It is
+ // a good order.
+ FoundInnerInduction = true;
+ if (FoundOuterInduction) {
+ GoodOrder++;
+ break;
+ }
+ }
+ // If we find the outer induction after an inner induction e.g.
+ // for(int i=0;i<N;i++)
+ // for(int j=0;j<N;j++)
+ // A[j][i] = A[j-1][i-1]+k;
+ // then it is a bad order.
+ if (AR->getLoop() == OuterLoop) {
+ // We found an OuterLoop induction after InnerLoop induction. It is
+ // a bad order.
+ FoundOuterInduction = true;
+ if (FoundInnerInduction) {
+ BadOrder++;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ return GoodOrder - BadOrder;
+}
+
+static bool isProfitableForVectorization(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
+ // TODO: Improve this heuristic to catch more cases.
+ // If the inner loop is loop independent or doesn't carry any dependence, it
+ // is profitable to move it to the outer position.
+ unsigned Row = DepMatrix.size();
+ for (unsigned i = 0; i < Row; ++i) {
+ if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I')
+ return false;
+ // TODO: We need to improve this heuristic.
+ if (DepMatrix[i][OuterLoopId] != '=')
+ return false;
+ }
+ // If the outer loop carries the dependences and the inner loop is loop
+ // independent, it is profitable to interchange them to enable parallelism.
+ return true;
+}
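+
+// Concretely (hypothetical 2-column matrix, outer level first): rows such as
+// "= S" or "= I" pass the check above, while a row "= <" fails it because the
+// inner loop then carries a dependence.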
+
+bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
+
+ // TODO: Add better profitability checks, e.g.:
+ // 1) Construct the dependency matrix and move the loop with no loop-carried
+ // dependence inside to enable vectorization.
+
+ // This is a rough cost estimation algorithm. It counts the good and bad
+ // orderings of induction variables in the instructions and allows
+ // interchange if the number of bad orderings exceeds the good ones.
+ int Cost = 0;
+ Cost += getInstrOrderCost();
+ DEBUG(dbgs() << "Cost = " << Cost << "\n");
+ if (Cost < 0)
+ return true;
+
+ // It is not profitable as per the current cache profitability model, but
+ // check if we can move this loop outside to improve parallelism.
+ bool ImprovesPar =
+ isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
+ return ImprovesPar;
+}
+
+void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
+ Loop *InnerLoop) {
+ for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E;
+ ++I) {
+ if (*I == InnerLoop) {
+ OuterLoop->removeChildLoop(I);
+ return;
+ }
+ }
+ assert(false && "Couldn't find loop");
+}
+
+void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
+ Loop *OuterLoop) {
+ Loop *OuterLoopParent = OuterLoop->getParentLoop();
+ if (OuterLoopParent) {
+ // Remove the loop from its parent loop.
+ removeChildLoop(OuterLoopParent, OuterLoop);
+ removeChildLoop(OuterLoop, InnerLoop);
+ OuterLoopParent->addChildLoop(InnerLoop);
+ } else {
+ removeChildLoop(OuterLoop, InnerLoop);
+ LI->changeTopLevelLoop(OuterLoop, InnerLoop);
+ }
+
+ while (!InnerLoop->empty())
+ OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin()));
+
+ InnerLoop->addChildLoop(OuterLoop);
+}
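+
+// Schematically (for a hypothetical two-deep nest): a LoopInfo nest of
+//   Parent -> Outer -> Inner -> {Inner's children}
+// becomes
+//   Parent -> Inner -> Outer -> {Inner's former children}
+// after the re-parenting above; only the Loop parent/child links are updated
+// here.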
+
+bool LoopInterchangeTransform::transform() {
+
+ DEBUG(dbgs() << "transform\n");
+ bool Transformed = false;
+ Instruction *InnerIndexVar;
+
+ if (InnerLoop->getSubLoops().size() == 0) {
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ DEBUG(dbgs() << "Calling Split Inner Loop\n");
+ PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
+ if (!InductionPHI) {
+ DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
+ return false;
+ }
+
+ if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
+ InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
+ else
+ InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
+
+ //
+ // Split at the place where the induction variable is
+ // incremented/decremented.
+ // TODO: This splitting logic may not always work. Fix this.
+ splitInnerLoopLatch(InnerIndexVar);
+ DEBUG(dbgs() << "splitInnerLoopLatch Done\n");
+
+ // Split the inner loop's PHI nodes out into a separate basic block.
+ splitInnerLoopHeader();
+ DEBUG(dbgs() << "splitInnerLoopHeader Done\n");
+ }
+
+ Transformed |= adjustLoopLinks();
+ if (!Transformed) {
+ DEBUG(dbgs() << "adjustLoopLinks Failed\n");
+ return false;
+ }
+
+ restructureLoops(InnerLoop, OuterLoop);
+ return true;
+}
+
+void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
+ InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
+}
+
+void LoopInterchangeTransform::splitOuterLoopLatch() {
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch;
+ OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock,
+ OuterLoopLatch->getFirstNonPHI(), DT, LI);
+}
+
+void LoopInterchangeTransform::splitInnerLoopHeader() {
+
+ // Split the inner loop header out. Here make sure that the reduction PHIs
+ // stay in the inner loop body.
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ if (InnerLoopHasReduction) {
+ // FIXME: Check if the induction PHI will always be the first PHI.
+ BasicBlock *New = InnerLoopHeader->splitBasicBlock(
+ ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split");
+ if (LI)
+ if (Loop *L = LI->getLoopFor(InnerLoopHeader))
+ L->addBasicBlockToLoop(New, *LI);
+
+ // Adjust Reduction PHI's in the block.
+ SmallVector<PHINode *, 8> PHIVec;
+ for (auto I = New->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = dyn_cast<PHINode>(I);
+ Value *V = PHI->getIncomingValueForBlock(InnerLoopPreHeader);
+ PHI->replaceAllUsesWith(V);
+ PHIVec.push_back((PHI));
+ }
+ for (auto I = PHIVec.begin(), E = PHIVec.end(); I != E; ++I) {
+ PHINode *P = *I;
+ P->eraseFromParent();
+ }
+ } else {
+ SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
+ }
+
+ DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & "
+ "InnerLoopHeader \n");
+}
+
+/// \brief Move all instructions except the terminator from FromBB right before
+/// InsertBefore
+static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
+ auto &ToList = InsertBefore->getParent()->getInstList();
+ auto &FromList = FromBB->getInstList();
+
+ ToList.splice(InsertBefore, FromList, FromList.begin(),
+ FromBB->getTerminator());
+}
+
+void LoopInterchangeTransform::adjustOuterLoopPreheader() {
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader();
+
+ moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator());
+}
+
+void LoopInterchangeTransform::adjustInnerLoopPreheader() {
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterHeader = OuterLoop->getHeader();
+
+ moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator());
+}
+
+void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock,
+ BasicBlock *OldPred,
+ BasicBlock *NewPred) {
+ for (auto I = CurrBlock->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ unsigned Num = PHI->getNumIncomingValues();
+ for (unsigned i = 0; i < Num; ++i) {
+ if (PHI->getIncomingBlock(i) == OldPred)
+ PHI->setIncomingBlock(i, NewPred);
+ }
+ }
+}
+
+bool LoopInterchangeTransform::adjustLoopBranches() {
+
+ DEBUG(dbgs() << "adjustLoopBranches called\n");
+ // Adjust the loop preheader
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
+ BasicBlock *InnerLoopLatchPredecessor =
+ InnerLoopLatch->getUniquePredecessor();
+ BasicBlock *InnerLoopLatchSuccessor;
+ BasicBlock *OuterLoopLatchSuccessor;
+
+ BranchInst *OuterLoopLatchBI =
+ dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
+ BranchInst *InnerLoopLatchBI =
+ dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
+ BranchInst *OuterLoopHeaderBI =
+ dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
+ BranchInst *InnerLoopHeaderBI =
+ dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
+
+ if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
+ !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
+ !InnerLoopHeaderBI)
+ return false;
+
+ BranchInst *InnerLoopLatchPredecessorBI =
+ dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
+ BranchInst *OuterLoopPredecessorBI =
+ dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
+
+ if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
+ return false;
+ BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
+ if (!InnerLoopHeaderSuccessor)
+ return false;
+
+ // Adjust Loop Preheader and headers
+
+ unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader)
+ OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader);
+ }
+
+ NumSucc = OuterLoopHeaderBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
+ OuterLoopHeaderBI->setSuccessor(i, LoopExit);
+ else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
+ OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
+ }
+
+ // Adjust reduction PHI's now that the incoming block has changed.
+ updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
+ OuterLoopHeader);
+
+ BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
+ InnerLoopHeaderBI->eraseFromParent();
+
+ // -------------Adjust loop latches-----------
+ if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
+ InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
+ else
+ InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
+
+ NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch)
+ InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor);
+ }
+
+ // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of each PHI
+ // with its incoming value and remove the PHI node from the inner loop.
+ SmallVector<PHINode *, 8> LcssaVec;
+ for (auto I = InnerLoopLatchSuccessor->begin(); isa<PHINode>(I); ++I) {
+ PHINode *LcssaPhi = cast<PHINode>(I);
+ LcssaVec.push_back(LcssaPhi);
+ }
+ for (auto I = LcssaVec.begin(), E = LcssaVec.end(); I != E; ++I) {
+ PHINode *P = *I;
+ Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch);
+ P->replaceAllUsesWith(Incoming);
+ P->eraseFromParent();
+ }
+
+ if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
+ OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
+ else
+ OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
+
+ if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor)
+ InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor);
+ else
+ InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor);
+
+ updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
+
+ if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) {
+ OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch);
+ } else {
+ OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch);
+ }
+
+ return true;
+}
+
+void LoopInterchangeTransform::adjustLoopPreheaders() {
+
+ // We have interchanged the preheaders, so we need to interchange their
+ // contents as well, because the contents of the inner preheader were
+ // previously executed inside the outer loop.
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BranchInst *InnerTermBI =
+ cast<BranchInst>(InnerLoopPreHeader->getTerminator());
+
+ // These instructions should now be executed inside the loop.
+ // Move them into the outer loop header, before its terminator.
+ moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
+ // These instructions were not previously executed inside the loop, so move
+ // them to the old inner loop preheader.
+ moveBBContents(OuterLoopPreHeader, InnerTermBI);
+}
+
+bool LoopInterchangeTransform::adjustLoopLinks() {
+
+ // Adjust all branches in the inner and outer loop.
+ bool Changed = adjustLoopBranches();
+ if (Changed)
+ adjustLoopPreheaders();
+ return Changed;
+}
+
+char LoopInterchange::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
+ "Interchanges loops for cache reuse", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+
+INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
+ "Interchanges loops for cache reuse", false, false)
+
+Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 8f12204..ed103e6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -21,6 +23,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -28,7 +31,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -43,6 +45,12 @@ static cl::opt<unsigned>
MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
cl::desc("The maximum increment for loop rerolling"));
+static cl::opt<unsigned>
+NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
+ cl::Hidden,
+ cl::desc("The maximum number of failures to tolerate"
+ " during fuzzy matching. (default: 400)"));
+
// This loop re-rolling transformation aims to transform loops like this:
//
// int foo(int a);
@@ -119,6 +127,16 @@ MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
// br %cmp, header, exit
namespace {
+ enum IterationLimits {
+ /// The maximum number of iterations that we'll try to reroll. This
+ /// has to be less than 25 in order to fit into a SmallBitVector.
+ IL_MaxRerollIterations = 16,
+ /// The bitvector index used by loop induction variables and other
+ /// instructions that belong to all iterations.
+ IL_All,
+ IL_End
+ };
+
class LoopReroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
@@ -130,19 +148,18 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
-protected:
+ protected:
AliasAnalysis *AA;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
DominatorTree *DT;
@@ -311,26 +328,113 @@ protected:
DenseSet<int> Reds;
};
+ // A DAGRootSet models an induction variable being used in a rerollable
+ // loop. For example,
+ //
+ // x[i*3+0] = y1
+ // x[i*3+1] = y2
+ // x[i*3+2] = y3
+ //
+ //   Base instruction -> i*3
+ //                    +---+----+
+ //                   /    |     \
+ //               ST[y1]  +1     +2  <-- Roots
+ //                        |      |
+ //                      ST[y2] ST[y3]
+ //
+ // There may be multiple DAGRoots, for example:
+ //
+ // x[i*2+0] = ... (1)
+ // x[i*2+1] = ... (1)
+ // x[i*2+4] = ... (2)
+ // x[i*2+5] = ... (2)
+ // x[(i+1234)*2+5678] = ... (3)
+ // x[(i+1234)*2+5679] = ... (3)
+ //
+ // The loop will be rerolled by adding a new loop induction variable,
+ // one for the Base instruction in each DAGRootSet.
+ //
+ struct DAGRootSet {
+ Instruction *BaseInst;
+ SmallInstructionVector Roots;
+ // The instructions between IV and BaseInst (but not including BaseInst).
+ SmallInstructionSet SubsumedInsts;
+ };
+
+ // The set of all DAG roots, and state tracking of all roots
+ // for a particular induction variable.
+ struct DAGRootTracker {
+ DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
+ ScalarEvolution *SE, AliasAnalysis *AA,
+ TargetLibraryInfo *TLI)
+ : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {}
+
+ /// Stage 1: Find all the DAG roots for the induction variable.
+ bool findRoots();
+ /// Stage 2: Validate if the found roots are valid.
+ bool validate(ReductionTracker &Reductions);
+ /// Stage 3: Assuming validate() returned true, perform the
+ /// replacement.
+ /// @param IterCount The maximum iteration count of L.
+ void replace(const SCEV *IterCount);
+
+ protected:
+ typedef MapVector<Instruction*, SmallBitVector> UsesTy;
+
+ bool findRootsRecursive(Instruction *IVU,
+ SmallInstructionSet SubsumedInsts);
+ bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
+ bool collectPossibleRoots(Instruction *Base,
+ std::map<int64_t,Instruction*> &Roots);
+
+ bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
+ void collectInLoopUserSet(const SmallInstructionVector &Roots,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+ void collectInLoopUserSet(Instruction *Root,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+
+ UsesTy::iterator nextInstr(int Val, UsesTy &In,
+ const SmallInstructionSet &Exclude,
+ UsesTy::iterator *StartI=nullptr);
+ bool isBaseInst(Instruction *I);
+ bool isRootInst(Instruction *I);
+ bool instrDependsOn(Instruction *I,
+ UsesTy::iterator Start,
+ UsesTy::iterator End);
+
+ LoopReroll *Parent;
+
+ // Members of Parent, replicated here for brevity.
+ Loop *L;
+ ScalarEvolution *SE;
+ AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
+
+ // The loop induction variable.
+ Instruction *IV;
+ // Loop step amount.
+ uint64_t Inc;
+ // Loop reroll count; if Inc == 1, this records the scaling applied
+ // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
+ // If Inc is not 1, Scale = Inc.
+ uint64_t Scale;
+ // The roots themselves.
+ SmallVector<DAGRootSet,16> RootSets;
+ // All increment instructions for IV.
+ SmallInstructionVector LoopIncs;
+ // Map of all instructions in the loop (in order) to the iterations
+ // they are used in (or specially, IL_All for instructions
+ // used in the loop increment mechanism).
+ UsesTy Uses;
+ };
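+
+ // A rough sketch of how the stages fit together for the x[i*3+k] example
+ // above (a hypothetical walk-through, not a precise specification):
+ // findRoots() identifies i*3 as the base and the +1/+2 adds as roots,
+ // validate() checks that each iteration's instructions match the base
+ // iteration, and replace() rewrites the loop to run three times as many
+ // iterations over a single copy of the body.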
+
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
void collectPossibleReductions(Loop *L,
ReductionTracker &Reductions);
- void collectInLoopUserSet(Loop *L,
- const SmallInstructionVector &Roots,
- const SmallInstructionSet &Exclude,
- const SmallInstructionSet &Final,
- DenseSet<Instruction *> &Users);
- void collectInLoopUserSet(Loop *L,
- Instruction * Root,
- const SmallInstructionSet &Exclude,
- const SmallInstructionSet &Final,
- DenseSet<Instruction *> &Users);
- bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
- Instruction *&IV,
- SmallInstructionVector &LoopIncs);
- bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV,
- SmallVector<SmallInstructionVector, 32> &Roots,
- SmallInstructionSet &AllRoots,
- SmallInstructionVector &LoopIncs);
bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
ReductionTracker &Reductions);
};
@@ -339,10 +443,10 @@ protected:
char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
Pass *llvm::createLoopRerollPass() {
@@ -353,10 +457,10 @@ Pass *llvm::createLoopRerollPass() {
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
// non-loop blocks to be outside the loop.
static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
- for (User *U : I->users())
+ for (User *U : I->users()) {
if (!L->contains(cast<Instruction>(U)))
return true;
-
+ }
return false;
}
@@ -403,6 +507,8 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) {
// (including the PHI), except for the last value (which is used by the PHI
// and also outside the loop).
Instruction *C = Instructions.front();
+ if (C->user_empty())
+ return;
do {
C = cast<Instruction>(*C->user_begin());
@@ -424,11 +530,12 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) {
return;
// C is now the (potential) last instruction in the reduction chain.
- for (User *U : C->users())
+ for (User *U : C->users()) {
// The only in-loop user can be the initial PHI.
if (L->contains(cast<Instruction>(U)))
if (cast<Instruction>(U) != Instructions.front())
return;
+ }
Instructions.push_back(C);
Valid = true;
@@ -467,7 +574,7 @@ void LoopReroll::collectPossibleReductions(Loop *L,
// if they are users, but their users are not added. This is used, for
// example, to prevent a reduction update from forcing all later reduction
// updates into the use set.
-void LoopReroll::collectInLoopUserSet(Loop *L,
+void LoopReroll::DAGRootTracker::collectInLoopUserSet(
Instruction *Root, const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users) {
@@ -504,14 +611,14 @@ void LoopReroll::collectInLoopUserSet(Loop *L,
// Collect all of the users of all of the provided root instructions (combined
// into a single set).
-void LoopReroll::collectInLoopUserSet(Loop *L,
+void LoopReroll::DAGRootTracker::collectInLoopUserSet(
const SmallInstructionVector &Roots,
const SmallInstructionSet &Exclude,
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users) {
for (SmallInstructionVector::const_iterator I = Roots.begin(),
IE = Roots.end(); I != IE; ++I)
- collectInLoopUserSet(L, *I, Exclude, Final, Users);
+ collectInLoopUserSet(*I, Exclude, Final, Users);
}
static bool isSimpleLoadStore(Instruction *I) {
@@ -524,130 +631,699 @@ static bool isSimpleLoadStore(Instruction *I) {
return false;
}
-// Recognize loops that are setup like this:
-//
-// %iv = phi [ (preheader, ...), (body, %iv.next) ]
-// %scaled.iv = mul %iv, scale
-// f(%scaled.iv)
-// %scaled.iv.1 = add %scaled.iv, 1
-// f(%scaled.iv.1)
-// %scaled.iv.2 = add %scaled.iv, 2
-// f(%scaled.iv.2)
-// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
-// f(%scaled.iv.scale_m_1)
-// ...
-// %iv.next = add %iv, 1
-// %cmp = icmp(%iv, ...)
-// br %cmp, header, exit
-//
-// and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs.
-bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
- Instruction *&IV,
- SmallInstructionVector &LoopIncs) {
- // This is a special case: here we're looking for all uses (except for
- // the increment) to be multiplied by a common factor. The increment must
- // be by one. This is to capture loops like:
- // for (int i = 0; i < 500; ++i) {
- // foo(3*i); foo(3*i+1); foo(3*i+2);
- // }
- if (RealIV->getNumUses() != 2)
+/// Return true if IVU is a "simple" arithmetic operation.
+/// This is used for narrowing the search space for DAGRoots; only arithmetic
+/// and GEPs can be part of a DAGRoot.
+static bool isSimpleArithmeticOp(User *IVU) {
+ if (Instruction *I = dyn_cast<Instruction>(IVU)) {
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::GetElementPtr:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool isLoopIncrement(User *U, Instruction *IV) {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
+ if (!BO || BO->getOpcode() != Instruction::Add)
+ return false;
+
+ for (auto *UU : BO->users()) {
+ PHINode *PN = dyn_cast<PHINode>(UU);
+ if (PN && PN == IV)
+ return true;
+ }
+ return false;
+}
+
+bool LoopReroll::DAGRootTracker::
+collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
+ SmallInstructionVector BaseUsers;
+
+ for (auto *I : Base->users()) {
+ ConstantInt *CI = nullptr;
+
+ if (isLoopIncrement(I, IV)) {
+ LoopIncs.push_back(cast<Instruction>(I));
+ continue;
+ }
+
+ // The root nodes must be either GEPs, ORs or ADDs.
+ if (auto *BO = dyn_cast<BinaryOperator>(I)) {
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Or)
+ CI = dyn_cast<ConstantInt>(BO->getOperand(1));
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
+ CI = dyn_cast<ConstantInt>(LastOperand);
+ }
+
+ if (!CI) {
+ if (Instruction *II = dyn_cast<Instruction>(I)) {
+ BaseUsers.push_back(II);
+ continue;
+ } else {
+ DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I << "\n");
+ return false;
+ }
+ }
+
+ int64_t V = CI->getValue().getSExtValue();
+ if (Roots.find(V) != Roots.end())
+ // No duplicates, please.
+ return false;
+
+ // FIXME: Add support for negative values.
+ if (V < 0) {
+ DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n");
+ return false;
+ }
+
+ Roots[V] = cast<Instruction>(I);
+ }
+
+ if (Roots.empty())
return false;
- const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
- Instruction *User1 = cast<Instruction>(*RealIV->user_begin()),
- *User2 = cast<Instruction>(*std::next(RealIV->user_begin()));
- if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
+
+ // If we found non-loop-inc, non-root users of Base, assume they are
+ // for the zeroth root index. This is because "add %a, 0" gets optimized
+ // away.
+ if (BaseUsers.size()) {
+ if (Roots.find(0) != Roots.end()) {
+ DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
+ return false;
+ }
+ Roots[0] = Base;
+ }
+
+ // Calculate the number of users of the base, or lowest indexed, iteration.
+ unsigned NumBaseUses = BaseUsers.size();
+ if (NumBaseUses == 0)
+ NumBaseUses = Roots.begin()->second->getNumUses();
+
+ // Check that every node has the same number of users.
+ for (auto &KV : Roots) {
+ if (KV.first == 0)
+ continue;
+ if (KV.second->getNumUses() != NumBaseUses) {
+ DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
+ << "#Base=" << NumBaseUses << ", #Root=" <<
+ KV.second->getNumUses() << "\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool LoopReroll::DAGRootTracker::
+findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
+ // Does the user look like it could be part of a root set?
+ // All its users must be simple arithmetic ops.
+ if (I->getNumUses() > IL_MaxRerollIterations)
return false;
- const SCEVAddRecExpr *User1SCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User1)),
- *User2SCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2));
- if (!User1SCEV || !User1SCEV->isAffine() ||
- !User2SCEV || !User2SCEV->isAffine())
+
+ if ((I->getOpcode() == Instruction::Mul ||
+ I->getOpcode() == Instruction::PHI) &&
+ I != IV &&
+ findRootsBase(I, SubsumedInsts))
+ return true;
+
+ SubsumedInsts.insert(I);
+
+ for (User *V : I->users()) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (std::find(LoopIncs.begin(), LoopIncs.end(), I) != LoopIncs.end())
+ continue;
+
+ if (!I || !isSimpleArithmeticOp(I) ||
+ !findRootsRecursive(I, SubsumedInsts))
+ return false;
+ }
+ return true;
+}
+
+bool LoopReroll::DAGRootTracker::
+findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
+
+  // The base instruction needs to be a multiply or a PHI so
+  // that we can erase it.
+ if (IVU->getOpcode() != Instruction::Mul &&
+ IVU->getOpcode() != Instruction::PHI)
+ return false;
+
+ std::map<int64_t, Instruction*> V;
+ if (!collectPossibleRoots(IVU, V))
return false;
- // We assume below that User1 is the scale multiply and User2 is the
- // increment. If this can't be true, then swap them.
- if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) {
- std::swap(User1, User2);
- std::swap(User1SCEV, User2SCEV);
+ // If we didn't get a root for index zero, then IVU must be
+ // subsumed.
+ if (V.find(0) == V.end())
+ SubsumedInsts.insert(IVU);
+
+ // Partition the vector into monotonically increasing indexes.
+ DAGRootSet DRS;
+ DRS.BaseInst = nullptr;
+
+ for (auto &KV : V) {
+ if (!DRS.BaseInst) {
+ DRS.BaseInst = KV.second;
+ DRS.SubsumedInsts = SubsumedInsts;
+ } else if (DRS.Roots.empty()) {
+ DRS.Roots.push_back(KV.second);
+ } else if (V.find(KV.first - 1) != V.end()) {
+ DRS.Roots.push_back(KV.second);
+ } else {
+ // Linear sequence terminated.
+ RootSets.push_back(DRS);
+ DRS.BaseInst = KV.second;
+ DRS.SubsumedInsts = SubsumedInsts;
+ DRS.Roots.clear();
+ }
+ }
+ RootSets.push_back(DRS);
+
+ return true;
+}
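// --- Illustrative sketch (editorial; not part of this change) ---
// The kind of source loop a DAGRootSet describes, reusing the x[3*i] example
// from the comments this patch removes further down: the multiply 3*i is the
// BaseInst, and the adds at offsets +1 and +2 become the two Roots.
static void unrolledByThree(int *x) {
  for (int i = 0; i < 500; ++i) {
    x[3*i]   = 0;   // user of the base   (mul %i, 3)
    x[3*i+1] = 0;   // user of root #1    (add %base, 1)
    x[3*i+2] = 0;   // user of root #2    (add %base, 2)
  }
}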
+
+bool LoopReroll::DAGRootTracker::findRoots() {
+
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
+ Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
+ getValue()->getZExtValue();
+
+ assert(RootSets.empty() && "Unclean state!");
+ if (Inc == 1) {
+ for (auto *IVU : IV->users()) {
+ if (isLoopIncrement(IVU, IV))
+ LoopIncs.push_back(cast<Instruction>(IVU));
+ }
+ if (!findRootsRecursive(IV, SmallInstructionSet()))
+ return false;
+ LoopIncs.push_back(IV);
+ } else {
+ if (!findRootsBase(IV, SmallInstructionSet()))
+ return false;
}
- if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE))
+ // Ensure all sets have the same size.
+ if (RootSets.empty()) {
+ DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
return false;
- assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
- "Invalid non-unit step for multiplicative scaling");
- LoopIncs.push_back(User2);
-
- if (const SCEVConstant *MulScale =
- dyn_cast<SCEVConstant>(User1SCEV->getStepRecurrence(*SE))) {
- // Make sure that both the start and step have the same multiplier.
- if (RealIVSCEV->getStart()->getType() != MulScale->getType())
+ }
+ for (auto &V : RootSets) {
+ if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
+ DEBUG(dbgs()
+ << "LRR: Aborting because not all root sets have the same size\n");
return false;
- if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) !=
- User1SCEV->getStart())
+ }
+ }
+
+ // And ensure all loop iterations are consecutive. We rely on std::map
+ // providing ordered traversal.
+ for (auto &V : RootSets) {
+ const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(V.BaseInst));
+ if (!ADR)
return false;
- ConstantInt *MulScaleCI = MulScale->getValue();
- if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc))
+ // Consider a DAGRootSet with N-1 roots (so N different values including
+ // BaseInst).
+ // Define d = Roots[0] - BaseInst, which should be the same as
+ // Roots[I] - Roots[I-1] for all I in [1..N).
+ // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
+ // loop iteration J.
+ //
+    // Now, for the loop iterations to be consecutive:
+ // D = d * N
+
+ unsigned N = V.Roots.size() + 1;
+ const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR);
+ const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
+ if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) {
+ DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n");
return false;
- Scale = MulScaleCI->getZExtValue();
- IV = User1;
- } else
+ }
+ }
+ Scale = RootSets[0].Roots.size() + 1;
+
+ if (Scale > IL_MaxRerollIterations) {
+ DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
+ << "#Found=" << Scale << ", #Max=" << IL_MaxRerollIterations
+ << "\n");
return false;
+ }
+
+ DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale << "\n");
- DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n");
return true;
}
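// --- Worked example of the consecutiveness check above (editorial sketch) ---
// For the x[3*i], x[3*i+1], x[3*i+2] loop, the root set holds N = 3 values
// (BaseInst plus two Roots), the in-iteration stride is d = 1, and BaseInst
// advances by D = 3 from one loop iteration to the next, so D == d * N and
// findRoots() succeeds with Scale == 3. Had the source skipped x[3*i+2],
// D would still be 3 but d * N would only be 2, and the loop would be
// rejected as not consecutive.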
-// Collect all root increments with respect to the provided induction variable
-// (normally the PHI, but sometimes a multiply). A root increment is an
-// instruction, normally an add, with a positive constant less than Scale. In a
-// rerollable loop, each of these increments is the root of an instruction
-// graph isomorphic to the others. Also, we collect the final induction
-// increment (the increment equal to the Scale), and its users in LoopIncs.
-bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
- Instruction *IV,
- SmallVector<SmallInstructionVector, 32> &Roots,
- SmallInstructionSet &AllRoots,
- SmallInstructionVector &LoopIncs) {
- for (User *U : IV->users()) {
- Instruction *UI = cast<Instruction>(U);
- if (!SE->isSCEVable(UI->getType()))
- continue;
- if (UI->getType() != IV->getType())
- continue;
- if (!L->contains(UI))
- continue;
- if (hasUsesOutsideLoop(UI, L))
- continue;
+bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
+ // Populate the MapVector with all instructions in the block, in order first,
+ // so we can iterate over the contents later in perfect order.
+ for (auto &I : *L->getHeader()) {
+ Uses[&I].resize(IL_End);
+ }
+
+ SmallInstructionSet Exclude;
+ for (auto &DRS : RootSets) {
+ Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
+ Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
+ Exclude.insert(DRS.BaseInst);
+ }
+ Exclude.insert(LoopIncs.begin(), LoopIncs.end());
+
+ for (auto &DRS : RootSets) {
+ DenseSet<Instruction*> VBase;
+ collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
+ for (auto *I : VBase) {
+ Uses[I].set(0);
+ }
+
+ unsigned Idx = 1;
+ for (auto *Root : DRS.Roots) {
+ DenseSet<Instruction*> V;
+ collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
- if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
- SE->getSCEV(UI), SE->getSCEV(IV)))) {
- uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
- if (Idx > 0 && Idx < Scale) {
- Roots[Idx-1].push_back(UI);
- AllRoots.insert(UI);
- } else if (Idx == Scale && Inc > 1) {
- LoopIncs.push_back(UI);
+ // While we're here, check the use sets are the same size.
+ if (V.size() != VBase.size()) {
+ DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
+ return false;
+ }
+
+ for (auto *I : V) {
+ Uses[I].set(Idx);
}
+ ++Idx;
}
+
+ // Make sure our subsumed instructions are remembered too.
+ for (auto *I : DRS.SubsumedInsts) {
+ Uses[I].set(IL_All);
+ }
+ }
+
+ // Make sure the loop increments are also accounted for.
+
+ Exclude.clear();
+ for (auto &DRS : RootSets) {
+ Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
+ Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
+ Exclude.insert(DRS.BaseInst);
+ }
+
+ DenseSet<Instruction*> V;
+ collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
+ for (auto *I : V) {
+ Uses[I].set(IL_All);
+ }
+
+ return true;
+
+}
+
+/// Get the next instruction in "In" that is a member of set Val.
+/// Start searching from StartI, and do not return anything in Exclude.
+/// If StartI is not given, start from In.begin().
+LoopReroll::DAGRootTracker::UsesTy::iterator
+LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
+ const SmallInstructionSet &Exclude,
+ UsesTy::iterator *StartI) {
+ UsesTy::iterator I = StartI ? *StartI : In.begin();
+ while (I != In.end() && (I->second.test(Val) == 0 ||
+ Exclude.count(I->first) != 0))
+ ++I;
+ return I;
+}
+
+bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
+ for (auto &DRS : RootSets) {
+ if (DRS.BaseInst == I)
+ return true;
}
+ return false;
+}
- if (Roots[0].empty())
+bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
+ for (auto &DRS : RootSets) {
+ if (std::find(DRS.Roots.begin(), DRS.Roots.end(), I) != DRS.Roots.end())
+ return true;
+ }
+ return false;
+}
+
+/// Return true if any instruction between Start and End uses (and thus
+/// depends on) instruction I.
+bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
+ UsesTy::iterator Start,
+ UsesTy::iterator End) {
+ for (auto *U : I->users()) {
+ for (auto It = Start; It != End; ++It)
+ if (U == It->first)
+ return true;
+ }
+ return false;
+}
+
+bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
+ // We now need to check for equivalence of the use graph of each root with
+ // that of the primary induction variable (excluding the roots). Our goal
+ // here is not to solve the full graph isomorphism problem, but rather to
+ // catch common cases without a lot of work. As a result, we will assume
+ // that the relative order of the instructions in each unrolled iteration
+ // is the same (although we will not make an assumption about how the
+ // different iterations are intermixed). Note that while the order must be
+ // the same, the instructions may not be in the same basic block.
+
+ // An array of just the possible reductions for this scale factor. When we
+ // collect the set of all users of some root instructions, these reduction
+ // instructions are treated as 'final' (their uses are not considered).
+ // This is important because we don't want the root use set to search down
+ // the reduction chain.
+ SmallInstructionSet PossibleRedSet;
+ SmallInstructionSet PossibleRedLastSet;
+ SmallInstructionSet PossibleRedPHISet;
+ Reductions.restrictToScale(Scale, PossibleRedSet,
+ PossibleRedPHISet, PossibleRedLastSet);
+
+ // Populate "Uses" with where each instruction is used.
+ if (!collectUsedInstructions(PossibleRedSet))
return false;
- bool AllSame = true;
- for (unsigned i = 1; i < Scale-1; ++i)
- if (Roots[i].size() != Roots[0].size()) {
- AllSame = false;
- break;
+
+ // Make sure we mark the reduction PHIs as used in all iterations.
+ for (auto *I : PossibleRedPHISet) {
+ Uses[I].set(IL_All);
+ }
+
+ // Make sure all instructions in the loop are in one and only one
+ // set.
+ for (auto &KV : Uses) {
+ if (KV.second.count() != 1) {
+ DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
+ << *KV.first << " (#uses=" << KV.second.count() << ")\n");
+ return false;
}
+ }
- if (!AllSame)
- return false;
+ DEBUG(
+ for (auto &KV : Uses) {
+ dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
+ }
+ );
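// --- Illustrative state of "Uses" at this point (editorial sketch) ---
// Continuing the x[3*i] example with Scale == 3, every instruction now carries
// exactly one iteration tag, roughly:
//   GEP/store for x[3*i]    -> bit 0      (base iteration)
//   GEP/store for x[3*i+1]  -> bit 1      (first root's iteration)
//   GEP/store for x[3*i+2]  -> bit 2      (second root's iteration)
//   %i.next, cmp, branch    -> IL_All     (loop increment and control flow)
// The loop below then walks the bit-0 and bit-Iter instructions in lockstep
// and checks that the two sequences are isomorphic.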
+
+ for (unsigned Iter = 1; Iter < Scale; ++Iter) {
+ // In addition to regular aliasing information, we need to look for
+ // instructions from later (future) iterations that have side effects
+ // preventing us from reordering them past other instructions with side
+ // effects.
+ bool FutureSideEffects = false;
+ AliasSetTracker AST(*AA);
+ // The map between instructions in f(%iv.(i+1)) and f(%iv).
+ DenseMap<Value *, Value *> BaseMap;
+
+ // Compare iteration Iter to the base.
+ SmallInstructionSet Visited;
+ auto BaseIt = nextInstr(0, Uses, Visited);
+ auto RootIt = nextInstr(Iter, Uses, Visited);
+ auto LastRootIt = Uses.begin();
+
+ while (BaseIt != Uses.end() && RootIt != Uses.end()) {
+ Instruction *BaseInst = BaseIt->first;
+ Instruction *RootInst = RootIt->first;
+
+ // Skip over the IV or root instructions; only match their users.
+ bool Continue = false;
+ if (isBaseInst(BaseInst)) {
+ Visited.insert(BaseInst);
+ BaseIt = nextInstr(0, Uses, Visited);
+ Continue = true;
+ }
+ if (isRootInst(RootInst)) {
+ LastRootIt = RootIt;
+ Visited.insert(RootInst);
+ RootIt = nextInstr(Iter, Uses, Visited);
+ Continue = true;
+ }
+ if (Continue) continue;
+
+ if (!BaseInst->isSameOperationAs(RootInst)) {
+ // Last chance saloon. We don't try and solve the full isomorphism
+ // problem, but try and at least catch the case where two instructions
+ // *of different types* are round the wrong way. We won't be able to
+ // efficiently tell, given two ADD instructions, which way around we
+ // should match them, but given an ADD and a SUB, we can at least infer
+ // which one is which.
+ //
+ // This should allow us to deal with a greater subset of the isomorphism
+ // problem. It does however change a linear algorithm into a quadratic
+ // one, so limit the number of probes we do.
+ auto TryIt = RootIt;
+ unsigned N = NumToleratedFailedMatches;
+ while (TryIt != Uses.end() &&
+ !BaseInst->isSameOperationAs(TryIt->first) &&
+ N--) {
+ ++TryIt;
+ TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
+ }
+
+ if (TryIt == Uses.end() || TryIt == RootIt ||
+ instrDependsOn(TryIt->first, RootIt, TryIt)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << "\n");
+ return false;
+ }
+
+ RootIt = TryIt;
+ RootInst = TryIt->first;
+ }
+
+ // All instructions between the last root and this root
+ // may belong to some other iteration. If they belong to a
+ // future iteration, then they're dangerous to alias with.
+ //
+ // Note that because we allow a limited amount of flexibility in the order
+ // that we visit nodes, LastRootIt might be *before* RootIt, in which
+ // case we've already checked this set of instructions so we shouldn't
+ // do anything.
+ for (; LastRootIt < RootIt; ++LastRootIt) {
+ Instruction *I = LastRootIt->first;
+ if (LastRootIt->second.find_first() < (int)Iter)
+ continue;
+ if (I->mayWriteToMemory())
+ AST.add(I);
+ // Note: This is specifically guarded by a check on isa<PHINode>,
+ // which while a valid (somewhat arbitrary) micro-optimization, is
+ // needed because otherwise isSafeToSpeculativelyExecute returns
+ // false on PHI nodes.
+ if (!isa<PHINode>(I) && !isSimpleLoadStore(I) &&
+ !isSafeToSpeculativelyExecute(I))
+ // Intervening instructions cause side effects.
+ FutureSideEffects = true;
+ }
+
+ // Make sure that this instruction, which is in the use set of this
+ // root instruction, does not also belong to the base set or the set of
+ // some other root instruction.
+ if (RootIt->second.count() > 1) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << " (prev. case overlap)\n");
+ return false;
+ }
+
+ // Make sure that we don't alias with any instruction in the alias set
+ // tracker. If we do, then we depend on a future iteration, and we
+ // can't reroll.
+ if (RootInst->mayReadFromMemory())
+ for (auto &K : AST) {
+ if (K.aliasesUnknownInst(RootInst, *AA)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << " (depends on future store)\n");
+ return false;
+ }
+ }
+
+      // If we've passed an instruction from a future iteration that may have
+ // side effects, and this instruction might also, then we can't reorder
+ // them, and this matching fails. As an exception, we allow the alias
+ // set tracker to handle regular (simple) load/store dependencies.
+ if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) &&
+ !isSafeToSpeculativelyExecute(BaseInst)) ||
+ (!isSimpleLoadStore(RootInst) &&
+ !isSafeToSpeculativelyExecute(RootInst)))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst <<
+ " (side effects prevent reordering)\n");
+ return false;
+ }
+
+ // For instructions that are part of a reduction, if the operation is
+ // associative, then don't bother matching the operands (because we
+ // already know that the instructions are isomorphic, and the order
+ // within the iteration does not matter). For non-associative reductions,
+ // we do need to match the operands, because we need to reject
+ // out-of-order instructions within an iteration!
+ // For example (assume floating-point addition), we need to reject this:
+ // x += a[i]; x += b[i];
+ // x += a[i+1]; x += b[i+1];
+ // x += b[i+2]; x += a[i+2];
+ bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
+
+ if (!(InReduction && BaseInst->isAssociative())) {
+ bool Swapped = false, SomeOpMatched = false;
+ for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
+ Value *Op2 = RootInst->getOperand(j);
+
+ // If this is part of a reduction (and the operation is not
+      // associative), then we match all operands, but not those that are
+ // part of the reduction.
+ if (InReduction)
+ if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
+ if (Reductions.isPairInSame(RootInst, Op2I))
+ continue;
+
+ DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
+ if (BMI != BaseMap.end()) {
+ Op2 = BMI->second;
+ } else {
+ for (auto &DRS : RootSets) {
+ if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
+ Op2 = DRS.BaseInst;
+ break;
+ }
+ }
+ }
+
+ if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
+ // If we've not already decided to swap the matched operands, and
+ // we've not already matched our first operand (note that we could
+ // have skipped matching the first operand because it is part of a
+ // reduction above), and the instruction is commutative, then try
+ // the swapped match.
+ if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
+ BaseInst->getOperand(!j) == Op2) {
+ Swapped = true;
+ } else {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
+ << " vs. " << *RootInst << " (operand " << j << ")\n");
+ return false;
+ }
+ }
+
+ SomeOpMatched = true;
+ }
+ }
+
+ if ((!PossibleRedLastSet.count(BaseInst) &&
+ hasUsesOutsideLoop(BaseInst, L)) ||
+ (!PossibleRedLastSet.count(RootInst) &&
+ hasUsesOutsideLoop(RootInst, L))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << " (uses outside loop)\n");
+ return false;
+ }
+
+ Reductions.recordPair(BaseInst, RootInst, Iter);
+ BaseMap.insert(std::make_pair(RootInst, BaseInst));
+
+ LastRootIt = RootIt;
+ Visited.insert(BaseInst);
+ Visited.insert(RootInst);
+ BaseIt = nextInstr(0, Uses, Visited);
+ RootIt = nextInstr(Iter, Uses, Visited);
+ }
+ assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
+ "Mismatched set sizes!");
+ }
+
+ DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
+ *IV << "\n");
return true;
}
+void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
+ BasicBlock *Header = L->getHeader();
+ // Remove instructions associated with non-base iterations.
+ for (BasicBlock::reverse_iterator J = Header->rbegin();
+ J != Header->rend();) {
+ unsigned I = Uses[&*J].find_first();
+ if (I > 0 && I < IL_All) {
+ Instruction *D = &*J;
+ DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
+ D->eraseFromParent();
+ continue;
+ }
+
+ ++J;
+ }
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+
+ // We need to create a new induction variable for each different BaseInst.
+ for (auto &DRS : RootSets) {
+ // Insert the new induction variable.
+ const SCEVAddRecExpr *RealIVSCEV =
+ cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
+ const SCEV *Start = RealIVSCEV->getStart();
+ const SCEVAddRecExpr *H = cast<SCEVAddRecExpr>
+ (SE->getAddRecExpr(Start,
+ SE->getConstant(RealIVSCEV->getType(), 1),
+ L, SCEV::FlagAnyWrap));
+ { // Limit the lifetime of SCEVExpander.
+ SCEVExpander Expander(*SE, DL, "reroll");
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+
+ for (auto &KV : Uses) {
+ if (KV.second.find_first() == 0)
+ KV.first->replaceUsesOfWith(DRS.BaseInst, NewIV);
+ }
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
+ // FIXME: Why do we need this check?
+ if (Uses[BI].find_first() == IL_All) {
+ const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+
+ // Iteration count SCEV minus 1
+ const SCEV *ICMinus1SCEV =
+ SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+
+ Value *ICMinus1; // Iteration count minus 1
+ if (isa<SCEVConstant>(ICMinus1SCEV)) {
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
+ } else {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, Parent);
+
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
+ Preheader->getTerminator());
+ }
+
+ Value *Cond =
+ new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
+ BI->setCondition(Cond);
+
+ if (BI->getSuccessor(1) != Header)
+ BI->swapSuccessors();
+ }
+ }
+ }
+ }
+
+ SimplifyInstructionsInBlock(Header, TLI);
+ DeleteDeadPHIs(Header, TLI);
+}
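// --- Illustrative before/after of the whole transformation (editorial) ---
// Roughly what reroll() produces for the running example; the exact exit
// bound is derived from IterCount, so the constant in the rerolled loop
// below is only schematic.
//
//   // before (Scale == 3):
//   for (int i = 0; i < 500; ++i) {
//     x[3*i] = 0; x[3*i+1] = 0; x[3*i+2] = 0;
//   }
//
//   // after rerolling:
//   for (int i = 0; i < 1500; ++i)
//     x[i] = 0;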
+
// Validate the selected reductions. All iterations must have an isomorphic
// part of the reduction chain and, for non-associative reductions, the chain
// entries must appear in order.
@@ -711,8 +1387,9 @@ void LoopReroll::ReductionTracker::replaceSelected() {
// Replace users with the new end-of-chain value.
SmallInstructionVector Users;
- for (User *U : PossibleReds[i].getReducedValue()->users())
+ for (User *U : PossibleReds[i].getReducedValue()->users()) {
Users.push_back(cast<Instruction>(U));
+ }
for (SmallInstructionVector::iterator J = Users.begin(),
JE = Users.end(); J != JE; ++J)
@@ -767,359 +1444,23 @@ void LoopReroll::ReductionTracker::replaceSelected() {
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
- const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
- uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
- getValue()->getZExtValue();
- // The collection of loop increment instructions.
- SmallInstructionVector LoopIncs;
- uint64_t Scale = Inc;
-
- // The effective induction variable, IV, is normally also the real induction
- // variable. When we're dealing with a loop like:
- // for (int i = 0; i < 500; ++i)
- // x[3*i] = ...;
- // x[3*i+1] = ...;
- // x[3*i+2] = ...;
- // then the real IV is still i, but the effective IV is (3*i).
- Instruction *RealIV = IV;
- if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
- return false;
-
- assert(Scale <= MaxInc && "Scale is too large");
- assert(Scale > 1 && "Scale must be at least 2");
+ DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
- // The set of increment instructions for each increment value.
- SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
- SmallInstructionSet AllRoots;
- if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
+ if (!DAGRoots.findRoots())
return false;
-
DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
- *RealIV << "\n");
-
- // An array of just the possible reductions for this scale factor. When we
- // collect the set of all users of some root instructions, these reduction
- // instructions are treated as 'final' (their uses are not considered).
- // This is important because we don't want the root use set to search down
- // the reduction chain.
- SmallInstructionSet PossibleRedSet;
- SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
- Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
- PossibleRedLastSet);
-
- // We now need to check for equivalence of the use graph of each root with
- // that of the primary induction variable (excluding the roots). Our goal
- // here is not to solve the full graph isomorphism problem, but rather to
- // catch common cases without a lot of work. As a result, we will assume
- // that the relative order of the instructions in each unrolled iteration
- // is the same (although we will not make an assumption about how the
- // different iterations are intermixed). Note that while the order must be
- // the same, the instructions may not be in the same basic block.
- SmallInstructionSet Exclude(AllRoots);
- Exclude.insert(LoopIncs.begin(), LoopIncs.end());
-
- DenseSet<Instruction *> BaseUseSet;
- collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);
-
- DenseSet<Instruction *> AllRootUses;
- std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);
-
- bool MatchFailed = false;
- for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
- DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
- collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
- PossibleRedSet, RootUseSet);
-
- DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
- " vs. iteration increment " << (i+1) <<
- " use set size: " << RootUseSet.size() << "\n");
-
- if (BaseUseSet.size() != RootUseSet.size()) {
- MatchFailed = true;
- break;
- }
-
- // In addition to regular aliasing information, we need to look for
- // instructions from later (future) iterations that have side effects
- // preventing us from reordering them past other instructions with side
- // effects.
- bool FutureSideEffects = false;
- AliasSetTracker AST(*AA);
-
- // The map between instructions in f(%iv.(i+1)) and f(%iv).
- DenseMap<Value *, Value *> BaseMap;
-
- assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
- for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
- JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
- if (cast<Instruction>(J1) == RealIV)
- continue;
- if (cast<Instruction>(J1) == IV)
- continue;
- if (!BaseUseSet.count(J1))
- continue;
- if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
- continue;
-
- while (J2 != JE && (!RootUseSet.count(J2) ||
- std::find(Roots[i].begin(), Roots[i].end(), J2) !=
- Roots[i].end())) {
- // As we iterate through the instructions, instructions that don't
- // belong to previous iterations (or the base case), must belong to
- // future iterations. We want to track the alias set of writes from
- // previous iterations.
- if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
- !AllRootUses.count(J2)) {
- if (J2->mayWriteToMemory())
- AST.add(J2);
-
- // Note: This is specifically guarded by a check on isa<PHINode>,
- // which while a valid (somewhat arbitrary) micro-optimization, is
- // needed because otherwise isSafeToSpeculativelyExecute returns
- // false on PHI nodes.
- if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
- FutureSideEffects = true;
- }
-
- ++J2;
- }
-
- if (!J1->isSameOperationAs(J2)) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << "\n");
- MatchFailed = true;
- break;
- }
-
- // Make sure that this instruction, which is in the use set of this
- // root instruction, does not also belong to the base set or the set of
- // some previous root instruction.
- if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << " (prev. case overlap)\n");
- MatchFailed = true;
- break;
- }
-
- // Make sure that we don't alias with any instruction in the alias set
- // tracker. If we do, then we depend on a future iteration, and we
- // can't reroll.
- if (J2->mayReadFromMemory()) {
- for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
- K != KE && !MatchFailed; ++K) {
- if (K->aliasesUnknownInst(J2, *AA)) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << " (depends on future store)\n");
- MatchFailed = true;
- break;
- }
- }
- }
-
- // If we've past an instruction from a future iteration that may have
- // side effects, and this instruction might also, then we can't reorder
- // them, and this matching fails. As an exception, we allow the alias
- // set tracker to handle regular (simple) load/store dependencies.
- if (FutureSideEffects &&
- ((!isSimpleLoadStore(J1) &&
- !isSafeToSpeculativelyExecute(J1, DL)) ||
- (!isSimpleLoadStore(J2) &&
- !isSafeToSpeculativelyExecute(J2, DL)))) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 <<
- " (side effects prevent reordering)\n");
- MatchFailed = true;
- break;
- }
-
- // For instructions that are part of a reduction, if the operation is
- // associative, then don't bother matching the operands (because we
- // already know that the instructions are isomorphic, and the order
- // within the iteration does not matter). For non-associative reductions,
- // we do need to match the operands, because we need to reject
- // out-of-order instructions within an iteration!
- // For example (assume floating-point addition), we need to reject this:
- // x += a[i]; x += b[i];
- // x += a[i+1]; x += b[i+1];
- // x += b[i+2]; x += a[i+2];
- bool InReduction = Reductions.isPairInSame(J1, J2);
-
- if (!(InReduction && J1->isAssociative())) {
- bool Swapped = false, SomeOpMatched = false;
- for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
- Value *Op2 = J2->getOperand(j);
-
- // If this is part of a reduction (and the operation is not
- // associatve), then we match all operands, but not those that are
- // part of the reduction.
- if (InReduction)
- if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
- if (Reductions.isPairInSame(J2, Op2I))
- continue;
-
- DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
- if (BMI != BaseMap.end())
- Op2 = BMI->second;
- else if (std::find(Roots[i].begin(), Roots[i].end(),
- (Instruction*) Op2) != Roots[i].end())
- Op2 = IV;
-
- if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
- // If we've not already decided to swap the matched operands, and
- // we've not already matched our first operand (note that we could
- // have skipped matching the first operand because it is part of a
- // reduction above), and the instruction is commutative, then try
- // the swapped match.
- if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
- J1->getOperand(!j) == Op2) {
- Swapped = true;
- } else {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << " (operand " << j << ")\n");
- MatchFailed = true;
- break;
- }
- }
-
- SomeOpMatched = true;
- }
- }
-
- if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
- (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
- " vs. " << *J2 << " (uses outside loop)\n");
- MatchFailed = true;
- break;
- }
-
- if (!MatchFailed)
- BaseMap.insert(std::pair<Value *, Value *>(J2, J1));
-
- AllRootUses.insert(J2);
- Reductions.recordPair(J1, J2, i+1);
-
- ++J2;
- }
- }
-
- if (MatchFailed)
- return false;
-
- DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
- *RealIV << "\n");
-
- DenseSet<Instruction *> LoopIncUseSet;
- collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
- SmallInstructionSet(), LoopIncUseSet);
- DEBUG(dbgs() << "LRR: Loop increment set size: " <<
- LoopIncUseSet.size() << "\n");
-
- // Make sure that all instructions in the loop have been included in some
- // use set.
- for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
- J != JE; ++J) {
- if (isa<DbgInfoIntrinsic>(J))
- continue;
- if (cast<Instruction>(J) == RealIV)
- continue;
- if (cast<Instruction>(J) == IV)
- continue;
- if (BaseUseSet.count(J) || AllRootUses.count(J) ||
- (LoopIncUseSet.count(J) && (J->isTerminator() ||
- isSafeToSpeculativelyExecute(J, DL))))
- continue;
-
- if (AllRoots.count(J))
- continue;
-
- if (Reductions.isSelectedPHI(J))
- continue;
-
- DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
- " unprocessed instruction found: " << *J << "\n");
- MatchFailed = true;
- break;
- }
-
- if (MatchFailed)
+ *IV << "\n");
+
+ if (!DAGRoots.validate(Reductions))
return false;
-
- DEBUG(dbgs() << "LRR: all instructions processed from " <<
- *RealIV << "\n");
-
if (!Reductions.validateSelected())
return false;
-
// At this point, we've validated the rerolling, and we're committed to
// making changes!
Reductions.replaceSelected();
+ DAGRoots.replace(IterCount);
- // Remove instructions associated with non-base iterations.
- for (BasicBlock::reverse_iterator J = Header->rbegin();
- J != Header->rend();) {
- if (AllRootUses.count(&*J)) {
- Instruction *D = &*J;
- DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
- D->eraseFromParent();
- continue;
- }
-
- ++J;
- }
-
- // Insert the new induction variable.
- const SCEV *Start = RealIVSCEV->getStart();
- if (Inc == 1)
- Start = SE->getMulExpr(Start,
- SE->getConstant(Start->getType(), Scale));
- const SCEVAddRecExpr *H =
- cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
- SE->getConstant(RealIVSCEV->getType(), 1),
- L, SCEV::FlagAnyWrap));
- { // Limit the lifetime of SCEVExpander.
- SCEVExpander Expander(*SE, "reroll");
- Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
-
- for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
- JE = BaseUseSet.end(); J != JE; ++J)
- (*J)->replaceUsesOfWith(IV, NewIV);
-
- if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
- if (LoopIncUseSet.count(BI)) {
- const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
- if (Inc == 1)
- ICSCEV =
- SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
- // Iteration count SCEV minus 1
- const SCEV *ICMinus1SCEV =
- SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
-
- Value *ICMinus1; // Iteration count minus 1
- if (isa<SCEVConstant>(ICMinus1SCEV)) {
- ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
- } else {
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, this);
-
- ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
- Preheader->getTerminator());
- }
-
- Value *Cond =
- new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
- BI->setCondition(Cond);
-
- if (BI->getSuccessor(1) != Header)
- BI->swapSuccessors();
- }
- }
- }
-
- SimplifyInstructionsInBlock(Header, DL, TLI);
- DeleteDeadPHIs(Header, TLI);
++NumRerolledLoops;
return true;
}
@@ -1129,11 +1470,9 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
AA = &getAnalysis<AliasAnalysis>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
- TLI = &getAnalysis<TargetLibraryInfo>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock *Header = L->getHeader();
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 267cb99..a675e12 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -24,8 +24,10 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -56,14 +58,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -75,14 +77,15 @@ namespace {
LoopInfo *LI;
const TargetTransformInfo *TTI;
AssumptionCache *AC;
+ DominatorTree *DT;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
@@ -100,10 +103,13 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
// Save the loop metadata.
MDNode *LoopMD = L->getLoopID();
- LI = &getAnalysis<LoopInfo>();
- TTI = &getAnalysis<TargetTransformInfo>();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
+ Function &F = *L->getHeader()->getParent();
+
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
@@ -226,20 +232,17 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr: {
- Value *IVOpnd = nullptr;
- if (isa<ConstantInt>(I->getOperand(0)))
- IVOpnd = I->getOperand(1);
-
- if (isa<ConstantInt>(I->getOperand(1))) {
- if (IVOpnd)
- return false;
-
- IVOpnd = I->getOperand(0);
- }
+ Value *IVOpnd = !isa<Constant>(I->getOperand(0))
+ ? I->getOperand(0)
+ : !isa<Constant>(I->getOperand(1))
+ ? I->getOperand(1)
+ : nullptr;
+ if (!IVOpnd)
+ return false;
// If increment operand is used outside of the loop, this speculation
// could cause extra live range interference.
- if (MultiExitLoop && IVOpnd) {
+ if (MultiExitLoop) {
for (User *UseI : IVOpnd->users()) {
auto *UserInst = cast<Instruction>(UseI);
if (!L->contains(UserInst))
@@ -308,9 +311,8 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
// Nuke the Latch block.
assert(Latch->empty() && "unable to evacuate Latch");
LI->removeBlock(Latch);
- if (DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DTWP->getDomTree().eraseNode(Latch);
+ if (DT)
+ DT->eraseNode(Latch);
Latch->eraseFromParent();
return true;
}
@@ -412,6 +414,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
@@ -442,8 +446,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifyable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
- // FIXME: Provide DL, TLI, DT, AC to SimplifyInstruction.
- Value *V = SimplifyInstruction(C);
+ // FIXME: Provide TLI, DT, AC to SimplifyInstruction.
+ Value *V = SimplifyInstruction(C, DL);
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
@@ -495,31 +499,31 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// The conditional branch can't be folded, handle the general case.
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
- if (DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
+ if (DT) {
// Everything that was dominated by the old loop header is now dominated
// by the original loop preheader. Conceptually the header was merged
// into the preheader, even though we reuse the actual block as a new
// loop latch.
- DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
OrigHeaderNode->end());
- DomTreeNode *OrigPreheaderNode = DT.getNode(OrigPreheader);
+ DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
- DT.changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+ DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
- assert(DT.getNode(Exit)->getIDom() == OrigPreheaderNode);
- assert(DT.getNode(NewHeader)->getIDom() == OrigPreheaderNode);
+ assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
// Update OrigHeader to be dominated by the new header block.
- DT.changeImmediateDominator(OrigHeader, OrigLatch);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
// thus is not a preheader anymore.
// Split the edge to form a real preheader.
- BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+ BasicBlock *NewPH = SplitCriticalEdge(
+ OrigPreheader, NewHeader,
+ CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
NewPH->setName(NewHeader->getName() + ".lr.ph");
// Preserve canonical loop form, which means that 'Exit' should have only
@@ -538,7 +542,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (isa<IndirectBrInst>((*PI)->getTerminator()))
continue;
SplitLatchEdge |= L->getLoopLatch() == *PI;
- BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
+ BasicBlock *ExitSplit = SplitCriticalEdge(
+ *PI, Exit, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
ExitSplit->moveBefore(Exit);
}
assert(SplitLatchEdge &&
@@ -552,17 +557,15 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
PHBI->eraseFromParent();
// With our CFG finalized, update DomTree if it is available.
- if (DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
+ if (DT) {
// Update OrigHeader to be dominated by the new header block.
- DT.changeImmediateDominator(NewHeader, OrigPreheader);
- DT.changeImmediateDominator(OrigHeader, OrigLatch);
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
// Brute force incremental dominator tree update. Call
// findNearestCommonDominator on all CFG predecessors of each child of the
// original header.
- DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
+ DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
OrigHeaderNode->end());
bool Changed;
@@ -575,11 +578,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
pred_iterator PI = pred_begin(BB);
BasicBlock *NearestDom = *PI;
for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
- NearestDom = DT.findNearestCommonDominator(NearestDom, *PI);
+ NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
// Remember if this changes the DomTree.
if (Node->getIDom()->getBlock() != NearestDom) {
- DT.changeImmediateDominator(BB, NearestDom);
+ DT->changeImmediateDominator(BB, NearestDom);
Changed = true;
}
}
@@ -597,7 +600,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// the OrigHeader block into OrigLatch. This will succeed if they are
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
- MergeBlockIntoPredecessor(OrigHeader, this);
+ MergeBlockIntoPredecessor(OrigHeader, DT, LI);
DEBUG(dbgs() << "LoopRotation: into "; L->dump());
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 7b60373..584c7ae 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -28,7 +28,7 @@
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
// it's useful to think about these as the same register, with some uses using
-// the value of the register before the add and some using // it after. In this
+// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
@@ -68,6 +68,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -111,8 +112,6 @@ public:
/// a particular register.
SmallBitVector UsedByIndices;
- RegSortData() {}
-
void print(raw_ostream &OS) const;
void dump() const;
};
@@ -186,9 +185,8 @@ RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
// Update RegUses. The data structure is not optimized for this purpose;
// we must iterate through it and update each of the bit vectors.
- for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
- I != E; ++I) {
- SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ for (auto &Pair : RegUsesMap) {
+ SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
if (LUIdx < UsedByIndices.size())
UsedByIndices[LUIdx] =
LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
@@ -298,9 +296,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
// Look at add operands.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- DoInitialMatch(*I, L, Good, Bad, SE);
+ for (const SCEV *S : Add->operands())
+ DoInitialMatch(S, L, Good, Bad, SE);
return;
}
@@ -327,12 +324,10 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
- for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
- E = MyGood.end(); I != E; ++I)
- Good.push_back(SE.getMulExpr(NegOne, *I));
- for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
- E = MyBad.end(); I != E; ++I)
- Bad.push_back(SE.getMulExpr(NegOne, *I));
+ for (const SCEV *S : MyGood)
+ Good.push_back(SE.getMulExpr(NegOne, S));
+ for (const SCEV *S : MyBad)
+ Bad.push_back(SE.getMulExpr(NegOne, S));
return;
}
@@ -444,9 +439,8 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
if (ScaledReg)
if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
return true;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
- E = BaseRegs.end(); I != E; ++I)
- if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
+ for (const SCEV *BaseReg : BaseRegs)
+ if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
return true;
return false;
}
@@ -461,10 +455,9 @@ void Formula::print(raw_ostream &OS) const {
if (!First) OS << " + "; else First = false;
OS << BaseOffset;
}
- for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
- E = BaseRegs.end(); I != E; ++I) {
+ for (const SCEV *BaseReg : BaseRegs) {
if (!First) OS << " + "; else First = false;
- OS << "reg(" << **I << ')';
+ OS << "reg(" << *BaseReg << ')';
}
if (HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
@@ -577,10 +570,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
SmallVector<const SCEV *, 8> Ops;
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- const SCEV *Op = getExactSDiv(*I, RHS, SE,
- IgnoreSignificantBits);
+ for (const SCEV *S : Add->operands()) {
+ const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
if (!Op) return nullptr;
Ops.push_back(Op);
}
@@ -594,9 +585,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
- for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
- I != E; ++I) {
- const SCEV *S = *I;
+ for (const SCEV *S : Mul->operands()) {
if (!Found)
if (const SCEV *Q = getExactSDiv(S, RHS, SE,
IgnoreSignificantBits)) {
@@ -766,9 +755,8 @@ static bool isHighCostExpansion(const SCEV *S,
return false;
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- if (isHighCostExpansion(*I, Processed, SE))
+ for (const SCEV *S : Add->operands()) {
+ if (isHighCostExpansion(S, Processed, SE))
return true;
}
return false;
@@ -819,9 +807,9 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
if (!I || !isInstructionTriviallyDead(I))
continue;
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = nullptr;
+ for (Use &O : I->operands())
+ if (Instruction *U = dyn_cast<Instruction>(O)) {
+ O = nullptr;
if (U->use_empty())
DeadInsts.push_back(U);
}
@@ -1002,9 +990,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
if (isLoser())
return;
}
- for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
- E = F.BaseRegs.end(); I != E; ++I) {
- const SCEV *BaseReg = *I;
+ for (const SCEV *BaseReg : F.BaseRegs) {
if (VisitedRegs.count(BaseReg)) {
Lose();
return;
@@ -1027,9 +1013,8 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScaleCost += getScalingFactorCost(TTI, LU, F);
// Tally up the non-zero immediates.
- for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
- E = Offsets.end(); I != E; ++I) {
- int64_t Offset = (uint64_t)*I + F.BaseOffset;
+ for (int64_t O : Offsets) {
+ int64_t Offset = (uint64_t)O + F.BaseOffset;
if (F.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
@@ -1152,10 +1137,9 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", OperandValToReplace=";
OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
- for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
- E = PostIncLoops.end(); I != E; ++I) {
+ for (const Loop *PIL : PostIncLoops) {
OS << ", PostIncLoop=";
- (*I)->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
}
if (LUIdx != ~size_t(0))
@@ -1301,9 +1285,8 @@ bool LSRUse::InsertFormula(const Formula &F) {
assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
"Zero allocated in a scaled register!");
#ifndef NDEBUG
- for (SmallVectorImpl<const SCEV *>::const_iterator I =
- F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
- assert(!(*I)->isZero() && "Zero allocated in a base register!");
+ for (const SCEV *BaseReg : F.BaseRegs)
+ assert(!BaseReg->isZero() && "Zero allocated in a base register!");
#endif
// Add the formula to the list.
@@ -1327,11 +1310,9 @@ void LSRUse::DeleteFormula(Formula &F) {
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Now that we've filtered out some formulae, recompute the Regs set.
- SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
+ SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
Regs.clear();
- for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
- E = Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
+ for (const Formula &F : Formulae) {
if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
@@ -1357,11 +1338,11 @@ void LSRUse::print(raw_ostream &OS) const {
}
OS << ", Offsets={";
- for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
- E = Offsets.end(); I != E; ++I) {
- OS << *I;
- if (std::next(I) != E)
- OS << ',';
+ bool NeedComma = false;
+ for (int64_t O : Offsets) {
+ if (NeedComma) OS << ',';
+ OS << O;
+ NeedComma = true;
}
OS << '}';
@@ -1386,9 +1367,6 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
- // Otherwise, just guess that reg+reg addressing is legal.
- //return ;
-
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
@@ -1928,12 +1906,12 @@ void LSRInstance::OptimizeShadowIV() {
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
- for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
- if (UI->getUser() == Cond) {
+ for (IVStrideUse &U : IU)
+ if (U.getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
- CondUse = UI;
+ CondUse = &U;
return true;
}
return false;
@@ -2108,8 +2086,7 @@ LSRInstance::OptimizeLoopTermCond() {
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- BasicBlock *ExitingBlock = ExitingBlocks[i];
+ for (BasicBlock *ExitingBlock : ExitingBlocks) {
// Get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
@@ -2352,9 +2329,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
LU.WidestFixupType == OrigLU.WidestFixupType &&
LU.HasFormulaWithSameRegs(OrigF)) {
// Scan through this use's formulae.
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
+ for (const Formula &F : LU.Formulae) {
// Check to see if this formula has the same registers and symbols
// as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
@@ -2382,8 +2357,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
// Collect interesting types and strides.
SmallVector<const SCEV *, 4> Worklist;
- for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
- const SCEV *Expr = IU.getExpr(*UI);
+ for (const IVStrideUse &U : IU) {
+ const SCEV *Expr = IU.getExpr(U);
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
@@ -2586,25 +2561,23 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
- for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
- I != E; ++I) {
-
- if (I->IncExpr->isZero())
+ for (const IVInc &Inc : Chain) {
+ if (Inc.IncExpr->isZero())
continue;
// Incrementing by zero or some constant is neutral. We assume constants can
// be folded into an addressing mode or an add's immediate operand.
- if (isa<SCEVConstant>(I->IncExpr)) {
+ if (isa<SCEVConstant>(Inc.IncExpr)) {
++NumConstIncrements;
continue;
}
- if (I->IncExpr == LastIncExpr)
+ if (Inc.IncExpr == LastIncExpr)
++NumReusedIncrements;
else
++NumVarIncrements;
- LastIncExpr = I->IncExpr;
+ LastIncExpr = Inc.IncExpr;
}
// An IV chain with a single increment is handled by LSR's postinc
// uses. However, a chain with multiple increments requires keeping the IV's
@@ -2839,12 +2812,11 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
- for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
- I != E; ++I) {
- DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
- User::op_iterator UseI =
- std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
- assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
+ for (const IVInc &Inc : Chain) {
+ DEBUG(dbgs() << " Inc: " << Inc.UserInst << "\n");
+ auto UseI = std::find(Inc.UserInst->op_begin(), Inc.UserInst->op_end(),
+ Inc.IVOperand);
+ assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
IVIncSet.insert(UseI);
}
}
@@ -2907,20 +2879,18 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
const SCEV *LeftOverExpr = nullptr;
- for (IVChain::const_iterator IncI = Chain.begin(),
- IncE = Chain.end(); IncI != IncE; ++IncI) {
-
- Instruction *InsertPt = IncI->UserInst;
+ for (const IVInc &Inc : Chain) {
+ Instruction *InsertPt = Inc.UserInst;
if (isa<PHINode>(InsertPt))
InsertPt = L->getLoopLatch()->getTerminator();
// IVOper will replace the current IV User's operand. IVSrc is the IV
// value currently held in a register.
Value *IVOper = IVSrc;
- if (!IncI->IncExpr->isZero()) {
+ if (!Inc.IncExpr->isZero()) {
// IncExpr was the result of subtraction of two narrow values, so must
// be signed.
- const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
+ const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
LeftOverExpr = LeftOverExpr ?
SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
}
@@ -2933,22 +2903,21 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
// If an IV increment can't be folded, use it as the next IV value.
- if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- TTI)) {
+ if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = nullptr;
}
}
- Type *OperTy = IncI->IVOperand->getType();
+ Type *OperTy = Inc.IVOperand->getType();
if (IVTy != OperTy) {
assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
"cannot extend a chained IV");
IRBuilder<> Builder(InsertPt);
IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
}
- IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
- DeadInsts.push_back(IncI->IVOperand);
+ Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
+ DeadInsts.push_back(Inc.IVOperand);
}
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
@@ -2976,11 +2945,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
}
void LSRInstance::CollectFixupsAndInitialFormulae() {
- for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
- Instruction *UserInst = UI->getUser();
+ for (const IVStrideUse &U : IU) {
+ Instruction *UserInst = U.getUser();
// Skip IV users that are part of profitable IV Chains.
User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
- UI->getOperandValToReplace());
+ U.getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
if (IVIncSet.count(UseI))
continue;
@@ -2988,8 +2957,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// Record the uses.
LSRFixup &LF = getNewFixup();
LF.UserInst = UserInst;
- LF.OperandValToReplace = UI->getOperandValToReplace();
- LF.PostIncLoops = UI->getPostIncLoops();
+ LF.OperandValToReplace = U.getOperandValToReplace();
+ LF.PostIncLoops = U.getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
Type *AccessTy = nullptr;
@@ -2998,7 +2967,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
AccessTy = getAccessType(LF.UserInst);
}
- const SCEV *S = IU.getExpr(*UI);
+ const SCEV *S = IU.getExpr(U);
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -3090,9 +3059,8 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
RegUses.CountRegister(F.ScaledReg, LUIdx);
- for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
- E = F.BaseRegs.end(); I != E; ++I)
- RegUses.CountRegister(*I, LUIdx);
+ for (const SCEV *BaseReg : F.BaseRegs)
+ RegUses.CountRegister(BaseReg, LUIdx);
}
/// InsertFormula - If the given formula has not yet been inserted, add it to
@@ -3213,9 +3181,8 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ for (const SCEV *S : Add->operands()) {
+ const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
@@ -3373,9 +3340,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
- for (SmallVectorImpl<const SCEV *>::const_iterator
- I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
- const SCEV *BaseReg = *I;
+ for (const SCEV *BaseReg : Base.BaseRegs) {
if (SE.properlyDominates(BaseReg, L->getHeader()) &&
!SE.hasComputableLoopEvolution(BaseReg, L))
Ops.push_back(BaseReg);
@@ -3432,15 +3397,13 @@ void LSRInstance::GenerateConstantOffsetsImpl(
LSRUse &LU, unsigned LUIdx, const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
- for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
- E = Worklist.end();
- I != E; ++I) {
+ for (int64_t Offset : Worklist) {
Formula F = Base;
- F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
- if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
+ if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
LU.AccessTy, F)) {
// Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
if (IsScaledReg) {
@@ -3506,10 +3469,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
assert(!Base.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
- for (SmallSetVector<int64_t, 8>::const_iterator
- I = Factors.begin(), E = Factors.end(); I != E; ++I) {
- int64_t Factor = *I;
-
+ for (int64_t Factor : Factors) {
// Check that the multiplication doesn't overflow.
if (Base.BaseOffset == INT64_MIN && Factor == -1)
continue;
@@ -3593,10 +3553,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
assert(Base.Scale == 0 && "Unscale did not do its job!");
// Check each interesting stride.
- for (SmallSetVector<int64_t, 8>::const_iterator
- I = Factors.begin(), E = Factors.end(); I != E; ++I) {
- int64_t Factor = *I;
-
+ for (int64_t Factor : Factors) {
Base.Scale = Factor;
Base.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
@@ -3652,16 +3609,13 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
- for (SmallSetVector<Type *, 4>::const_iterator
- I = Types.begin(), E = Types.end(); I != E; ++I) {
- Type *SrcTy = *I;
+ for (Type *SrcTy : Types) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
- if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
- for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J)
- *J = SE.getAnyExtendExpr(*J, SrcTy);
+ if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
+ for (const SCEV *&BaseReg : F.BaseRegs)
+ BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
// TODO: This assumes we've done basic processing on all uses and
// have an idea what the register usage is.
@@ -3708,20 +3662,17 @@ void WorkItem::dump() const {
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
- typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
- RegMapTy Map;
+ DenseMap<const SCEV *, ImmMapTy> Map;
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
SmallVector<const SCEV *, 8> Sequence;
- for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
- I != E; ++I) {
- const SCEV *Reg = *I;
+ for (const SCEV *Use : RegUses) {
+ const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
int64_t Imm = ExtractImmediate(Reg, SE);
- std::pair<RegMapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(Reg, ImmMapTy()));
+ auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
- Pair.first->second.insert(std::make_pair(Imm, *I));
- UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+ Pair.first->second.insert(std::make_pair(Imm, Use));
+ UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
}
// Now examine each set of registers with the same base value. Build up
@@ -3729,9 +3680,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// not adding formulae and register counts while we're searching.
SmallVector<WorkItem, 32> WorkItems;
SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
- E = Sequence.end(); I != E; ++I) {
- const SCEV *Reg = *I;
+ for (const SCEV *Reg : Sequence) {
const ImmMapTy &Imms = Map.find(Reg)->second;
// It's not worthwhile looking for reuse if there's only one offset.
@@ -3739,9 +3688,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
- for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
- J != JE; ++J)
- dbgs() << ' ' << J->first;
+ for (const auto &Entry : Imms)
+ dbgs() << ' ' << Entry.first;
dbgs() << '\n');
// Examine each offset.
@@ -3786,9 +3734,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
UniqueItems.clear();
// Now iterate through the worklist and add new formulae.
- for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
- E = WorkItems.end(); I != E; ++I) {
- const WorkItem &WI = *I;
+ for (const WorkItem &WI : WorkItems) {
size_t LUIdx = WI.LUIdx;
LSRUse &LU = Uses[LUIdx];
int64_t Imm = WI.Imm;
@@ -3827,7 +3773,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (C->getValue()->isNegative() !=
(NewF.BaseOffset < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
- .ule(abs64(NewF.BaseOffset)))
+ .ule(std::abs(NewF.BaseOffset)))
continue;
// OK, looks good.
@@ -3853,12 +3799,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// If the new formula has a constant in a register, and adding the
// constant value to the immediate would produce a value closer to
// zero than the immediate itself, then the formula isn't worthwhile.
- for (SmallVectorImpl<const SCEV *>::const_iterator
- J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
- J != JE; ++J)
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+ for (const SCEV *NewReg : NewF.BaseRegs)
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
- abs64(NewF.BaseOffset)) &&
+ std::abs(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
NewF.BaseOffset).countTrailingZeros() >=
countTrailingZeros<uint64_t>(NewF.BaseOffset))
@@ -3959,9 +3903,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
}
else {
SmallVector<const SCEV *, 4> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
+ for (const SCEV *Reg : F.BaseRegs) {
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
Key.push_back(Reg);
}
@@ -4023,9 +3965,8 @@ static const size_t ComplexityLimit = UINT16_MAX;
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
size_t Power = 1;
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- size_t FSize = I->Formulae.size();
+ for (const LSRUse &LU : Uses) {
+ size_t FSize = LU.Formulae.size();
if (FSize >= ComplexityLimit) {
Power = ComplexityLimit;
break;
@@ -4116,9 +4057,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
+ for (const Formula &F : LU.Formulae) {
if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
@@ -4135,9 +4074,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
// Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
+ for (LSRFixup &Fixup : Fixups) {
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.BaseOffset;
@@ -4218,9 +4155,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// to be a good reuse register candidate.
const SCEV *Best = nullptr;
unsigned BestNum = 0;
- for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
- I != E; ++I) {
- const SCEV *Reg = *I;
+ for (const SCEV *Reg : RegUses) {
if (Taken.count(Reg))
continue;
if (!Best)
@@ -4308,17 +4243,12 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
-
+ for (const Formula &F : LU.Formulae) {
// Ignore formulae which may not be ideal in terms of register reuse of
// ReqRegs. The formula should use all required registers before
// introducing new ones.
int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
- for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
- JE = ReqRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
+ for (const SCEV *Reg : ReqRegs) {
if ((F.ScaledReg && F.ScaledReg == Reg) ||
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
F.BaseRegs.end()) {
@@ -4426,9 +4356,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
bool AllDominate = true;
Instruction *BetterPos = nullptr;
Instruction *Tentative = IDom->getTerminator();
- for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
- E = Inputs.end(); I != E; ++I) {
- Instruction *Inst = *I;
+ for (Instruction *Inst : Inputs) {
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
break;
@@ -4475,9 +4403,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
}
// The expansion must also be dominated by the increment positions of any
// loops for which it is using post-inc mode.
- for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
- E = LF.PostIncLoops.end(); I != E; ++I) {
- const Loop *PIL = *I;
+ for (const Loop *PIL : LF.PostIncLoops) {
if (PIL == L) continue;
// Be dominated by the loop exit.
@@ -4552,9 +4478,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SmallVector<const SCEV *, 8> Ops;
// Expand the BaseRegs portion.
- for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
- E = F.BaseRegs.end(); I != E; ++I) {
- const SCEV *Reg = *I;
+ for (const SCEV *Reg : F.BaseRegs) {
assert(!Reg->isZero() && "Zero allocated in a base register!");
// If we're expanding for a post-inc user, make the post-inc adjustment.
@@ -4728,12 +4652,14 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// Split the critical edge.
BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
- NewBB = SplitCriticalEdge(BB, Parent, P,
- /*MergeIdenticalEdges=*/true,
- /*DontDeleteUselessPhis=*/true);
+ NewBB = SplitCriticalEdge(BB, Parent,
+ CriticalEdgeSplittingOptions(&DT, &LI)
+ .setMergeIdenticalEdges()
+ .setDontDeleteUselessPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
+ SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs,
+ /*AliasAnalysis*/ nullptr, &DT, &LI);
NewBB = NewBBs[0];
}
// If NewBB==NULL, then SplitCriticalEdge refused to split because all
@@ -4823,7 +4749,8 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, "lsr");
+ SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
+ "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -4832,25 +4759,20 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Mark phi nodes that terminate chains so the expander tries to reuse them.
- for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
- ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
- if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
+ for (const IVChain &Chain : IVChainVec) {
+ if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
Rewriter.setChainedPhi(PN);
}
// Expand the new value definitions and update the users.
- for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- const LSRFixup &Fixup = *I;
-
+ for (const LSRFixup &Fixup : Fixups) {
Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
Changed = true;
}
- for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
- ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
- GenerateIVChain(*ChainI, Rewriter, DeadInsts);
+ for (const IVChain &Chain : IVChainVec) {
+ GenerateIVChain(Chain, Rewriter, DeadInsts);
Changed = true;
}
// Clean up after ourselves. This must be done before deleting any
@@ -4863,9 +4785,10 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
LSRInstance::LSRInstance(Loop *L, Pass *P)
: IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
- LI(P->getAnalysis<LoopInfo>()),
- TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
- IVIncInsertPos(nullptr) {
+ LI(P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo()),
+ TTI(P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent())),
+ L(L), Changed(false), IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -4876,10 +4799,10 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
// If there's too much analysis to be done, bail early. We won't be able to
// model the problem anyway.
unsigned NumUsers = 0;
- for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ for (const IVStrideUse &U : IU) {
if (++NumUsers > MaxIVUsers) {
- DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
- << "\n");
+ (void)U;
+ DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
return;
}
}
@@ -4948,14 +4871,10 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
#ifndef NDEBUG
// Formulae should be legal.
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
- I != E; ++I) {
- const LSRUse &LU = *I;
- for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
- JE = LU.Formulae.end();
- J != JE; ++J)
+ for (const LSRUse &LU : Uses) {
+ for (const Formula &F : LU.Formulae)
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
- *J) && "Illegal formula generated!");
+ F) && "Illegal formula generated!");
};
#endif
@@ -4969,44 +4888,38 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
OS << "LSR has identified the following interesting factors and types: ";
bool First = true;
- for (SmallSetVector<int64_t, 8>::const_iterator
- I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+ for (int64_t Factor : Factors) {
if (!First) OS << ", ";
First = false;
- OS << '*' << *I;
+ OS << '*' << Factor;
}
- for (SmallSetVector<Type *, 4>::const_iterator
- I = Types.begin(), E = Types.end(); I != E; ++I) {
+ for (Type *Ty : Types) {
if (!First) OS << ", ";
First = false;
- OS << '(' << **I << ')';
+ OS << '(' << *Ty << ')';
}
OS << '\n';
}
void LSRInstance::print_fixups(raw_ostream &OS) const {
OS << "LSR is examining the following fixup sites:\n";
- for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
+ for (const LSRFixup &LF : Fixups) {
dbgs() << " ";
- I->print(OS);
+ LF.print(OS);
OS << '\n';
}
}
void LSRInstance::print_uses(raw_ostream &OS) const {
OS << "LSR is examining the following uses:\n";
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- const LSRUse &LU = *I;
+ for (const LSRUse &LU : Uses) {
dbgs() << " ";
LU.print(OS);
OS << '\n';
- for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
- JE = LU.Formulae.end(); J != JE; ++J) {
+ for (const Formula &F : LU.Formulae) {
OS << " ";
- J->print(OS);
+ F.print(OS);
OS << '\n';
}
}
@@ -5041,11 +4954,11 @@ private:
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
@@ -5064,8 +4977,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -5076,7 +4989,7 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
@@ -5092,13 +5005,15 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.replaceCongruentIVs(
L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
- &getAnalysis<TargetTransformInfo>());
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent()));
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
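Nearly all of the LoopStrengthReduce.cpp changes above are mechanical conversions of explicit iterator loops into range-based for loops. The one conversion that needs care is in GenerateTruncates, which now iterates over F.BaseRegs by reference (const SCEV *&BaseReg) so that the extended expression is written back into the formula; iterating by value would silently drop that update. A minimal sketch of the distinction, using made-up types rather than LSR's own (illustration only):

    #include <vector>

    // 'Regs' stands in for Formula::BaseRegs; 'Wider' for the extended SCEV.
    void widenAll(std::vector<const int *> &Regs, const int *Wider) {
      for (const int *R : Regs)   // copies each pointer; Regs is unchanged
        (void)R;
      for (const int *&R : Regs)  // binds to the stored pointer
        R = Wider;                // the container element itself is updated
    }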
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fef5210..ccafd10 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -13,15 +13,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/FunctionTargetTransformInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/CommandLine.h"
@@ -38,6 +41,22 @@ static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
+static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
+ "unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden,
+ cl::desc("Don't allow loop unrolling to simulate more than this number of"
+ "iterations when checking full unroll profitability"));
+
+static cl::opt<unsigned> UnrollMinPercentOfOptimized(
+ "unroll-percent-of-optimized-for-complete-unroll", cl::init(20), cl::Hidden,
+ cl::desc("If complete unrolling could trigger further optimizations, and, "
+ "by that, remove the given percent of instructions, perform the "
+ "complete unroll even if it's beyond the threshold"));
+
+static cl::opt<unsigned> UnrollAbsoluteThreshold(
+ "unroll-absolute-threshold", cl::init(2000), cl::Hidden,
+ cl::desc("Don't unroll if the unrolled size is bigger than this threshold,"
+ " even if we can remove big portion of instructions later."));
+
static cl::opt<unsigned>
UnrollCount("unroll-count", cl::init(0), cl::Hidden,
cl::desc("Use this unroll count for all loops including those with "
@@ -63,11 +82,16 @@ namespace {
static char ID; // Pass ID, replacement for typeid
LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
+ CurrentAbsoluteThreshold = UnrollAbsoluteThreshold;
+ CurrentMinPercentOfOptimized = UnrollMinPercentOfOptimized;
CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+ UserAbsoluteThreshold = (UnrollAbsoluteThreshold.getNumOccurrences() > 0);
+ UserPercentOfOptimized =
+ (UnrollMinPercentOfOptimized.getNumOccurrences() > 0);
UserAllowPartial = (P != -1) ||
(UnrollAllowPartial.getNumOccurrences() > 0);
UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
@@ -91,10 +115,16 @@ namespace {
unsigned CurrentCount;
unsigned CurrentThreshold;
+ unsigned CurrentAbsoluteThreshold;
+ unsigned CurrentMinPercentOfOptimized;
bool CurrentAllowPartial;
bool CurrentRuntime;
bool UserCount; // CurrentCount is user-specified.
bool UserThreshold; // CurrentThreshold is user-specified.
+ bool UserAbsoluteThreshold; // CurrentAbsoluteThreshold is
+ // user-specified.
+ bool UserPercentOfOptimized; // CurrentMinPercentOfOptimized is
+ // user-specified.
bool UserAllowPartial; // CurrentAllowPartial is user-specified.
bool UserRuntime; // CurrentRuntime is user-specified.
@@ -105,16 +135,15 @@ namespace {
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
- AU.addRequired<FunctionTargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
@@ -124,9 +153,11 @@ namespace {
// Fill in the UnrollingPreferences parameter with values from the
// TargetTransformationInfo.
- void getUnrollingPreferences(Loop *L, const FunctionTargetTransformInfo &FTTI,
+ void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
TargetTransformInfo::UnrollingPreferences &UP) {
UP.Threshold = CurrentThreshold;
+ UP.AbsoluteThreshold = CurrentAbsoluteThreshold;
+ UP.MinPercentOfOptimized = CurrentMinPercentOfOptimized;
UP.OptSizeThreshold = OptSizeUnrollThreshold;
UP.PartialThreshold = CurrentThreshold;
UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
@@ -134,7 +165,8 @@ namespace {
UP.MaxCount = UINT_MAX;
UP.Partial = CurrentAllowPartial;
UP.Runtime = CurrentRuntime;
- FTTI.getUnrollingPreferences(L, UP);
+ UP.AllowExpensiveTripCount = false;
+ TTI.getUnrollingPreferences(L, UP);
}
// Select and return an unroll count based on parameters from
@@ -153,7 +185,9 @@ namespace {
// unrolled loops respectively.
void selectThresholds(const Loop *L, bool HasPragma,
const TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &Threshold, unsigned &PartialThreshold) {
+ unsigned &Threshold, unsigned &PartialThreshold,
+ unsigned &AbsoluteThreshold,
+ unsigned &PercentOfOptimizedForCompleteUnroll) {
// Determine the current unrolling threshold. While this is
// normally set from UnrollThreshold, it is overridden to a
// smaller value if the current function is marked as
@@ -161,10 +195,15 @@ namespace {
// specified.
Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
+ AbsoluteThreshold = UserAbsoluteThreshold ? CurrentAbsoluteThreshold
+ : UP.AbsoluteThreshold;
+ PercentOfOptimizedForCompleteUnroll = UserPercentOfOptimized
+ ? CurrentMinPercentOfOptimized
+ : UP.MinPercentOfOptimized;
+
if (!UserThreshold &&
- L->getHeader()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize)) {
+ L->getHeader()->getParent()->hasFnAttribute(
+ Attribute::OptimizeForSize)) {
Threshold = UP.OptSizeThreshold;
PartialThreshold = UP.PartialOptSizeThreshold;
}
@@ -180,15 +219,18 @@ namespace {
std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
}
}
+ bool canUnrollCompletely(Loop *L, unsigned Threshold,
+ unsigned AbsoluteThreshold, uint64_t UnrolledSize,
+ unsigned NumberOfOptimizedInstructions,
+ unsigned PercentOfOptimizedForCompleteUnroll);
};
}
char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(FunctionTargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
@@ -203,6 +245,407 @@ Pass *llvm::createSimpleLoopUnrollPass() {
return llvm::createLoopUnrollPass(-1, -1, 0, 0);
}
+namespace {
+/// \brief SCEV expressions visitor used for finding expressions that would
+/// become constants if the loop L is unrolled.
+struct FindConstantPointers {
+ /// \brief Shows whether the expression is ConstAddress+Constant or not.
+ bool IndexIsConstant;
+
+ /// \brief Used for filtering out SCEV expressions with two or more AddRec
+ /// subexpressions.
+ ///
+ /// Used to filter out complicated SCEV expressions that have several AddRec
+ /// sub-expressions. We don't handle them, because unrolling one loop would
+ /// replace only one of these inductions with a constant; consequently, the
+ /// expression would remain non-constant.
+ bool HaveSeenAR;
+
+ /// \brief If the SCEV expression becomes ConstAddress+Constant, this value
+ /// holds ConstAddress. Otherwise, it's nullptr.
+ Value *BaseAddress;
+
+ /// \brief The loop, which we try to completely unroll.
+ const Loop *L;
+
+ ScalarEvolution &SE;
+
+ FindConstantPointers(const Loop *L, ScalarEvolution &SE)
+ : IndexIsConstant(true), HaveSeenAR(false), BaseAddress(nullptr),
+ L(L), SE(SE) {}
+
+ /// Examine the given expression S and figure out whether it can be part of
+ /// an expression that could become a constant after the loop is unrolled.
+ /// The routine sets IndexIsConstant and HaveSeenAR according to the analysis
+ /// results.
+ /// \returns true if we need to examine subexpressions, and false otherwise.
+ bool follow(const SCEV *S) {
+ if (const SCEVUnknown *SC = dyn_cast<SCEVUnknown>(S)) {
+ // We've reached a leaf node of the SCEV; it's most probably just a
+ // variable.
+ // If it's the only SCEV subexpression, then it might be the base
+ // address of an index expression.
+ // If we've already recorded a base address, then just give up on this
+ // SCEV - it's too complicated.
+ if (BaseAddress) {
+ IndexIsConstant = false;
+ return false;
+ }
+ BaseAddress = SC->getValue();
+ return false;
+ }
+ if (isa<SCEVConstant>(S))
+ return false;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // If the current SCEV expression is AddRec, and its loop isn't the loop
+ // we are about to unroll, then we won't get a constant address after
+ // unrolling, and thus, won't be able to eliminate the load.
+ if (AR->getLoop() != L) {
+ IndexIsConstant = false;
+ return false;
+ }
+ // We don't handle multiple AddRecs here, so give up in this case.
+ if (HaveSeenAR) {
+ IndexIsConstant = false;
+ return false;
+ }
+ HaveSeenAR = true;
+ }
+
+ // Continue traversal.
+ return true;
+ }
+ bool isDone() const { return !IndexIsConstant; }
+};
+} // End anonymous namespace.
+
+namespace {
+/// \brief A cache of SCEV results used to optimize repeated queries to SCEV on
+/// the same set of instructions.
+///
+/// The primary cost this saves is the cost of checking the validity of a SCEV
+/// every time it is looked up. However, in some cases we can provide a reduced
+/// and especially useful model for an instruction based upon SCEV that is
+/// non-trivial to compute but more useful to clients.
+class SCEVCache {
+public:
+ /// \brief Struct to represent a GEP whose start and step are known fixed
+ /// offsets from a base address due to SCEV's analysis.
+ struct GEPDescriptor {
+ Value *BaseAddr = nullptr;
+ unsigned Start = 0;
+ unsigned Step = 0;
+ };
+
+ Optional<GEPDescriptor> getGEPDescriptor(GetElementPtrInst *GEP);
+
+ SCEVCache(const Loop &L, ScalarEvolution &SE) : L(L), SE(SE) {}
+
+private:
+ const Loop &L;
+ ScalarEvolution &SE;
+
+ SmallDenseMap<GetElementPtrInst *, GEPDescriptor> GEPDescriptors;
+};
+} // End anonymous namespace.
+
+/// \brief Get a simplified descriptor for a GEP instruction.
+///
+/// Where possible, this produces a simplified descriptor for a GEP instruction
+/// using SCEV analysis of the containing loop. If this isn't possible, it
+/// returns an empty optional.
+///
+/// The model is a base address, an initial offset, and a per-iteration step.
+/// This fits very common patterns of GEPs inside loops and is something we can
+/// use to simulate the behavior of a particular iteration of a loop.
+///
+/// This is a cached interface. The first call may do non-trivial work to
+/// compute the result, but all subsequent calls will return a fast answer
+/// based on a cached result. This includes caching negative results.
+Optional<SCEVCache::GEPDescriptor>
+SCEVCache::getGEPDescriptor(GetElementPtrInst *GEP) {
+ decltype(GEPDescriptors)::iterator It;
+ bool Inserted;
+
+ std::tie(It, Inserted) = GEPDescriptors.insert({GEP, {}});
+
+ if (!Inserted) {
+ if (!It->second.BaseAddr)
+ return None;
+
+ return It->second;
+ }
+
+ // We've inserted a new record into the cache, so compute the GEP descriptor
+ // if possible.
+ Value *V = cast<Value>(GEP);
+ if (!SE.isSCEVable(V->getType()))
+ return None;
+ const SCEV *S = SE.getSCEV(V);
+
+ // FIXME: It'd be nice if the worklist and set used by the
+ // SCEVTraversal could be re-used between loop iterations, but the
+ // interface doesn't support that. There is no way to clear the visited
+ // sets between uses.
+ FindConstantPointers Visitor(&L, SE);
+ SCEVTraversal<FindConstantPointers> T(Visitor);
+
+ // Try to find (BaseAddress+Step+Offset) tuple.
+ // If succeeded, save it to the cache - it might help in folding
+ // loads.
+ T.visitAll(S);
+ if (!Visitor.IndexIsConstant || !Visitor.BaseAddress)
+ return None;
+
+ const SCEV *BaseAddrSE = SE.getSCEV(Visitor.BaseAddress);
+ if (BaseAddrSE->getType() != S->getType())
+ return None;
+ const SCEV *OffSE = SE.getMinusSCEV(S, BaseAddrSE);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffSE);
+
+ if (!AR)
+ return None;
+
+ const SCEVConstant *StepSE =
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE));
+ const SCEVConstant *StartSE = dyn_cast<SCEVConstant>(AR->getStart());
+ if (!StepSE || !StartSE)
+ return None;
+
+ // Check and skip caching if doing so would require lots of bits to
+ // avoid overflow.
+ APInt Start = StartSE->getValue()->getValue();
+ APInt Step = StepSE->getValue()->getValue();
+ if (Start.getActiveBits() > 32 || Step.getActiveBits() > 32)
+ return None;
+
+ // We found a cacheable SCEV model for the GEP.
+ It->second.BaseAddr = Visitor.BaseAddress;
+ It->second.Start = Start.getLimitedValue();
+ It->second.Step = Step.getLimitedValue();
+ return It->second;
+}
+
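For illustration, the descriptor computed above models the address produced by the GEP on iteration i as BaseAddr + Start + Step*i, measured in bytes. A hypothetical example (not taken from the patch): a load of a[2*i + 1] from a global array of 32-bit integers gives Start = 4 and Step = 8, so on iteration 3 the byte offset is 4 + 8*3 = 28; visitLoad below divides by the element size (4) to recover index 7 before fetching that element from the initializer with getElementAsConstant.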
+namespace {
+// This class is used to get an estimate of the optimization effects that we
+// could get from complete loop unrolling. It comes from the fact that some
+// loads might be replaced with concrete constant values and that could trigger
+// a chain of instruction simplifications.
+//
+// E.g. we might have:
+// int a[] = {0, 1, 0};
+// v = 0;
+// for (i = 0; i < 3; i ++)
+// v += b[i]*a[i];
+// If we completely unroll the loop, we would get:
+// v = b[0]*a[0] + b[1]*a[1] + b[2]*a[2]
+// Which then will be simplified to:
+// v = b[0]* 0 + b[1]* 1 + b[2]* 0
+// And finally:
+// v = b[1]
+class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
+ typedef InstVisitor<UnrolledInstAnalyzer, bool> Base;
+ friend class InstVisitor<UnrolledInstAnalyzer, bool>;
+
+public:
+ UnrolledInstAnalyzer(unsigned Iteration,
+ DenseMap<Value *, Constant *> &SimplifiedValues,
+ SCEVCache &SC)
+ : Iteration(Iteration), SimplifiedValues(SimplifiedValues), SC(SC) {}
+
+ // Allow access to the initial visit method.
+ using Base::visit;
+
+private:
+ /// \brief Number of the currently simulated iteration.
+ ///
+ /// If an expression is ConstAddress+Constant, then the Constant is
+ /// Start + Iteration*Step, where Start and Step can be obtained from the
+ /// SCEVCache.
+ unsigned Iteration;
+
+ // While we walk the loop instructions, we build up and maintain a mapping
+ // of simplified values specific to this iteration. The idea is to propagate
+ // any special information we have about loads that can be replaced with
+ // constants after complete unrolling, and account for likely simplifications
+ // post-unrolling.
+ DenseMap<Value *, Constant *> &SimplifiedValues;
+
+ // We use a cache to wrap all our SCEV queries.
+ SCEVCache &SC;
+
+ /// Base case for the instruction visitor.
+ bool visitInstruction(Instruction &I) { return false; };
+
+ /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt.
+
+ /// Try to simplify binary operator I.
+ ///
+ /// TODO: Probably it's worth hoisting the code for estimating the
+ /// simplification effects to a separate class, since we have very similar
+ /// code in InlineCost already.
+ bool visitBinaryOperator(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ Value *SimpleV = nullptr;
+ const DataLayout &DL = I.getModule()->getDataLayout();
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV =
+ SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ else
+ SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
+ SimplifiedValues[&I] = C;
+
+ return SimpleV;
+ }
+
+ /// Try to fold load I.
+ bool visitLoad(LoadInst &I) {
+ Value *AddrOp = I.getPointerOperand();
+ if (!isa<Constant>(AddrOp))
+ if (Constant *SimplifiedAddrOp = SimplifiedValues.lookup(AddrOp))
+ AddrOp = SimplifiedAddrOp;
+
+ auto *GEP = dyn_cast<GetElementPtrInst>(AddrOp);
+ if (!GEP)
+ return false;
+ auto OptionalGEPDesc = SC.getGEPDescriptor(GEP);
+ if (!OptionalGEPDesc)
+ return false;
+
+ auto GV = dyn_cast<GlobalVariable>(OptionalGEPDesc->BaseAddr);
+ // We're only interested in loads that can be completely folded to a
+ // constant.
+ if (!GV || !GV->hasInitializer())
+ return false;
+
+ ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(GV->getInitializer());
+ if (!CDS)
+ return false;
+
+ // This calculation should never overflow because we bound Iteration quite
+ // low and both the start and step are 32-bit integers. We use signed
+ // integers so that UBSan will catch if a bug sneaks into the code.
+ int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
+ int64_t Index = ((int64_t)OptionalGEPDesc->Start +
+ (int64_t)OptionalGEPDesc->Step * (int64_t)Iteration) /
+ ElemSize;
+ if (Index >= CDS->getNumElements()) {
+ // FIXME: For now we conservatively ignore out of bound accesses, but
+ // we're allowed to perform the optimization in this case.
+ return false;
+ }
+
+ Constant *CV = CDS->getElementAsConstant(Index);
+ assert(CV && "Constant expected.");
+ SimplifiedValues[&I] = CV;
+
+ return true;
+ }
+};
+} // namespace
+
+
+namespace {
+struct EstimatedUnrollCost {
+ /// \brief Count the number of optimized instructions.
+ unsigned NumberOfOptimizedInstructions;
+
+ /// \brief Count the total number of instructions.
+ unsigned UnrolledLoopSize;
+};
+}
+
+/// \brief Figure out if the loop is worth full unrolling.
+///
+/// Complete loop unrolling can make some loads constant, and we need to know
+/// if that would expose any further optimization opportunities. This routine
+/// estimates this optimization. It assigns the computed number of
+/// instructions that might potentially be optimized away to
+/// NumberOfOptimizedInstructions, and the total number of instructions to
+/// UnrolledLoopSize (not counting blocks that won't be reached, if we were
+/// able to compute the condition).
+/// \returns None if we can't analyze the loop, or if we discovered that
+/// unrolling won't give anything. Otherwise, returns the estimated cost.
+Optional<EstimatedUnrollCost>
+analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ unsigned MaxUnrolledLoopSize) {
+ // We want to be able to scale offsets by the trip count and add more offsets
+ // to them without checking for overflows, and we already don't want to
+ // analyze *massive* trip counts, so we force the max to be reasonably small.
+ assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
+ "The unroll iterations max is too large!");
+
+ // Don't simulate loops with a big or unknown tripcount
+ if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
+ TripCount > UnrollMaxIterationsCountToAnalyze)
+ return None;
+
+ SmallSetVector<BasicBlock *, 16> BBWorklist;
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Use a cache to access SCEV expressions so that we don't pay the cost on
+ // each iteration. This cache is lazily self-populating.
+ SCEVCache SC(*L, SE);
+
+ unsigned NumberOfOptimizedInstructions = 0;
+ unsigned UnrolledLoopSize = 0;
+
+ // Simulate execution of each iteration of the loop, counting the
+ // instructions that would be simplified.
+ // Since the same load can take different values on different iterations,
+ // we literally have to go through all of the loop's iterations.
+ for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
+ SimplifiedValues.clear();
+ UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SC);
+
+ BBWorklist.clear();
+ BBWorklist.insert(L->getHeader());
+ // Note that we *must not* cache the size, this loop grows the worklist.
+ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+ BasicBlock *BB = BBWorklist[Idx];
+
+ // Visit all instructions in the given basic block and try to simplify
+ // it. We don't change the actual IR, just count optimization
+ // opportunities.
+ for (Instruction &I : *BB) {
+ UnrolledLoopSize += TTI.getUserCost(&I);
+
+ // Visit the instruction to analyze its loop cost after unrolling,
+ // and if the visitor returns true, then we can optimize this
+ // instruction away.
+ if (Analyzer.visit(I))
+ NumberOfOptimizedInstructions += TTI.getUserCost(&I);
+
+ // If unrolled body turns out to be too big, bail out.
+ if (UnrolledLoopSize - NumberOfOptimizedInstructions >
+ MaxUnrolledLoopSize)
+ return None;
+ }
+
+ // Add BB's successors to the worklist.
+ for (BasicBlock *Succ : successors(BB))
+ if (L->contains(Succ))
+ BBWorklist.insert(Succ);
+ }
+
+ // If we found no optimization opportunities on the first iteration, we
+ // won't find them on later ones either.
+ if (!NumberOfOptimizedInstructions)
+ return None;
+ }
+ return {{NumberOfOptimizedInstructions, UnrolledLoopSize}};
+}
+
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,
@@ -234,44 +677,31 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
// Returns the loop hint metadata node with the given name (for example,
// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
// returned.
-static const MDNode *GetUnrollMetadata(const Loop *L, StringRef Name) {
- MDNode *LoopID = L->getLoopID();
- if (!LoopID)
- return nullptr;
-
- // First operand should refer to the loop id itself.
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
-
- for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
- const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (!MD)
- continue;
-
- const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- if (!S)
- continue;
-
- if (Name.equals(S->getString()))
- return MD;
- }
+static MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) {
+ if (MDNode *LoopID = L->getLoopID())
+ return GetUnrollMetadata(LoopID, Name);
return nullptr;
}
// Returns true if the loop has an unroll(full) pragma.
static bool HasUnrollFullPragma(const Loop *L) {
- return GetUnrollMetadata(L, "llvm.loop.unroll.full");
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
}
// Returns true if the loop has an unroll(disable) pragma.
static bool HasUnrollDisablePragma(const Loop *L) {
- return GetUnrollMetadata(L, "llvm.loop.unroll.disable");
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
+}
+
+// Returns true if the loop has a runtime unroll(disable) pragma.
+static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
}
// If loop has an unroll_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0.
static unsigned UnrollCountPragmaValue(const Loop *L) {
- const MDNode *MD = GetUnrollMetadata(L, "llvm.loop.unroll.count");
+ MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
if (MD) {
assert(MD->getNumOperands() == 2 &&
"Unroll count hint metadata should have two operands.");
@@ -319,6 +749,49 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
L->setLoopID(NewLoopID);
}
+bool LoopUnroll::canUnrollCompletely(
+ Loop *L, unsigned Threshold, unsigned AbsoluteThreshold,
+ uint64_t UnrolledSize, unsigned NumberOfOptimizedInstructions,
+ unsigned PercentOfOptimizedForCompleteUnroll) {
+
+ if (Threshold == NoThreshold) {
+ DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
+ return true;
+ }
+
+ if (UnrolledSize <= Threshold) {
+ DEBUG(dbgs() << " Can fully unroll, because unrolled size: "
+ << UnrolledSize << "<" << Threshold << "\n");
+ return true;
+ }
+
+ assert(UnrolledSize && "UnrolledSize can't be 0 at this point.");
+ unsigned PercentOfOptimizedInstructions =
+ (uint64_t)NumberOfOptimizedInstructions * 100ull / UnrolledSize;
+
+ if (UnrolledSize <= AbsoluteThreshold &&
+ PercentOfOptimizedInstructions >= PercentOfOptimizedForCompleteUnroll) {
+ DEBUG(dbgs() << " Can fully unroll, because unrolling will help removing "
+ << PercentOfOptimizedInstructions
+ << "% instructions (threshold: "
+ << PercentOfOptimizedForCompleteUnroll << "%)\n");
+ DEBUG(dbgs() << " Unrolled size (" << UnrolledSize
+ << ") is less than the threshold (" << AbsoluteThreshold
+ << ").\n");
+ return true;
+ }
+
+ DEBUG(dbgs() << " Too large to fully unroll:\n");
+ DEBUG(dbgs() << " Unrolled size: " << UnrolledSize << "\n");
+ DEBUG(dbgs() << " Estimated number of optimized instructions: "
+ << NumberOfOptimizedInstructions << "\n");
+ DEBUG(dbgs() << " Absolute threshold: " << AbsoluteThreshold << "\n");
+ DEBUG(dbgs() << " Minimum percent of removed instructions: "
+ << PercentOfOptimizedForCompleteUnroll << "\n");
+ DEBUG(dbgs() << " Threshold for small loops: " << Threshold << "\n");
+ return false;
+}
+
unsigned LoopUnroll::selectUnrollCount(
const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
@@ -363,13 +836,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
- LoopInfo *LI = &getAnalysis<LoopInfo>();
+ Function &F = *L->getHeader()->getParent();
+
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
- const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
- const FunctionTargetTransformInfo &FTTI =
- getAnalysis<FunctionTargetTransformInfo>();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -383,7 +856,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
bool HasPragma = PragmaFullUnroll || PragmaCount > 0;
TargetTransformInfo::UnrollingPreferences UP;
- getUnrollingPreferences(L, FTTI, UP);
+ getUnrollingPreferences(L, TTI, UP);
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
@@ -426,20 +899,33 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
unsigned Threshold, PartialThreshold;
- selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold);
+ unsigned AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll;
+ selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold,
+ AbsoluteThreshold, PercentOfOptimizedForCompleteUnroll);
// Given Count, TripCount and thresholds determine the type of
// unrolling which is to be performed.
enum { Full = 0, Partial = 1, Runtime = 2 };
int Unrolling;
if (TripCount && Count == TripCount) {
- if (Threshold != NoThreshold && UnrolledSize > Threshold) {
- DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << UnrolledSize << ">" << Threshold
- << "\n");
- Unrolling = Partial;
- } else {
+ Unrolling = Partial;
+ // If the loop is really small, we don't need to run an expensive analysis.
+ if (canUnrollCompletely(
+ L, Threshold, AbsoluteThreshold,
+ UnrolledSize, 0, 100)) {
Unrolling = Full;
+ } else {
+ // The loop isn't that small, but we still can fully unroll it if that
+ // helps to remove a significant number of instructions.
+ // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost =
+ analyzeLoopUnrollCost(L, TripCount, *SE, TTI, AbsoluteThreshold))
+ if (canUnrollCompletely(L, Threshold, AbsoluteThreshold,
+ Cost->UnrolledLoopSize,
+ Cost->NumberOfOptimizedInstructions,
+ PercentOfOptimizedForCompleteUnroll)) {
+ Unrolling = Full;
+ }
}
} else if (TripCount && Count < TripCount) {
Unrolling = Partial;
@@ -450,6 +936,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
+ if (HasRuntimeUnrollDisablePragma(L)) {
+ AllowRuntime = false;
+ }
if (Unrolling == Partial) {
bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {
@@ -518,8 +1007,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this,
- &LPM, &AC))
+ if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
+ TripMultiple, LI, this, &LPM, &AC))
return false;
return true;
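Taken together, canUnrollCompletely and analyzeLoopUnrollCost above form a two-tier test: a loop is fully unrolled either because its unrolled size already fits under the ordinary threshold, or because the iteration-by-iteration simulation shows that enough of the unrolled body would fold away. A rough stand-alone sketch of that decision, using the default flag values shown above and made-up numbers in the comment (illustration only, not the pass's code):

    #include <cstdint>

    // With unroll-threshold=150, unroll-absolute-threshold=2000 and
    // unroll-percent-of-optimized-for-complete-unroll=20, an unrolled body of
    // size 1500 with an estimated 400 foldable instructions gives
    // 400 * 100 / 1500 = 26%, so full unrolling is allowed even though 1500
    // exceeds the ordinary threshold.
    static bool worthFullUnroll(uint64_t UnrolledSize, unsigned NumOptimized,
                                unsigned Threshold, unsigned AbsoluteThreshold,
                                unsigned MinPercentOfOptimized) {
      if (UnrolledSize <= Threshold)
        return true;
      unsigned Percent =
          static_cast<unsigned>((uint64_t)NumOptimized * 100 / UnrolledSize);
      return UnrolledSize <= AbsoluteThreshold &&
             Percent >= MinPercentOfOptimized;
    }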
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 9f4c122..988d2af 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -170,13 +171,13 @@ namespace {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
@@ -333,10 +334,10 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
@@ -387,7 +388,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
*L->getHeader()->getParent());
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPM_Ref;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
@@ -432,8 +433,10 @@ bool LoopUnswitch::processCurrentLoop() {
// Probably we reach the quota of branches for this loop. If so
// stop unswitching.
- if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>(),
- AC))
+ if (!BranchesInfo.countLoop(
+ currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *currentLoop->getHeader()->getParent()),
+ AC))
return false;
// Loop over all of the basic blocks in the loop. If we find an interior
@@ -655,9 +658,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// Check to see if it would be profitable to unswitch current loop.
// Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
+ if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize))
return false;
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
@@ -675,7 +676,7 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
if (LI->getLoopFor(*I) == L)
- New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), LI->getBase());
+ New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
// Add all of the subloops to the new loop.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
@@ -706,8 +707,9 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
// If either edge is critical, split it. This helps preserve LoopSimplify
// form for enclosing loops.
- SplitCriticalEdge(BI, 0, this, false, false, true);
- SplitCriticalEdge(BI, 1, this, false, false, true);
+ auto Options = CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA();
+ SplitCriticalEdge(BI, 0, Options);
+ SplitCriticalEdge(BI, 1, Options);
}
/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
@@ -726,7 +728,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
// conditional branch on Cond.
- BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, this);
+ BasicBlock *NewPH = SplitEdge(loopPreheader, loopHeader, DT, LI);
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we should
@@ -737,7 +739,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
+ BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), DT, LI);
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
@@ -768,13 +770,9 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
- if (!ExitBlock->isLandingPad()) {
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
- this, NewBBs);
- }
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa",
+ /*AliasAnalysis*/ nullptr, DT, LI,
+ /*PreserveLCSSA*/ true);
}
}
@@ -797,7 +795,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// First step, split the preheader and exit blocks, and add these blocks to
// the LoopBlocks list.
- BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
+ BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, DT, LI);
LoopBlocks.push_back(NewPreheader);
// We want the loop to come after the preheader, but before the exit blocks.
@@ -850,14 +848,14 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
if (ParentLoop) {
// Make sure to add the cloned preheader and exit blocks to the parent loop
// as well.
- ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
+ ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
}
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
// The new exit block should be in the same loop as the old one.
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
- ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
+ ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
"Exit block should have been split to have one successor!");
@@ -1043,7 +1041,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// and hooked up so as to preserve the loop structure, because
// trying to update it is complicated. So instead we preserve the
// loop structure and put the block on a dead code path.
- SplitEdge(Switch, SISucc, this);
+ SplitEdge(Switch, SISucc, DT, LI);
// Compute the successors instead of relying on the return value
// of SplitEdge, since it may have split the switch successor
// after PHI nodes.
@@ -1085,6 +1083,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
/// pass.
///
void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
while (!Worklist.empty()) {
Instruction *I = Worklist.back();
Worklist.pop_back();
@@ -1107,7 +1106,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'. TODO: update the domtree properly so we can pass it here.
- if (Value *V = SimplifyInstruction(I))
+ if (Value *V = SimplifyInstruction(I, DL))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index ff89e74..0c47cbd 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -11,7 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -24,13 +25,14 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include <vector>
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "lower-expect-intrinsic"
-STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
+STATISTIC(ExpectIntrinsicsHandled,
+ "Number of 'expect' intrinsic instructions handled");
static cl::opt<uint32_t>
LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
@@ -39,27 +41,8 @@ static cl::opt<uint32_t>
UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
-namespace {
-
- class LowerExpectIntrinsic : public FunctionPass {
-
- bool HandleSwitchExpect(SwitchInst *SI);
-
- bool HandleIfExpect(BranchInst *BI);
-
- public:
- static char ID;
- LowerExpectIntrinsic() : FunctionPass(ID) {
- initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
- };
-}
-
-
-bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
- CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+static bool handleSwitchExpect(SwitchInst &SI) {
+ CallInst *CI = dyn_cast<CallInst>(SI.getCondition());
if (!CI)
return false;
@@ -72,26 +55,24 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
if (!ExpectedValue)
return false;
- SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue);
- unsigned n = SI->getNumCases(); // +1 for default case.
- std::vector<uint32_t> Weights(n + 1);
+ SwitchInst::CaseIt Case = SI.findCaseValue(ExpectedValue);
+ unsigned n = SI.getNumCases(); // +1 for default case.
+ SmallVector<uint32_t, 16> Weights(n + 1, UnlikelyBranchWeight);
- Weights[0] = Case == SI->case_default() ? LikelyBranchWeight
- : UnlikelyBranchWeight;
- for (unsigned i = 0; i != n; ++i)
- Weights[i + 1] = i == Case.getCaseIndex() ? LikelyBranchWeight
- : UnlikelyBranchWeight;
+ if (Case == SI.case_default())
+ Weights[0] = LikelyBranchWeight;
+ else
+ Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight;
- SI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(CI->getContext()).createBranchWeights(Weights));
+ SI.setMetadata(LLVMContext::MD_prof,
+ MDBuilder(CI->getContext()).createBranchWeights(Weights));
- SI->setCondition(ArgValue);
+ SI.setCondition(ArgValue);
return true;
}
-
-bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
- if (BI->isUnconditional())
+static bool handleBranchExpect(BranchInst &BI) {
+ if (BI.isUnconditional())
return false;
// Handle non-optimized IR code like:
@@ -105,9 +86,9 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
CallInst *CI;
- ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(BI.getCondition());
if (!CmpI) {
- CI = dyn_cast<CallInst>(BI->getCondition());
+ CI = dyn_cast<CallInst>(BI.getCondition());
} else {
if (CmpI->getPredicate() != CmpInst::ICMP_NE)
return false;
@@ -136,32 +117,30 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
else
Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
- BI->setMetadata(LLVMContext::MD_prof, Node);
+ BI.setMetadata(LLVMContext::MD_prof, Node);
if (CmpI)
CmpI->setOperand(0, ArgValue);
else
- BI->setCondition(ArgValue);
+ BI.setCondition(ArgValue);
return true;
}
+static bool lowerExpectIntrinsic(Function &F) {
+ bool Changed = false;
-bool LowerExpectIntrinsic::runOnFunction(Function &F) {
- for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
- BasicBlock *BB = I++;
-
+ for (BasicBlock &BB : F) {
// Create "block_weights" metadata.
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- if (HandleIfExpect(BI))
- IfHandled++;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (HandleSwitchExpect(SI))
- IfHandled++;
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
+ if (handleBranchExpect(*BI))
+ ExpectIntrinsicsHandled++;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
+ if (handleSwitchExpect(*SI))
+ ExpectIntrinsicsHandled++;
}
// remove llvm.expect intrinsics.
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ) {
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
CallInst *CI = dyn_cast<CallInst>(BI++);
if (!CI)
continue;
@@ -171,17 +150,42 @@ bool LowerExpectIntrinsic::runOnFunction(Function &F) {
Value *Exp = CI->getArgOperand(0);
CI->replaceAllUsesWith(Exp);
CI->eraseFromParent();
+ Changed = true;
}
}
}
- return false;
+ return Changed;
}
+PreservedAnalyses LowerExpectIntrinsicPass::run(Function &F) {
+ if (lowerExpectIntrinsic(F))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
+
+namespace {
+/// \brief Legacy pass for lowering expect intrinsics out of the IR.
+///
+/// When this pass is run over a function it uses expect intrinsics which feed
+/// branches and switches to provide branch weight metadata for those
+/// terminators. It then removes the expect intrinsics from the IR so the rest
+/// of the optimizer can ignore them.
+class LowerExpectIntrinsic : public FunctionPass {
+public:
+ static char ID;
+ LowerExpectIntrinsic() : FunctionPass(ID) {
+ initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); }
+};
+}
char LowerExpectIntrinsic::ID = 0;
-INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
- "Intrinsics", false, false)
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect",
+ "Lower 'expect' Intrinsics", false, false)
FunctionPass *llvm::createLowerExpectIntrinsicPass() {
return new LowerExpectIntrinsic();
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3eea3d4..66d6ac6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -28,7 +29,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
@@ -41,7 +41,8 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
- bool &VariableIdxFound, const DataLayout &TD){
+ bool &VariableIdxFound,
+ const DataLayout &DL) {
// Skip over the first indices.
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
@@ -57,13 +58,13 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
// Handle struct indices, which add their field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
// Otherwise, we have a sequential type like an array or vector. Multiply
// the index by the ElementSize.
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*OpC->getSExtValue();
}
@@ -74,7 +75,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
- const DataLayout &TD) {
+ const DataLayout &DL) {
Ptr1 = Ptr1->stripPointerCasts();
Ptr2 = Ptr2->stripPointerCasts();
@@ -92,12 +93,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// If one pointer is a GEP and the other isn't, then see if the GEP is a
// constant offset from the base, as in "P" and "gep P, 1".
if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
- Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL);
return !VariableIdxFound;
}
if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
- Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL);
return !VariableIdxFound;
}
@@ -115,8 +116,8 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
- int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
- int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+ int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL);
+ int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL);
if (VariableIdxFound) return false;
Offset = Offset2-Offset1;
@@ -150,12 +151,11 @@ struct MemsetRange {
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
- bool isProfitableToUseMemset(const DataLayout &TD) const;
-
+ bool isProfitableToUseMemset(const DataLayout &DL) const;
};
} // end anon namespace
-bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
+bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
if (TheStores.size() >= 4 || End-Start >= 16) return true;
@@ -183,7 +183,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// size. If so, check to see whether we will end up actually reducing the
// number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned MaxIntSize = TD.getLargestLegalIntTypeSize();
+ unsigned MaxIntSize = DL.getLargestLegalIntTypeSize();
if (MaxIntSize == 0)
MaxIntSize = 1;
unsigned NumPointerStores = Bytes / MaxIntSize;
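As a hedged illustration of the heuristic above (not part of the patch; clear4() is a made-up function), a run of stores that MemsetRanges treats as profitable:

void clear4(int *p) {
  p[0] = 0;  // four stores of the same splat value...
  p[1] = 0;
  p[2] = 0;
  p[3] = 0;  // ...covering 16 contiguous bytes -> merged into memset(p, 0, 16)
}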
@@ -314,14 +314,12 @@ namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
TargetLibraryInfo *TLI;
- const DataLayout *DL;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
MD = nullptr;
TLI = nullptr;
- DL = nullptr;
}
bool runOnFunction(Function &F) override;
@@ -334,7 +332,7 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -346,8 +344,9 @@ namespace {
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
- bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
- uint64_t MSize);
+ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
+ bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
+ bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
bool processByValArgument(CallSite CS, unsigned ArgNo);
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
Value *ByteVal);
@@ -366,7 +365,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
@@ -377,13 +376,13 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
- if (!DL) return nullptr;
+ const DataLayout &DL = StartInst->getModule()->getDataLayout();
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
- MemsetRanges Ranges(*DL);
+ MemsetRanges Ranges(DL);
BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
@@ -406,8 +405,8 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
- Offset, *DL))
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
+ DL))
break;
Ranges.addStore(Offset, NextStore);
@@ -420,7 +419,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *DL))
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL))
break;
Ranges.addMemSet(Offset, MSI);
@@ -452,7 +451,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
- if (!Range.isProfitableToUseMemset(*DL))
+ if (!Range.isProfitableToUseMemset(DL))
continue;
// Otherwise, we do want to transform this! Create a new memset.
@@ -464,7 +463,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Alignment == 0) {
Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
- Alignment = DL->getABITypeAlignment(EltType);
+ Alignment = DL.getABITypeAlignment(EltType);
}
AMemSet =
@@ -494,8 +493,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
-
- if (!DL) return false;
+ const DataLayout &DL = SI->getModule()->getDataLayout();
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
@@ -525,16 +523,16 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
unsigned storeAlign = SI->getAlignment();
if (!storeAlign)
- storeAlign = DL->getABITypeAlignment(SI->getOperand(0)->getType());
+ storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
unsigned loadAlign = LI->getAlignment();
if (!loadAlign)
- loadAlign = DL->getABITypeAlignment(LI->getType());
+ loadAlign = DL.getABITypeAlignment(LI->getType());
- bool changed = performCallSlotOptzn(LI,
- SI->getPointerOperand()->stripPointerCasts(),
- LI->getPointerOperand()->stripPointerCasts(),
- DL->getTypeStoreSize(SI->getOperand(0)->getType()),
- std::min(storeAlign, loadAlign), C);
+ bool changed = performCallSlotOptzn(
+ LI, SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ DL.getTypeStoreSize(SI->getOperand(0)->getType()),
+ std::min(storeAlign, loadAlign), C);
if (changed) {
MD->removeInstruction(SI);
SI->eraseFromParent();
@@ -606,15 +604,13 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!srcAlloca)
return false;
- // Check that all of src is copied to dest.
- if (!DL) return false;
-
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
return false;
- uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) *
- srcArraySize->getZExtValue();
+ const DataLayout &DL = cpy->getModule()->getDataLayout();
+ uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ srcArraySize->getZExtValue();
if (cpyLen < srcSize)
return false;
@@ -628,8 +624,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!destArraySize)
return false;
- uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *
- destArraySize->getZExtValue();
+ uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
+ destArraySize->getZExtValue();
if (destSize < srcSize)
return false;
@@ -648,7 +644,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
}
- uint64_t destSize = DL->getTypeAllocSize(StructTy);
+ uint64_t destSize = DL.getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
}
@@ -659,7 +655,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Check that dest points to memory that is at least as aligned as src.
unsigned srcAlign = srcAlloca->getAlignment();
if (!srcAlign)
- srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());
+ srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
// If dest is not aligned enough and we can't increase its alignment then
// bail out.
@@ -769,10 +765,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
-/// copy from MDep's input if we can. MSize is the size of M's copy.
+/// copy from MDep's input if we can.
///
-bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
- uint64_t MSize) {
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
if (M->getSource() != MDep->getDest() || MDep->isVolatile())
@@ -844,6 +839,103 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
return true;
}
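A hedged, source-level sketch of the memcpy-memcpy chain that processMemCpyMemCpyDependence simplifies (not part of the patch; forward_copy() is a made-up function):

#include <cstring>
void forward_copy(char *dst, const char *src, char *tmp) {
  std::memcpy(tmp, src, 64);  // MDep
  std::memcpy(dst, tmp, 64);  // M: becomes memcpy(dst, src, 64), exposing the
                              // copy into tmp to dead-store elimination
}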
+/// We've found that the (upward scanning) memory dependence of \p MemCpy is
+/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
+/// weren't copied over by \p MemCpy.
+///
+/// In other words, transform:
+/// \code
+/// memset(dst, c, dst_size);
+/// memcpy(dst, src, src_size);
+/// \endcode
+/// into:
+/// \code
+/// memcpy(dst, src, src_size);
+/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
+/// \endcode
+bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
+ MemSetInst *MemSet) {
+ // We can only transform memset/memcpy with the same destination.
+ if (MemSet->getDest() != MemCpy->getDest())
+ return false;
+
+ // Check that there are no other dependencies on the memset destination.
+ MemDepResult DstDepInfo =
+ MD->getPointerDependencyFrom(AliasAnalysis::getLocationForDest(MemSet),
+ false, MemCpy, MemCpy->getParent());
+ if (DstDepInfo.getInst() != MemSet)
+ return false;
+
+ // Use the same i8* dest as the memcpy, killing the memset dest if different.
+ Value *Dest = MemCpy->getRawDest();
+ Value *DestSize = MemSet->getLength();
+ Value *SrcSize = MemCpy->getLength();
+
+ // By default, create an unaligned memset.
+ unsigned Align = 1;
+ // If Dest is aligned, and SrcSize is constant, use the minimum alignment
+ // of the sum.
+ const unsigned DestAlign =
+ std::max(MemSet->getAlignment(), MemCpy->getAlignment());
+ if (DestAlign > 1)
+ if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
+ Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
+
+ IRBuilder<> Builder(MemCpy);
+
+ // If the sizes have different types, zext the smaller one.
+ if (DestSize->getType() != SrcSize->getType()) {
+ if (DestSize->getType()->getIntegerBitWidth() >
+ SrcSize->getType()->getIntegerBitWidth())
+ SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
+ else
+ DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
+ }
+
+ Value *MemsetLen =
+ Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize),
+ ConstantInt::getNullValue(DestSize->getType()),
+ Builder.CreateSub(DestSize, SrcSize));
+ Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
+ MemsetLen, Align);
+
+ MD->removeInstruction(MemSet);
+ MemSet->eraseFromParent();
+ return true;
+}
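The same transformation at the source level, as a hedged sketch (not part of the patch; init_buffer() is a made-up function), with a constant dst_size and a variable src_size:

#include <cstddef>
#include <cstring>
void init_buffer(char *dst, const char *src, std::size_t src_size) {
  std::memset(dst, 0, 128);         // memset(dst, c, dst_size)
  std::memcpy(dst, src, src_size);  // memcpy(dst, src, src_size)
  // Conceptually becomes:
  //   memcpy(dst, src, src_size);
  //   memset(dst + src_size, 0, src_size >= 128 ? 0 : 128 - src_size);
}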
+
+/// Transform memcpy to memset when its source was just memset.
+/// In other words, turn:
+/// \code
+/// memset(dst1, c, dst1_size);
+/// memcpy(dst2, dst1, dst2_size);
+/// \endcode
+/// into:
+/// \code
+/// memset(dst1, c, dst1_size);
+/// memset(dst2, c, dst2_size);
+/// \endcode
+/// When dst2_size <= dst1_size.
+///
+/// The \p MemCpy must have a Constant length.
+bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
+ MemSetInst *MemSet) {
+ // This only makes sense on memcpy(..., memset(...), ...).
+ if (MemSet->getRawDest() != MemCpy->getRawSource())
+ return false;
+
+ ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
+ ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
+ // Make sure the memcpy doesn't read any more than what the memset wrote.
+ // Don't worry about sizes larger than i64.
+ if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
+ return false;
+
+ IRBuilder<> Builder(MemCpy);
+ Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
+ CopySize, MemCpy->getAlignment());
+ return true;
+}
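And a hedged sketch of the shape performMemCpyToMemSetOptzn targets (not part of the patch; copy_cleared() is a made-up function), with a constant copy length and dst2_size <= dst1_size:

#include <cstring>
void copy_cleared(char *dst1, char *dst2) {
  std::memset(dst1, 0, 64);     // memset(dst1, c, dst1_size)
  std::memcpy(dst2, dst1, 32);  // memcpy(dst2, dst1, dst2_size)
  // becomes: memset(dst2, 0, 32)
}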
/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
@@ -874,17 +966,26 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return true;
}
+ MemDepResult DepInfo = MD->getDependency(M);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+ if (DepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+ if (processMemSetMemCpyDependence(M, MDep))
+ return true;
+
// The optimizations after this point require the memcpy size.
ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
if (!CopySize) return false;
- // The are three possible optimizations we can do for memcpy:
+ // There are four possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
// c) memcpy from freshly alloca'd space or space that has just started its
// lifetime copies undefined data, and we can therefore eliminate the
// memcpy in favor of the data that was already at the destination.
- MemDepResult DepInfo = MD->getDependency(M);
+ // d) memcpy from a just-memset'd source can be turned into memset.
if (DepInfo.isClobber()) {
if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
@@ -900,9 +1001,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
M, M->getParent());
+
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ return processMemCpyMemCpyDependence(M, MDep);
} else if (SrcDepInfo.isDef()) {
Instruction *I = SrcDepInfo.getInst();
bool hasUndefContents = false;
@@ -924,6 +1026,15 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
}
}
+ if (SrcDepInfo.isClobber())
+ if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+ if (performMemCpyToMemSetOptzn(M, MDep)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumCpyToSet;
+ return true;
+ }
+
return false;
}
@@ -959,12 +1070,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
- if (!DL) return false;
-
+ const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
- uint64_t ByValSize = DL->getTypeAllocSize(ByValTy);
+ uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
true, CS.getInstruction(),
@@ -997,8 +1107,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
*CS->getParent()->getParent());
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (MDep->getAlignment() < ByValAlign &&
- getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &AC,
- CS.getInstruction(), &DT) < ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
+ CS.getInstruction(), &AC, &DT) < ByValAlign)
return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
@@ -1051,7 +1161,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
RepeatInstruction = processMemCpy(M);
else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
RepeatInstruction = processMemMove(M);
- else if (CallSite CS = (Value*)I) {
+ else if (auto CS = CallSite(I)) {
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
if (CS.isByValArgument(i))
MadeChange |= processByValArgument(CS, i);
@@ -1077,9 +1187,7 @@ bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 1f73cbc..611a941 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -81,12 +81,13 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
@@ -115,9 +116,9 @@ public:
private:
// This transformation requires dominator and postdominator info
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<MemoryDependenceAnalysis>();
AU.addPreserved<AliasAnalysis>();
}
@@ -168,7 +169,7 @@ FunctionPass *llvm::createMergedLoadStoreMotionPass() {
INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
"MergedLoadStoreMotion", false, false)
@@ -579,7 +580,7 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
/// \brief Run the transformation for each function
///
bool MergedLoadStoreMotion::runOnFunction(Function &F) {
- MD = &getAnalysis<MemoryDependenceAnalysis>();
+ MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
AA = &getAnalysis<AliasAnalysis>();
bool Changed = false;
diff --git a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
new file mode 100644
index 0000000..5b370e0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -0,0 +1,481 @@
+//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates n-ary add expressions and eliminates the redundancy
+// exposed by the reassociation.
+//
+// A motivating example:
+//
+// void foo(int a, int b) {
+// bar(a + b);
+// bar((a + 2) + b);
+// }
+//
+// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
+// the above code to
+//
+// int t = a + b;
+// bar(t);
+// bar(t + 2);
+//
+// However, the Reassociate pass is unable to do that because it processes each
+// instruction individually and believes (a + 2) + b is the best form according
+// to its rank system.
+//
+// To address this limitation, NaryReassociate reassociates an expression in a
+// form that reuses existing instructions. As a result, NaryReassociate can
+// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
+// (a + b) is computed before.
+//
+// NaryReassociate works as follows. For every instruction in the form of (a +
+// b) + c, it checks whether a + c or b + c is already computed by a dominating
+// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
+// c) + a and removes the redundancy accordingly. To efficiently look up whether
+// an expression is computed before, we store each instruction seen and its SCEV
+// into an SCEV-to-instruction map.
+//
+// Although the algorithm pattern-matches only ternary additions, it
+// automatically handles many >3-ary expressions by walking through the function
+// in the depth-first order. For example, given
+//
+// (a + c) + d
+// ((a + b) + c) + d
+//
+// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
+// ((a + c) + b) + d into ((a + c) + d) + b.
+//
+// Finally, the above dominator-based algorithm may need to be run multiple
+// iterations before emitting optimal code. One source of this need is that we
+// only split an operand when it is used only once. The above algorithm can
+// eliminate an instruction and decrease the usage count of its operands. As a
+// result, an instruction that previously had multiple uses may become a
+// single-use instruction and thus eligible for split consideration. For
+// example,
+//
+// ac = a + c
+// ab = a + b
+// abc = ab + c
+// ab2 = ab + b
+// ab2c = ab2 + c
+//
+// In the first iteration, we cannot reassociate abc to ac+b because ab is used
+// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
+// result, ab2 becomes dead and ab will be used only once in the second
+// iteration.
+//
+// Limitations and TODO items:
+//
+// 1) We only consider n-ary adds for now. This should be extended and
+// generalized.
+//
+// 2) Besides arithmetic operations, similar reassociation can be applied to
+// GEPs. For example, if
+// X = &arr[a]
+// dominates
+// Y = &arr[a + b]
+// we may rewrite Y into X + b.
+//
+//===----------------------------------------------------------------------===//
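A compilable version of the motivating example from the header comment, for illustration only (bar's body here is a made-up stand-in):

static void bar(int) {}
void foo(int a, int b) {
  bar(a + b);
  bar((a + 2) + b);  // reassociated to (a + b) + 2, reusing the first a + b
}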
+
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "nary-reassociate"
+
+namespace {
+class NaryReassociate : public FunctionPass {
+public:
+ static char ID;
+
+ NaryReassociate(): FunctionPass(ID) {
+ initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+ return false;
+ }
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+private:
+ // Runs only one iteration of the dominator-based algorithm. See the header
+ // comments for why we need multiple iterations.
+ bool doOneIteration(Function &F);
+
+ // Reassociates I for better CSE.
+ Instruction *tryReassociate(Instruction *I);
+
+ // Reassociate GEP for better CSE.
+ Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
+ // Try splitting GEP at the I-th index and see whether either part can be
+ // CSE'ed. This is a helper function for tryReassociateGEP.
+ //
+ // \p IndexedType The element type indexed by GEP's I-th index. This is
+ // equivalent to
+ // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
+ // ..., i-th index).
+ GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+ unsigned I, Type *IndexedType);
+ // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or
+ // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly.
+ GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+ unsigned I, Value *LHS,
+ Value *RHS, Type *IndexedType);
+
+ // Reassociate Add for better CSE.
+ Instruction *tryReassociateAdd(BinaryOperator *I);
+ // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
+ Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
+ // Rewrites I to LHS + RHS if LHS is computed already.
+ Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+
+ // Returns the closest dominator of \c Dominatee that computes
+ // \c CandidateExpr. Returns null if not found.
+ Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
+ Instruction *Dominatee);
+ // GetElementPtrInst implicitly sign-extends an index if the index is shorter
+ // than the pointer size. This function returns whether Index is shorter than
+ // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
+ // to be an index of GEP.
+ bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
+
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ TargetTransformInfo *TTI;
+ const DataLayout *DL;
+ // A lookup table quickly telling which instructions compute the given SCEV.
+ // Note that there can be multiple instructions at different locations
+ // computing to the same SCEV, so we map a SCEV to an instruction list. For
+ // example,
+ //
+ // if (p1)
+ // foo(a + b);
+ // if (p2)
+ // bar(a + b);
+ DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+};
+} // anonymous namespace
+
+char NaryReassociate::ID = 0;
+INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
+ false, false)
+
+FunctionPass *llvm::createNaryReassociatePass() {
+ return new NaryReassociate();
+}
+
+bool NaryReassociate::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolution>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool Changed = false, ChangedInThisIteration;
+ do {
+ ChangedInThisIteration = doOneIteration(F);
+ Changed |= ChangedInThisIteration;
+ } while (ChangedInThisIteration);
+ return Changed;
+}
+
+// Whitelist the instruction types NaryReassociate handles for now.
+static bool isPotentiallyNaryReassociable(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool NaryReassociate::doOneIteration(Function &F) {
+ bool Changed = false;
+ SeenExprs.clear();
+ // Process the basic blocks in pre-order of the dominator tree. This order
+  // ensures that all bases of a candidate are already in SeenExprs when we
+  // process it.
+ for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ BasicBlock *BB = Node->getBlock();
+ for (auto I = BB->begin(); I != BB->end(); ++I) {
+ if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) {
+ if (Instruction *NewI = tryReassociate(I)) {
+ Changed = true;
+ SE->forgetValue(I);
+ I->replaceAllUsesWith(NewI);
+ RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ I = NewI;
+ }
+ // Add the rewritten instruction to SeenExprs; the original instruction
+ // is deleted.
+ SeenExprs[SE->getSCEV(I)].push_back(I);
+ }
+ }
+ }
+ return Changed;
+}
+
+Instruction *NaryReassociate::tryReassociate(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ return tryReassociateAdd(cast<BinaryOperator>(I));
+ case Instruction::GetElementPtr:
+ return tryReassociateGEP(cast<GetElementPtrInst>(I));
+ default:
+ llvm_unreachable("should be filtered out by isPotentiallyNaryReassociable");
+ }
+}
+
+// FIXME: extract this method into TTI->getGEPCost.
+static bool isGEPFoldable(GetElementPtrInst *GEP,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ GlobalVariable *BaseGV = nullptr;
+ int64_t BaseOffset = 0;
+ bool HasBaseReg = false;
+ int64_t Scale = 0;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
+ BaseGV = GV;
+ else
+ HasBaseReg = true;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
+ BaseOffset += ConstIdx->getSExtValue() * ElementSize;
+ } else {
+ // Needs scale register.
+ if (Scale != 0) {
+ // No addressing mode takes two scale registers.
+ return false;
+ }
+ Scale = ElementSize;
+ }
+ } else {
+ StructType *STy = cast<StructType>(*GTI);
+ uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
+ BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
+ }
+ }
+ return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
+ BaseOffset, HasBaseReg, Scale);
+}
+
+Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
+ // Not worth reassociating GEP if it is foldable.
+ if (isGEPFoldable(GEP, TTI, DL))
+ return nullptr;
+
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
+ if (isa<SequentialType>(*GTI++)) {
+ if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I - 1, *GTI)) {
+ return NewGEP;
+ }
+ }
+ }
+ return nullptr;
+}
+
+bool NaryReassociate::requiresSignExtension(Value *Index,
+ GetElementPtrInst *GEP) {
+ unsigned PointerSizeInBits =
+ DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace());
+ return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
+}
+
+GetElementPtrInst *
+NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
+ Type *IndexedType) {
+ Value *IndexToSplit = GEP->getOperand(I + 1);
+ if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit))
+ IndexToSplit = SExt->getOperand(0);
+
+ if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
+ // If the I-th index needs sext and the underlying add is not equipped with
+ // nsw, we cannot split the add because
+ // sext(LHS + RHS) != sext(LHS) + sext(RHS).
+ if (requiresSignExtension(IndexToSplit, GEP) && !AO->hasNoSignedWrap())
+ return nullptr;
+ Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
+ // IndexToSplit = LHS + RHS.
+ if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
+ return NewGEP;
+ // Symmetrically, try IndexToSplit = RHS + LHS.
+ if (LHS != RHS) {
+ if (auto *NewGEP =
+ tryReassociateGEPAtIndex(GEP, I, RHS, LHS, IndexedType))
+ return NewGEP;
+ }
+ }
+ return nullptr;
+}
+
+GetElementPtrInst *
+NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
+ Value *LHS, Value *RHS,
+ Type *IndexedType) {
+ // Look for GEP's closest dominator that has the same SCEV as GEP except that
+ // the I-th index is replaced with LHS.
+ SmallVector<const SCEV *, 4> IndexExprs;
+ for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
+ IndexExprs.push_back(SE->getSCEV(*Index));
+ // Replace the I-th index with LHS.
+ IndexExprs[I] = SE->getSCEV(LHS);
+ const SCEV *CandidateExpr = SE->getGEPExpr(
+ GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
+ IndexExprs, GEP->isInBounds());
+
+ auto *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
+ if (Candidate == nullptr)
+ return nullptr;
+
+ PointerType *TypeOfCandidate = dyn_cast<PointerType>(Candidate->getType());
+  // Pretty rare, but theoretically possible when a non-pointer value happens
+  // to share CandidateExpr.
+ if (TypeOfCandidate == nullptr)
+ return nullptr;
+
+ // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
+ uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
+ Type *ElementType = TypeOfCandidate->getElementType();
+ uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
+ // Another less rare case: because I is not necessarily the last index of the
+ // GEP, the size of the type at the I-th index (IndexedSize) is not
+ // necessarily divisible by ElementSize. For example,
+ //
+ // #pragma pack(1)
+ // struct S {
+ // int a[3];
+ // int64 b[8];
+ // };
+ // #pragma pack()
+ //
+  // sizeof(S) = 76, which is indivisible by sizeof(int64) = 8.
+ //
+ // TODO: bail out on this case for now. We could emit uglygep.
+ if (IndexedSize % ElementSize != 0)
+ return nullptr;
+
+ // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
+ IRBuilder<> Builder(GEP);
+ Type *IntPtrTy = DL->getIntPtrType(TypeOfCandidate);
+ if (RHS->getType() != IntPtrTy)
+ RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
+ if (IndexedSize != ElementSize) {
+ RHS = Builder.CreateMul(
+ RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
+ }
+ GetElementPtrInst *NewGEP =
+ cast<GetElementPtrInst>(Builder.CreateGEP(Candidate, RHS));
+ NewGEP->setIsInBounds(GEP->isInBounds());
+ NewGEP->takeName(GEP);
+ return NewGEP;
+}
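A hedged, source-level sketch of the GEP case from the header comment's TODO, which tryReassociateGEPAtIndex performs at the IR level (not part of the patch; sink() is a made-up function):

static void sink(int *) {}
void gep_example(int *arr, long a, long b) {
  int *x = &arr[a];      // X = &arr[a], computed first
  sink(x);
  int *y = &arr[a + b];  // Y = &arr[a + b]: rebuilt from X + b when profitable
  sink(y);
}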
+
+Instruction *NaryReassociate::tryReassociateAdd(BinaryOperator *I) {
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+ return NewI;
+ if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+ return NewI;
+ return nullptr;
+}
+
+Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
+ Instruction *I) {
+ Value *A = nullptr, *B = nullptr;
+ // To be conservative, we reassociate I only when it is the only user of A+B.
+ if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
+ // I = (A + B) + RHS
+ // = (A + RHS) + B or (B + RHS) + A
+ const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
+ const SCEV *RHSExpr = SE->getSCEV(RHS);
+ if (BExpr != RHSExpr) {
+ if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+ return NewI;
+ }
+ if (AExpr != RHSExpr) {
+ if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+ return NewI;
+ }
+ }
+ return nullptr;
+}
+
+Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
+ Value *RHS, Instruction *I) {
+ auto Pos = SeenExprs.find(LHSExpr);
+ // Bail out if LHSExpr is not previously seen.
+ if (Pos == SeenExprs.end())
+ return nullptr;
+
+ // Look for the closest dominator LHS of I that computes LHSExpr, and replace
+ // I with LHS + RHS.
+ auto *LHS = findClosestMatchingDominator(LHSExpr, I);
+ if (LHS == nullptr)
+ return nullptr;
+
+ Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ NewI->takeName(I);
+ return NewI;
+}
+
+Instruction *
+NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
+ Instruction *Dominatee) {
+ auto Pos = SeenExprs.find(CandidateExpr);
+ if (Pos == SeenExprs.end())
+ return nullptr;
+
+ auto &Candidates = Pos->second;
+ // Because we process the basic blocks in pre-order of the dominator tree, a
+ // candidate that doesn't dominate the current instruction won't dominate any
+ // future instruction either. Therefore, we pop it out of the stack. This
+ // optimization makes the algorithm O(n).
+ while (!Candidates.empty()) {
+ Instruction *Candidate = Candidates.back();
+ if (DT->dominates(Candidate, Dominatee))
+ return Candidate;
+ Candidates.pop_back();
+ }
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 5c8bed5..31d7df3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -18,7 +18,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -52,16 +52,18 @@ INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
"Partially inline calls to library functions", false, false)
void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
bool Changed = false;
Function::iterator CurrBB;
- TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
- const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
CurrBB = BB++;
@@ -126,7 +128,7 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
// Move all instructions following Call to newly created block JoinBB.
// Create phi and replace all uses.
- BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
+ BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode());
IRBuilder<> Builder(JoinBB, JoinBB->begin());
PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
Call->replaceAllUsesWith(Phi);
diff --git a/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
new file mode 100644
index 0000000..3e7deeb
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -0,0 +1,993 @@
+//===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Place garbage collection safepoints at appropriate locations in the IR. This
+// does not make relocation semantics or variable liveness explicit. That's
+// done by RewriteStatepointsForGC.
+//
+// Terminology:
+// - A call is said to be "parseable" if there is a stack map generated for the
+// return PC of the call. A runtime can determine where values listed in the
+// deopt arguments and (after RewriteStatepointsForGC) gc arguments are located
+// on the stack when the code is suspended inside such a call. Every parse
+// point is represented by a call wrapped in a gc.statepoint intrinsic.
+// - A "poll" is an explicit check in the generated code to determine if the
+// runtime needs the generated code to cooperate by calling a helper routine
+// and thus suspending its execution at a known state. The call to the helper
+// routine will be parseable. The (gc & runtime specific) logic of a poll is
+// assumed to be provided in a function named "gc.safepoint_poll".
+//
+// We aim to insert polls such that running code can quickly be brought to a
+// well defined state for inspection by the collector. In the current
+// implementation, this is done via the insertion of poll sites at method entry
+// and the backedge of most loops. We try to avoid inserting more polls than
+// are necessary to ensure a finite period between poll sites. This is not
+// because the poll itself is expensive in the generated code; it's not. Polls
+// do tend to impact the optimizer itself in negative ways; we'd like to avoid
+// perturbing the optimization of the method as much as we can.
+//
+// We also need to make most call sites parseable. The callee might execute a
+// poll (or otherwise be inspected by the GC). If so, the entire stack
+// (including the suspended frame of the current method) must be parseable.
+//
+// This pass will insert:
+// - Call parse points ("call safepoints") for any call which may need to
+// reach a safepoint during the execution of the callee function.
+// - Backedge safepoint polls and entry safepoint polls to ensure that
+// executing code reaches a safepoint poll in a finite amount of time.
+//
+// We do not currently support return statepoints, but adding them would not
+// be hard. They are not required for correctness - entry safepoints are an
+// alternative - but some GCs may prefer them. Patches welcome.
+//
+//===----------------------------------------------------------------------===//
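As a hedged sketch of the poll contract described above (every name below is a hypothetical stand-in for the runtime-provided "gc.safepoint_poll" body):

extern volatile bool GcPollRequested;  // hypothetical flag the collector sets
void GcEnterSafepoint();               // hypothetical, parseable runtime call
void HypotheticalSafepointPoll() {     // plays the role of "gc.safepoint_poll"
  if (GcPollRequested)
    GcEnterSafepoint();
}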
+
+#include "llvm/Pass.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#define DEBUG_TYPE "safepoint-placement"
+STATISTIC(NumEntrySafepoints, "Number of entry safepoints inserted");
+STATISTIC(NumCallSafepoints, "Number of call safepoints inserted");
+STATISTIC(NumBackedgeSafepoints, "Number of backedge safepoints inserted");
+
+STATISTIC(CallInLoop, "Number of loops w/o safepoints due to calls in loop");
+STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");
+
+using namespace llvm;
+
+// Ignore opportunities to avoid placing safepoints on backedges; useful for
+// validation.
+static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
+ cl::init(false));
+
+/// If true, do not place backedge safepoints in counted loops.
+static cl::opt<bool> SkipCounted("spp-counted", cl::Hidden, cl::init(true));
+
+// If true, split the backedge of a loop when placing the safepoint, otherwise
+// split the latch block itself. Both are useful to keep around for
+// experimentation, but in practice, it looks like splitting the backedge
+// optimizes better.
+static cl::opt<bool> SplitBackedge("spp-split-backedge", cl::Hidden,
+ cl::init(false));
+
+// Print tracing output
+static cl::opt<bool> TraceLSP("spp-trace", cl::Hidden, cl::init(false));
+
+namespace {
+
+/// An analysis pass whose purpose is to identify each of the backedges in
+/// the function which require a safepoint poll to be inserted.
+struct PlaceBackedgeSafepointsImpl : public FunctionPass {
+ static char ID;
+
+ /// The output of the pass - gives a list of each backedge (described by
+ /// pointing at the branch) which needs a poll inserted.
+ std::vector<TerminatorInst *> PollLocations;
+
+ /// True unless we're running spp-no-call, in which case we need to disable
+ /// the call-dependent placement optimizations.
+ bool CallSafepointsEnabled;
+
+ ScalarEvolution *SE = nullptr;
+ DominatorTree *DT = nullptr;
+ LoopInfo *LI = nullptr;
+
+ PlaceBackedgeSafepointsImpl(bool CallSafepoints = false)
+ : FunctionPass(ID), CallSafepointsEnabled(CallSafepoints) {
+ initializePlaceBackedgeSafepointsImplPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *);
+ void runOnLoopAndSubLoops(Loop *L) {
+ // Visit all the subloops
+ for (auto I = L->begin(), E = L->end(); I != E; I++)
+ runOnLoopAndSubLoops(*I);
+ runOnLoop(L);
+ }
+
+ bool runOnFunction(Function &F) override {
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
+ runOnLoopAndSubLoops(*I);
+ }
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ // We no longer modify the IR at all in this pass. Thus all
+ // analyses are preserved.
+ AU.setPreservesAll();
+ }
+};
+}
+
+static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
+static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
+static cl::opt<bool> NoBackedge("spp-no-backedge", cl::Hidden, cl::init(false));
+
+namespace {
+struct PlaceSafepoints : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ PlaceSafepoints() : FunctionPass(ID) {
+ initializePlaceSafepointsPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // We modify the graph wholesale (inlining, block insertion, etc). We
+ // preserve nothing at the moment. We could potentially preserve dom tree
+ // if that was worth doing
+ }
+};
+}
+
+// Insert a safepoint poll immediately before the given instruction. Does
+// not handle the parseability of state at the runtime call; that's the
+// caller's job.
+static void
+InsertSafepointPoll(Instruction *InsertBefore,
+ std::vector<CallSite> &ParsePointsNeeded /*rval*/);
+
+static bool isGCLeafFunction(const CallSite &CS);
+
+static bool needsStatepoint(const CallSite &CS) {
+ if (isGCLeafFunction(CS))
+ return false;
+ if (CS.isCall()) {
+ CallInst *call = cast<CallInst>(CS.getInstruction());
+ if (call->isInlineAsm())
+ return false;
+ }
+ if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS)) {
+ return false;
+ }
+ return true;
+}
+
+static Value *ReplaceWithStatepoint(const CallSite &CS, Pass *P);
+
+/// Returns true if this loop is known to contain a call safepoint which
+/// must unconditionally execute on any iteration of the loop which returns
+/// to the loop header via an edge from Pred. Returns a conservatively
+/// correct answer; i.e. false is always valid.
+static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
+ BasicBlock *Pred,
+ DominatorTree &DT) {
+ // In general, we're looking for any cut of the graph which ensures
+ // there's a call safepoint along every edge between Header and Pred.
+ // For the moment, we look only for the 'cuts' that consist of a single call
+ // instruction in a block which is dominated by the Header and dominates the
+ // loop latch (Pred) block. Somewhat surprisingly, walking the entire chain
+ // of such dominating blocks gets substantially more occurrences than just
+ // checking the Pred and Header blocks themselves. This may be due to the
+ // density of loop exit conditions caused by range and null checks.
+ // TODO: structure this as an analysis pass, cache the result for subloops,
+ // avoid dom tree recalculations
+ assert(DT.dominates(Header, Pred) && "loop latch not dominated by header?");
+
+ BasicBlock *Current = Pred;
+ while (true) {
+ for (Instruction &I : *Current) {
+ if (auto CS = CallSite(&I))
+ // Note: Technically, needing a safepoint isn't quite the right
+ // condition here. We should instead be checking if the target method
+ // has an unconditional poll. In practice, this is only a theoretical
+ // concern since we don't have any methods with conditional-only
+ // safepoint polls.
+ if (needsStatepoint(CS))
+ return true;
+ }
+
+ if (Current == Header)
+ break;
+ Current = DT.getNode(Current)->getIDom()->getBlock();
+ }
+
+ return false;
+}
+
+/// Returns true if this loop is known to terminate in a finite number of
+/// iterations. Note that this function may return false for a loop which
+/// does actually terminate in a finite constant number of iterations, due to
+/// conservatism in the analysis.
+static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
+ BasicBlock *Pred) {
+ // Only used when SkipCounted is off
+ const unsigned upperTripBound = 8192;
+
+ // A conservative bound on the loop as a whole.
+ const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
+ if (MaxTrips != SE->getCouldNotCompute()) {
+ if (SE->getUnsignedRange(MaxTrips).getUnsignedMax().ult(upperTripBound))
+ return true;
+ if (SkipCounted &&
+ SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(32))
+ return true;
+ }
+
+ // If this is a conditional branch to the header with the alternate path
+ // being outside the loop, we can ask questions about the execution frequency
+ // of the exit block.
+ if (L->isLoopExiting(Pred)) {
+ // This returns an exact expression only. TODO: We really only need an
+ // upper bound here, but SE doesn't expose that.
+ const SCEV *MaxExec = SE->getExitCount(L, Pred);
+ if (MaxExec != SE->getCouldNotCompute()) {
+ if (SE->getUnsignedRange(MaxExec).getUnsignedMax().ult(upperTripBound))
+ return true;
+ if (SkipCounted &&
+ SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(32))
+ return true;
+ }
+ }
+
+ return /* not finite */ false;
+}
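+
+// For illustration (a hedged sketch, not taken from this file): a simple
+// counted loop such as
+//
+//   loop:
+//     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+//     %iv.next = add nuw nsw i32 %iv, 1
+//     %cmp = icmp slt i32 %iv.next, %n
+//     br i1 %cmp, label %loop, label %exit
+//
+// has a backedge-taken count which ScalarEvolution can bound within 32 bits,
+// so with the default -spp-counted behaviour no backedge poll is placed in it.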
+
+static void scanOneBB(Instruction *start, Instruction *end,
+ std::vector<CallInst *> &calls,
+ std::set<BasicBlock *> &seen,
+ std::vector<BasicBlock *> &worklist) {
+ for (BasicBlock::iterator itr(start);
+ itr != start->getParent()->end() && itr != BasicBlock::iterator(end);
+ itr++) {
+ if (CallInst *CI = dyn_cast<CallInst>(&*itr)) {
+ calls.push_back(CI);
+ }
+ // FIXME: This code does not handle invokes
+ assert(!dyn_cast<InvokeInst>(&*itr) &&
+ "support for invokes in poll code needed");
+ // Only add the successor blocks if we reach the terminator instruction
+ // without encountering end first
+ if (itr->isTerminator()) {
+ BasicBlock *BB = itr->getParent();
+ for (BasicBlock *Succ : successors(BB)) {
+ if (seen.count(Succ) == 0) {
+ worklist.push_back(Succ);
+ seen.insert(Succ);
+ }
+ }
+ }
+ }
+}
+static void scanInlinedCode(Instruction *start, Instruction *end,
+ std::vector<CallInst *> &calls,
+ std::set<BasicBlock *> &seen) {
+ calls.clear();
+ std::vector<BasicBlock *> worklist;
+ seen.insert(start->getParent());
+ scanOneBB(start, end, calls, seen, worklist);
+ while (!worklist.empty()) {
+ BasicBlock *BB = worklist.back();
+ worklist.pop_back();
+ scanOneBB(&*BB->begin(), end, calls, seen, worklist);
+ }
+}
+
+bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
+ // Loop through all loop latches (branches controlling backedges). We need
+ // to place a safepoint on every backedge (potentially).
+ // Note: In common usage, there will be only one edge due to LoopSimplify
+ // having run sometime earlier in the pipeline, but this code must be correct
+ // w.r.t. loops with multiple backedges.
+ BasicBlock *header = L->getHeader();
+ SmallVector<BasicBlock*, 16> LoopLatches;
+ L->getLoopLatches(LoopLatches);
+ for (BasicBlock *pred : LoopLatches) {
+ assert(L->contains(pred));
+
+ // Make a policy decision about whether this loop needs a safepoint or
+ // not. Note that this is about unburdening the optimizer in loops, not
+ // avoiding the runtime cost of the actual safepoint.
+ if (!AllBackedges) {
+ if (mustBeFiniteCountedLoop(L, SE, pred)) {
+ if (TraceLSP)
+ errs() << "skipping safepoint placement in finite loop\n";
+ FiniteExecution++;
+ continue;
+ }
+ if (CallSafepointsEnabled &&
+ containsUnconditionalCallSafepoint(L, header, pred, *DT)) {
+ // Note: This is only semantically legal since we won't do any further
+ // IPO or inlining before the actual call insertion. If we did, we
+ // might later lose this call safepoint.
+ if (TraceLSP)
+ errs() << "skipping safepoint placement due to unconditional call\n";
+ CallInLoop++;
+ continue;
+ }
+ }
+
+ // TODO: We can create an inner loop which runs a finite number of
+ // iterations with an outer loop which contains a safepoint. This would
+ // not help runtime performance that much, but it might help our ability to
+ // optimize the inner loop.
+
+ // Safepoint insertion would involve creating a new basic block (as the
+ // target of the current backedge) which does the safepoint (of all live
+ // variables) and branches to the true header
+ TerminatorInst *term = pred->getTerminator();
+
+ if (TraceLSP) {
+ errs() << "[LSP] terminator instruction: ";
+ term->dump();
+ }
+
+ PollLocations.push_back(term);
+ }
+
+ return false;
+}
+
+/// Returns true if an entry safepoint is not required before this callsite in
+/// the caller function.
+static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) {
+ Instruction *Inst = CS.getInstruction();
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_gc_statepoint:
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ // These can wrap an actual call which may grow the stack by an unbounded
+ // amount or run forever.
+ return false;
+ default:
+ // Most LLVM intrinsics are things which do not expand to actual calls, or
+ // at least if they do, are leaf functions that cause only finite stack
+ // growth. In particular, the optimizer likes to form things like memsets
+ // out of stores in the original IR. Another important example is
+ // llvm.frameescape which must occur in the entry block. Inserting a
+ // safepoint before it is not legal since it could push the frameescape
+ // out of the entry block.
+ return true;
+ }
+ }
+ return false;
+}
+
+static Instruction *findLocationForEntrySafepoint(Function &F,
+ DominatorTree &DT) {
+
+ // Conceptually, this poll needs to be on method entry, but in
+ // practice, we place it as late in the entry block as possible. We
+ // can place it as late as we want as long as it dominates all calls
+ // that can grow the stack. This, combined with backedge polls,
+ // gives us all the progress guarantees we need.
+
+ // hasNextInstruction and nextInstruction are used to iterate
+ // through a "straight line" execution sequence.
+
+ auto hasNextInstruction = [](Instruction *I) {
+ if (!I->isTerminator()) {
+ return true;
+ }
+ BasicBlock *nextBB = I->getParent()->getUniqueSuccessor();
+ return nextBB && (nextBB->getUniquePredecessor() != nullptr);
+ };
+
+ auto nextInstruction = [&hasNextInstruction](Instruction *I) {
+ assert(hasNextInstruction(I) &&
+ "first check if there is a next instruction!");
+ if (I->isTerminator()) {
+ return I->getParent()->getUniqueSuccessor()->begin();
+ } else {
+ return std::next(BasicBlock::iterator(I));
+ }
+ };
+
+ Instruction *cursor = nullptr;
+ for (cursor = F.getEntryBlock().begin(); hasNextInstruction(cursor);
+ cursor = nextInstruction(cursor)) {
+
+ // We need to ensure a safepoint poll occurs before any 'real' call. The
+ // easiest way to ensure finite execution between safepoints in the face of
+ // recursive and mutually recursive functions is to enforce that each take
+ // a safepoint. Additionally, we need to ensure a poll before any call
+ // which can grow the stack by an unbounded amount. This isn't required
+ // for GC semantics per se, but is a common requirement for languages
+ // which detect stack overflow via guard pages and then throw exceptions.
+ if (auto CS = CallSite(cursor)) {
+ if (doesNotRequireEntrySafepointBefore(CS))
+ continue;
+ break;
+ }
+ }
+
+ assert((hasNextInstruction(cursor) || cursor->isTerminator()) &&
+ "either we stopped because of a call, or because of terminator");
+
+ return cursor;
+}
+
+/// Identify the list of call sites which need to have parseable state.
+static void findCallSafepoints(Function &F,
+ std::vector<CallSite> &Found /*rval*/) {
+ assert(Found.empty() && "must be empty!");
+ for (Instruction &I : inst_range(F)) {
+ Instruction *inst = &I;
+ if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
+ CallSite CS(inst);
+
+ // No safepoint needed or wanted
+ if (!needsStatepoint(CS)) {
+ continue;
+ }
+
+ Found.push_back(CS);
+ }
+ }
+}
+
+/// Implement a unique function which doesn't require that we sort the input
+/// vector. Sorting would change the output of a couple of tests in ways
+/// which make them less useful for testing fused safepoints.
+template <typename T> static void unique_unsorted(std::vector<T> &vec) {
+ std::set<T> seen;
+ std::vector<T> tmp;
+ vec.reserve(vec.size());
+ std::swap(tmp, vec);
+ for (auto V : tmp) {
+ if (seen.insert(V).second) {
+ vec.push_back(V);
+ }
+ }
+}
+
+static std::string GCSafepointPollName("gc.safepoint_poll");
+
+static bool isGCSafepointPoll(Function &F) {
+ return F.getName().equals(GCSafepointPollName);
+}
+
+/// Returns true if this function should be rewritten to include safepoint
+/// polls and parseable call sites. The main point of this function is to be
+/// an extension point for custom logic.
+static bool shouldRewriteFunction(Function &F) {
+ // TODO: This should check the GCStrategy
+ if (F.hasGC()) {
+ const char *FunctionGCName = F.getGC();
+ const StringRef StatepointExampleName("statepoint-example");
+ const StringRef CoreCLRName("coreclr");
+ return (StatepointExampleName == FunctionGCName) ||
+ (CoreCLRName == FunctionGCName);
+ } else
+ return false;
+}
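+
+// As an illustrative example (not taken from this file), a function opts in
+// to this rewriting by naming one of the GC strategies checked above:
+//
+//   define i8 addrspace(1)* @foo() gc "statepoint-example" { ... }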
+
+// TODO: These should become properties of the GCStrategy, possibly with
+// command line overrides.
+static bool enableEntrySafepoints(Function &F) { return !NoEntry; }
+static bool enableBackedgeSafepoints(Function &F) { return !NoBackedge; }
+static bool enableCallSafepoints(Function &F) { return !NoCall; }
+
+// Normalize the basic block to make it ready to be the target of an invoke
+// statepoint. Ensure that 'BB' does not have phi nodes. It may require
+// splitting it.
+static BasicBlock *normalizeForInvokeSafepoint(BasicBlock *BB,
+ BasicBlock *InvokeParent) {
+ BasicBlock *ret = BB;
+
+ if (!BB->getUniquePredecessor()) {
+ ret = SplitBlockPredecessors(BB, InvokeParent, "");
+ }
+
+ // Now that 'ret' has a unique predecessor we can safely remove all phi
+ // nodes from it.
+ FoldSingleEntryPHINodes(ret);
+ assert(!isa<PHINode>(ret->begin()));
+
+ return ret;
+}
+
+bool PlaceSafepoints::runOnFunction(Function &F) {
+ if (F.isDeclaration() || F.empty()) {
+ // This is a declaration, nothing to do. Must exit early to avoid crash in
+ // dom tree calculation
+ return false;
+ }
+
+ if (isGCSafepointPoll(F)) {
+ // Given we're inlining this inside of safepoint poll insertion, this
+ // doesn't make any sense. Note that we do make any contained calls
+ // parseable after we inline a poll.
+ return false;
+ }
+
+ if (!shouldRewriteFunction(F))
+ return false;
+
+ bool modified = false;
+
+ // In various bits below, we rely on the fact that uses are reachable from
+ // defs. When there are basic blocks unreachable from the entry, dominance
+ // and reachability queries return nonsensical results. Thus, we preprocess
+ // the function to ensure these properties hold.
+ modified |= removeUnreachableBlocks(F);
+
+ // STEP 1 - Insert the safepoint polling locations. We do not need to
+ // actually insert parse points yet. That will be done for all polls and
+ // calls in a single pass.
+
+ DominatorTree DT;
+ DT.recalculate(F);
+
+ SmallVector<Instruction *, 16> PollsNeeded;
+ std::vector<CallSite> ParsePointNeeded;
+
+ if (enableBackedgeSafepoints(F)) {
+ // Construct a pass manager to run the LoopPass backedge logic. We
+ // need the pass manager to handle scheduling all the loop passes
+ // appropriately. Doing this by hand is painful and just not worth messing
+ // with for the moment.
+ legacy::FunctionPassManager FPM(F.getParent());
+ bool CanAssumeCallSafepoints = enableCallSafepoints(F);
+ PlaceBackedgeSafepointsImpl *PBS =
+ new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
+ FPM.add(PBS);
+ FPM.run(F);
+
+ // We preserve dominance information when inserting the poll, otherwise
+ // we'd have to recalculate this on every insert
+ DT.recalculate(F);
+
+ auto &PollLocations = PBS->PollLocations;
+
+ auto OrderByBBName = [](Instruction *a, Instruction *b) {
+ return a->getParent()->getName() < b->getParent()->getName();
+ };
+ // We need the order of list to be stable so that naming ends up stable
+ // when we split edges. This makes test cases much easier to write.
+ std::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);
+
+ // We can sometimes end up with duplicate poll locations. This happens if
+ // a single loop is visited more than once. The fact this happens seems
+ // wrong, but it does happen for the split-backedge.ll test case.
+ PollLocations.erase(std::unique(PollLocations.begin(),
+ PollLocations.end()),
+ PollLocations.end());
+
+ // Insert a poll at each point the analysis pass identified
+ // The poll location must be the terminator of a loop latch block.
+ for (TerminatorInst *Term : PollLocations) {
+ // We are inserting a poll, the function is modified
+ modified = true;
+
+ if (SplitBackedge) {
+ // Split the backedge of the loop and insert the poll within that new
+ // basic block. This creates a loop with two latches per original
+ // latch (which is non-ideal), but this appears to be easier to
+ // optimize in practice than inserting the poll immediately before the
+ // latch test.
+
+ // Since this is a latch, at least one of the successors must dominate
+ // it. It's possible that we have a) duplicate edges to the same
+ // header and b) edges to distinct loop headers. We need to insert
+ // polls on each.
+ SetVector<BasicBlock *> Headers;
+ for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (DT.dominates(Succ, Term->getParent())) {
+ Headers.insert(Succ);
+ }
+ }
+ assert(!Headers.empty() && "poll location is not a loop latch?");
+
+ // The split loop structure here is so that we only need to recalculate
+ // the dominator tree once. Alternatively, we could just keep it up to
+ // date and use a more natural merged loop.
+ SetVector<BasicBlock *> SplitBackedges;
+ for (BasicBlock *Header : Headers) {
+ BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
+ PollsNeeded.push_back(NewBB->getTerminator());
+ NumBackedgeSafepoints++;
+ }
+ } else {
+ // Split the latch block itself, right before the terminator.
+ PollsNeeded.push_back(Term);
+ NumBackedgeSafepoints++;
+ }
+ }
+ }
+
+ if (enableEntrySafepoints(F)) {
+ Instruction *Location = findLocationForEntrySafepoint(F, DT);
+ if (!Location) {
+ // policy choice not to insert?
+ } else {
+ PollsNeeded.push_back(Location);
+ modified = true;
+ NumEntrySafepoints++;
+ }
+ }
+
+ // Now that we've identified all the needed safepoint poll locations, insert
+ // safepoint polls themselves.
+ for (Instruction *PollLocation : PollsNeeded) {
+ std::vector<CallSite> RuntimeCalls;
+ InsertSafepointPoll(PollLocation, RuntimeCalls);
+ ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
+ RuntimeCalls.end());
+ }
+ PollsNeeded.clear(); // make sure we don't accidentally use it below
+ // The dominator tree has been invalidated by the inlining performed in the
+ // above loop. TODO: Teach the inliner how to update the dom tree?
+ DT.recalculate(F);
+
+ if (enableCallSafepoints(F)) {
+ std::vector<CallSite> Calls;
+ findCallSafepoints(F, Calls);
+ NumCallSafepoints += Calls.size();
+ ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
+ }
+
+ // Unique the vectors since we can end up with duplicates if we scan the call
+ // site for call safepoints after we add it for entry or backedge. The
+ // only reason we need tracking at all is that some functions might have
+ // polls but not call safepoints and thus we might miss marking the runtime
+ // calls for the polls. (This is useful in test cases!)
+ unique_unsorted(ParsePointNeeded);
+
+ // Any parse point (no matter what source) will be handled here
+
+ // We're about to start modifying the function
+ if (!ParsePointNeeded.empty())
+ modified = true;
+
+ // Now run through and insert the safepoints, but do _NOT_ update or remove
+ // any existing uses. We have references to live variables that need to
+ // survive to the last iteration of this loop.
+ std::vector<Value *> Results;
+ Results.reserve(ParsePointNeeded.size());
+ for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
+ CallSite &CS = ParsePointNeeded[i];
+
+ // For invoke statepoints we need to remove all phi nodes at the normal
+ // destination block.
+ // The reason is that we can place the gc_result only after the last phi
+ // node in the basic block, and we would get malformed code after RAUW
+ // for the gc_result if one of these phi nodes uses the result from the
+ // invoke.
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ normalizeForInvokeSafepoint(Invoke->getNormalDest(),
+ Invoke->getParent());
+ }
+
+ Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
+ Results.push_back(GCResult);
+ }
+ assert(Results.size() == ParsePointNeeded.size());
+
+ // Adjust all users of the old call sites to use the new ones instead
+ for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
+ CallSite &CS = ParsePointNeeded[i];
+ Value *GCResult = Results[i];
+ if (GCResult) {
+ // Cannot RAUW for the invoke gc result when phi nodes are present.
+ assert(CS.isCall() || !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));
+
+ // Replace all uses with the new call
+ CS.getInstruction()->replaceAllUsesWith(GCResult);
+ }
+
+ // Now that we've handled all uses, remove the original call itself
+ // Note: The insert point can't be the deleted instruction!
+ CS.getInstruction()->eraseFromParent();
+ }
+ return modified;
+}
+
+char PlaceBackedgeSafepointsImpl::ID = 0;
+char PlaceSafepoints::ID = 0;
+
+FunctionPass *llvm::createPlaceSafepointsPass() {
+ return new PlaceSafepoints();
+}
+
+INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
+ "place-backedge-safepoints-impl",
+ "Place Backedge Safepoints", false, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
+ "place-backedge-safepoints-impl",
+ "Place Backedge Safepoints", false, false)
+
+INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
+ false, false)
+INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
+ false, false)
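+
+// Illustrative usage (a sketch, not part of the pass itself): once registered
+// as above, the pass can be exercised from the command line, e.g.
+//
+//   opt -place-safepoints -rewrite-statepoints-for-gc -S input.ll
+//
+// or scheduled programmatically via createPlaceSafepointsPass().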
+
+static bool isGCLeafFunction(const CallSite &CS) {
+ Instruction *inst = CS.getInstruction();
+ if (isa<IntrinsicInst>(inst)) {
+ // Most LLVM intrinsics are things which can never take a safepoint.
+ // As a result, we don't need to have the stack parsable at the
+ // callsite. This is a highly useful optimization since intrinsic
+ // calls are fairly prevalent, particularly in debug builds.
+ return true;
+ }
+
+ // If this function is marked explicitly as a leaf call, we don't need to
+ // place a safepoint for it. In fact, for correctness we *can't* in many
+ // cases. Note: Indirect calls return null for the called function;
+ // these obviously aren't runtime functions with attributes.
+ // TODO: Support attributes on the call site as well.
+ const Function *F = CS.getCalledFunction();
+ bool isLeaf =
+ F &&
+ F->getFnAttribute("gc-leaf-function").getValueAsString().equals("true");
+ if (isLeaf) {
+ return true;
+ }
+ return false;
+}
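+
+// Illustrative IR for the attribute checked above (the callee name here is
+// made up):
+//
+//   declare void @runtime_write_barrier() "gc-leaf-function"="true"
+//
+// Calls to such a function are never turned into parse points by this pass.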
+
+static void
+InsertSafepointPoll(Instruction *InsertBefore,
+ std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Module *M = InsertBefore->getModule();
+ assert(M && "must be part of a module");
+
+ // Inline the safepoint poll implementation - this will get all the branches,
+ // control flow, etc. Most importantly, it will introduce the actual slow
+ // path call - where we need to insert a safepoint (parse point).
+
+ auto *F = M->getFunction(GCSafepointPollName);
+ assert(F->getType()->getElementType() ==
+ FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
+ "gc.safepoint_poll declared with wrong type");
+ assert(!F->empty() && "gc.safepoint_poll must be a non-empty function");
+ CallInst *PollCall = CallInst::Create(F, "", InsertBefore);
+
+ // Record some information about the call site we're replacing
+ BasicBlock::iterator before(PollCall), after(PollCall);
+ bool isBegin(false);
+ if (before == OrigBB->begin()) {
+ isBegin = true;
+ } else {
+ before--;
+ }
+ after++;
+ assert(after != OrigBB->end() && "must have successor");
+
+ // do the actual inlining
+ InlineFunctionInfo IFI;
+ bool InlineStatus = InlineFunction(PollCall, IFI);
+ assert(InlineStatus && "inline must succeed");
+ (void)InlineStatus; // suppress warning in release-asserts
+
+ // Check post conditions
+ assert(IFI.StaticAllocas.empty() && "can't have allocs");
+
+ std::vector<CallInst *> calls; // new calls
+ std::set<BasicBlock *> BBs; // new BBs + insertee
+ // Include only the newly inserted instructions. Note: begin may not be
+ // valid if we inserted at the beginning of the basic block.
+ BasicBlock::iterator start;
+ if (isBegin) {
+ start = OrigBB->begin();
+ } else {
+ start = before;
+ start++;
+ }
+
+ // If your poll function includes an unreachable at the end, that's not
+ // valid. Bugpoint likes to create this, so check for it.
+ assert(isPotentiallyReachable(&*start, &*after, nullptr, nullptr) &&
+ "malformed poll function");
+
+ scanInlinedCode(&*(start), &*(after), calls, BBs);
+ assert(!calls.empty() && "slow path not found for safepoint poll");
+
+ // Record the fact we need a parsable state at the runtime call contained in
+ // the poll function. This is required so that the runtime knows how to
+ // parse the last frame when we actually take the safepoint (i.e. execute
+ // the slow path)
+ assert(ParsePointsNeeded.empty());
+ for (size_t i = 0; i < calls.size(); i++) {
+
+ // No safepoint needed or wanted
+ if (!needsStatepoint(calls[i])) {
+ continue;
+ }
+
+ // These are likely runtime calls. Should we assert that via calling
+ // convention or something?
+ ParsePointsNeeded.push_back(CallSite(calls[i]));
+ }
+ assert(ParsePointsNeeded.size() <= calls.size());
+}
+
+/// Replaces the given call site (Call or Invoke) with a gc.statepoint
+/// intrinsic with an empty deoptimization arguments list. This does
+/// NOT do explicit relocation for GC support.
+static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
+ Pass *P) {
+ assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
+ "must be set");
+
+ // TODO: technically, a pass is not allowed to get functions from within a
+ // function pass since it might trigger a new function addition. Refactor
+ // this logic out to the initialization of the pass. Doesn't appear to
+ // matter in practice.
+
+ // Then go ahead and use the builder to actually do the inserts. We insert
+ // immediately before the previous instruction under the assumption that all
+ // arguments will be available here. We can't insert afterwards since we may
+ // be replacing a terminator.
+ IRBuilder<> Builder(CS.getInstruction());
+
+ // Note: The gc args are not filled in at this time, that's handled by
+ // RewriteStatepointsForGC (which is currently under review).
+
+ // Create the statepoint given all the arguments
+ Instruction *Token = nullptr;
+
+ uint64_t ID;
+ uint32_t NumPatchBytes;
+
+ AttributeSet OriginalAttrs = CS.getAttributes();
+ Attribute AttrID =
+ OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, "statepoint-id");
+ Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
+ AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
+
+ AttrBuilder AttrsToRemove;
+ bool HasID = AttrID.isStringAttribute() &&
+ !AttrID.getValueAsString().getAsInteger(10, ID);
+
+ if (HasID)
+ AttrsToRemove.addAttribute("statepoint-id");
+ else
+ ID = 0xABCDEF00;
+
+ bool HasNumPatchBytes =
+ AttrNumPatchBytes.isStringAttribute() &&
+ !AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);
+
+ if (HasNumPatchBytes)
+ AttrsToRemove.addAttribute("statepoint-num-patch-bytes");
+ else
+ NumPatchBytes = 0;
+
+ OriginalAttrs = OriginalAttrs.removeAttributes(
+ CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
+ AttrsToRemove);
+
+ Value *StatepointTarget = NumPatchBytes == 0
+ ? CS.getCalledValue()
+ : ConstantPointerNull::get(cast<PointerType>(
+ CS.getCalledValue()->getType()));
+
+ if (CS.isCall()) {
+ CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
+ CallInst *Call = Builder.CreateGCStatepointCall(
+ ID, NumPatchBytes, StatepointTarget,
+ makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
+ "safepoint_token");
+ Call->setTailCall(ToReplace->isTailCall());
+ Call->setCallingConv(ToReplace->getCallingConv());
+
+ // In case we can handle this set of attributes, set up the function
+ // attributes directly on the statepoint and the return attributes later
+ // for the gc_result intrinsic.
+ Call->setAttributes(OriginalAttrs.getFnAttributes());
+
+ Token = Call;
+
+ // Put the following gc_result and gc_relocate calls immediately after
+ // the old call (which we're about to delete).
+ assert(ToReplace->getNextNode() && "not a terminator, must have next");
+ Builder.SetInsertPoint(ToReplace->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
+ } else if (CS.isInvoke()) {
+ InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
+
+ // Insert the new invoke into the old block. We'll remove the old one in a
+ // moment at which point this will become the new terminator for the
+ // original block.
+ Builder.SetInsertPoint(ToReplace->getParent());
+ InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
+ ID, NumPatchBytes, StatepointTarget, ToReplace->getNormalDest(),
+ ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
+ None, None, "safepoint_token");
+
+ Invoke->setCallingConv(ToReplace->getCallingConv());
+
+ // In case we can handle this set of attributes, set up the function
+ // attributes directly on the statepoint and the return attributes later
+ // for the gc_result intrinsic.
+ Invoke->setAttributes(OriginalAttrs.getFnAttributes());
+
+ Token = Invoke;
+
+ // We'll insert the gc.result into the normal block
+ BasicBlock *NormalDest = ToReplace->getNormalDest();
+ // Cannot insert gc.result when phi nodes are present.
+ // Should have removed these cases prior to running this function.
+ assert(!isa<PHINode>(NormalDest->begin()));
+ Instruction *IP = &*(NormalDest->getFirstInsertionPt());
+ Builder.SetInsertPoint(IP);
+ } else {
+ llvm_unreachable("unexpected type of CallSite");
+ }
+ assert(Token);
+
+ // Handle the return value of the original call - update all uses to use a
+ // gc_result hanging off the statepoint node we just inserted
+
+ // Only add the gc_result iff there is actually a used result
+ if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
+ std::string TakenName =
+ CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
+ CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName);
+ GCResult->setAttributes(OriginalAttrs.getRetAttributes());
+ return GCResult;
+ } else {
+ // No return value for the call.
+ return nullptr;
+ }
+}
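+
+// Illustrative sketch of the rewrite performed by ReplaceWithStatepoint above
+// (intrinsic signatures deliberately elided; see the statepoint documentation
+// for the exact argument layout):
+//
+//   %ret = call i32 @foo(i32 %a)
+//
+// becomes, roughly, a @llvm.experimental.gc.statepoint call wrapping @foo
+// whose returned token feeds a @llvm.experimental.gc.result call that
+// produces %ret.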
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 4e02255..b677523 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -59,7 +59,7 @@ namespace {
}
#ifndef NDEBUG
-/// PrintOps - Print out the expression identified in the Ops list.
+/// Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
Module *M = I->getParent()->getParent()->getParent();
@@ -233,8 +233,8 @@ INITIALIZE_PASS(Reassociate, "reassociate",
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
-/// isReassociableOp - Return true if V is an instruction of the specified
-/// opcode and if it only has one use.
+/// Return true if V is an instruction of the specified opcode and if it
+/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
cast<Instruction>(V)->getOpcode() == Opcode &&
@@ -321,10 +321,8 @@ unsigned Reassociate::getRank(Value *V) {
// If this is a not or neg instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
- Type *Ty = V->getType();
- if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) ||
- (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
- !BinaryOperator::isFNeg(I)))
+ if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
+ !BinaryOperator::isFNeg(I))
++Rank;
DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
@@ -351,7 +349,7 @@ void Reassociate::canonicalizeOperands(Instruction *I) {
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
@@ -363,7 +361,7 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
@@ -375,7 +373,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
else {
BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
@@ -384,12 +382,11 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
}
}
-/// LowerNegateToMultiply - Replace 0-X with X*-1.
-///
+/// Replace 0-X with X*-1.
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
Type *Ty = Neg->getType();
- Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty)
- : ConstantFP::get(Ty, -1.0);
+ Constant *NegOne = Ty->isIntOrIntVectorTy() ?
+ ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
@@ -399,8 +396,8 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
return Res;
}
-/// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda
-/// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for
+/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
+/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
/// even x in Bitwidth-bit arithmetic.
@@ -410,7 +407,7 @@ static unsigned CarmichaelShift(unsigned Bitwidth) {
return Bitwidth - 2;
}
-/// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS',
+/// Add the extra weight 'RHS' to the existing weight 'LHS',
/// reducing the combined weight using any special properties of the operation.
/// The existing weight LHS represents the computation X op X op ... op X where
/// X occurs LHS times. The combined weight represents X op X op ... op X with
@@ -492,7 +489,7 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
typedef std::pair<Value*, APInt> RepeatedValue;
-/// LinearizeExprTree - Given an associative binary expression, return the leaf
+/// Given an associative binary expression, return the leaf
/// nodes in Ops along with their weights (how many times the leaf occurs). The
/// original expression is the same as
/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
@@ -742,8 +739,8 @@ static bool LinearizeExprTree(BinaryOperator *I,
return Changed;
}
-// RewriteExprTree - Now that the operands for this expression tree are
-// linearized and optimized, emit them in-order.
+/// Now that the operands for this expression tree are
+/// linearized and optimized, emit them in-order.
void Reassociate::RewriteExprTree(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
assert(Ops.size() > 1 && "Single values should be used directly!");
@@ -872,7 +869,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I);
- if (NewOp->getType()->isFloatingPointTy())
+ if (NewOp->getType()->isFPOrFPVectorTy())
NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
NewOp = NodesToRewrite.pop_back_val();
@@ -912,15 +909,18 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
RedoInsts.insert(NodesToRewrite[i]);
}
-/// NegateValue - Insert instructions before the instruction pointed to by BI,
+/// Insert instructions before the instruction pointed to by BI,
/// that computes the negative version of the value specified. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
static Value *NegateValue(Value *V, Instruction *BI) {
- if (ConstantFP *C = dyn_cast<ConstantFP>(V))
- return ConstantExpr::getFNeg(C);
- if (Constant *C = dyn_cast<Constant>(V))
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->getType()->isFPOrFPVectorTy()) {
+ return ConstantExpr::getFNeg(C);
+ }
return ConstantExpr::getNeg(C);
+ }
+
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
@@ -984,8 +984,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {
return CreateNeg(V, V->getName() + ".neg", BI, BI);
}
-/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
-/// X-Y into (X + -Y).
+/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub))
@@ -1014,9 +1013,8 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
return false;
}
-/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
-/// only used by an add, transform this into (X+(0-Y)) to promote better
-/// reassociation.
+/// If we have (X-Y), and if either X is an add, or if this is only used by an
+/// add, transform this into (X+(0-Y)) to promote better reassociation.
static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
@@ -1038,9 +1036,8 @@ static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
return New;
}
-/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
-/// by one, change this into a multiply by a constant to assist with further
-/// reassociation.
+/// If this is a shift of a reassociable multiply or is used by one, change
+/// this into a multiply by a constant to assist with further reassociation.
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
@@ -1065,10 +1062,9 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
return Mul;
}
-/// FindInOperandList - Scan backwards and forwards among values with the same
-/// rank as element i to see if X exists. If X does not exist, return i. This
-/// is useful when scanning for 'x' when we see '-x' because they both get the
-/// same rank.
+/// Scan backwards and forwards among values with the same rank as element i
+/// to see if X exists. If X does not exist, return i. This is useful when
+/// scanning for 'x' when we see '-x' because they both get the same rank.
static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
Value *X) {
unsigned XRank = Ops[i].Rank;
@@ -1093,7 +1089,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
return i;
}
-/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
+/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
static Value *EmitAddTreeOfValues(Instruction *I,
SmallVectorImpl<WeakVH> &Ops){
@@ -1105,8 +1101,8 @@ static Value *EmitAddTreeOfValues(Instruction *I,
return CreateAdd(V2, V1, "tmp", I, I);
}
-/// RemoveFactorFromExpression - If V is an expression tree that is a
-/// multiplication sequence, and if this sequence contains a multiply by Factor,
+/// If V is an expression tree that is a multiplication sequence,
+/// and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
@@ -1178,8 +1174,8 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
return V;
}
-/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
-/// add its operands as factors, otherwise add V to the list of factors.
+/// If V is a single-use multiply, recursively add its operands as factors,
+/// otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
@@ -1196,10 +1192,9 @@ static void FindSingleUseMultiplyFactors(Value *V,
FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops);
}
-/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
-/// instruction. This optimizes based on identities. If it can be reduced to
-/// a single Value, it is returned, otherwise the Ops list is mutated as
-/// necessary.
+/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
+/// This optimizes based on identities. If it can be reduced to a single Value,
+/// it is returned, otherwise the Ops list is mutated as necessary.
static Value *OptimizeAndOrXor(unsigned Opcode,
SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
@@ -1489,7 +1484,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return nullptr;
}
-/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
+/// Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
Value *Reassociate::OptimizeAdd(Instruction *I,
@@ -1517,8 +1512,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Insert a new multiply.
Type *Ty = TheOp->getType();
- Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound)
- : ConstantFP::get(Ty, NumFound);
+ Constant *C = Ty->isIntOrIntVectorTy() ?
+ ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
// Now that we have inserted a multiply, optimize it. This allows us to
@@ -1658,7 +1653,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// from an expression will drop a use of maxocc, and this can cause
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst =
- I->getType()->isIntegerTy()
+ I->getType()->isIntOrIntVectorTy()
? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
: BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
@@ -1789,7 +1784,7 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder,
Value *LHS = Ops.pop_back_val();
do {
- if (LHS->getType()->isIntegerTy())
+ if (LHS->getType()->isIntOrIntVectorTy())
LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
else
LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
@@ -1942,8 +1937,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
return nullptr;
}
-/// EraseInst - Zap the given instruction, adding interesting operands to the
-/// work list.
+/// Zap the given instruction, adding interesting operands to the work list.
void Reassociate::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
@@ -1988,7 +1982,7 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
Constant *C = C0 ? C0 : C1;
unsigned ConstIdx = C0 ? 0 : 1;
if (auto *CI = dyn_cast<ConstantInt>(C)) {
- if (!CI->isNegative())
+ if (!CI->isNegative() || CI->isMinValue(true))
return nullptr;
} else if (auto *CF = dyn_cast<ConstantFP>(C)) {
if (!CF->isNegative())
@@ -2057,7 +2051,7 @@ Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
return NI;
}
-/// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
+/// Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
void Reassociate::OptimizeInst(Instruction *I) {
// Only consider operations that we understand.
@@ -2087,8 +2081,9 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (I->isCommutative())
canonicalizeOperands(I);
- // Don't optimize vector instructions.
- if (I->getType()->isVectorTy())
+ // TODO: We should optimize vector Xor instructions, but they are
+ // currently unsupported.
+ if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor)
return;
// Don't optimize floating point instructions that don't have unsafe algebra.
@@ -2167,9 +2162,6 @@ void Reassociate::OptimizeInst(Instruction *I) {
}
void Reassociate::ReassociateExpression(BinaryOperator *I) {
- assert(!I->getType()->isVectorTy() &&
- "Reassociation of vector instructions is not supported.");
-
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
@@ -2192,7 +2184,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
// the vector.
std::stable_sort(Ops.begin(), Ops.end());
- // OptimizeExpression - Now that we have the expression tree in a convenient
+ // Now that we have the expression tree in a convenient
// sorted form, optimize it globally if possible.
if (Value *V = OptimizeExpression(I, Ops)) {
if (V == I)
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
new file mode 100644
index 0000000..6cf765a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -0,0 +1,2506 @@
+//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Rewrite an existing set of gc.statepoints such that they make potential
+// relocations performed by the garbage collector explicit in the IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
+#define DEBUG_TYPE "rewrite-statepoints-for-gc"
+
+using namespace llvm;
+
+// Print tracing output
+static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
+ cl::init(false));
+
+// Print the liveset found at the insert location
+static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
+ cl::init(false));
+static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
+ cl::init(false));
+// Print out the base pointers for debugging
+static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
+ cl::init(false));
+
+// Cost threshold measuring when it is profitable to rematerialize value instead
+// of relocating it
+static cl::opt<unsigned>
+RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
+ cl::init(6));
+
+#ifdef XDEBUG
+static bool ClobberNonLive = true;
+#else
+static bool ClobberNonLive = false;
+#endif
+static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
+ cl::location(ClobberNonLive),
+ cl::Hidden);
+
+namespace {
+struct RewriteStatepointsForGC : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ RewriteStatepointsForGC() : FunctionPass(ID) {
+ initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // We add and rewrite a bunch of instructions, but don't really do much
+ // else. We could in theory preserve a lot more analyses here.
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+};
+} // namespace
+
+char RewriteStatepointsForGC::ID = 0;
+
+FunctionPass *llvm::createRewriteStatepointsForGCPass() {
+ return new RewriteStatepointsForGC();
+}
+
+INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
+ "Make relocations explicit at statepoints", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
+ "Make relocations explicit at statepoints", false, false)
+
+namespace {
+struct GCPtrLivenessData {
+ /// Values defined in this block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
+ /// Values used in this block (and thus live); does not include values
+ /// killed within this block.
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;
+
+ /// Values live into this basic block (i.e. used by any
+ /// instruction in this basic block or ones reachable from here)
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;
+
+ /// Values live out of this basic block (i.e. live into
+ /// any successor block)
+ DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
+};
+
+// The type of the internal cache used inside the findBasePointers family
+// of functions. From the caller's perspective, this is an opaque type and
+// should not be inspected.
+//
+// In the actual implementation this caches two relations:
+// - The base relation itself (i.e. this pointer is based on that one)
+// - The base defining value relation (i.e. before base_phi insertion)
+// Generally, after the execution of a full findBasePointer call, only the
+// base relation will remain. Internally, we add a mixture of the two
+// types, then update all entries of the second type to the first type.
+typedef DenseMap<Value *, Value *> DefiningValueMapTy;
+typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
+typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
+
+struct PartiallyConstructedSafepointRecord {
+ /// The set of values known to be live across this safepoint.
+ StatepointLiveSetTy liveset;
+
+ /// Mapping from live pointers to a base-defining-value
+ DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+
+ /// The *new* gc.statepoint instruction itself. This produces the token
+ /// that normal path gc.relocates and the gc.result are tied to.
+ Instruction *StatepointToken;
+
+ /// Instruction to which exceptional gc relocates are attached
+ /// Makes it easier to iterate through them during relocationViaAlloca.
+ Instruction *UnwindToken;
+
+ /// Record live values we rematerialize instead of relocating.
+ /// They are not included in the 'liveset' field.
+ /// Maps each rematerialized copy to its original value.
+ RematerializedValueMapTy RematerializedValues;
+};
+}
+
+/// Compute the live-in set for every basic block in the function
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+ GCPtrLivenessData &Data);
+
+/// Given results from the dataflow liveness computation, find the set of live
+/// Values at a particular instruction.
+static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
+ StatepointLiveSetTy &out);
+
+// TODO: Once we can get to the GCStrategy, this becomes
+// Optional<bool> isGCManagedPointer(const Value *V) const override {
+
+static bool isGCPointerType(const Type *T) {
+ if (const PointerType *PT = dyn_cast<PointerType>(T))
+ // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
+ // GC managed heap. We know that a pointer into this heap needs to be
+ // updated and that no other pointer does.
+ return (1 == PT->getAddressSpace());
+ return false;
+}
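+
+// For example (illustrative IR), the first parameter below is treated as a GC
+// pointer by this example strategy while the second is not:
+//
+//   declare void @f(i8 addrspace(1)* %managed, i8* %unmanaged)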
+
+// Return true if this type is one which a) is a gc pointer or contains a GC
+// pointer and b) is of a type this code expects to encounter as a live value.
+// (The insertion code will assert that a type which matches (a) and not (b)
+// is not encountered.)
+static bool isHandledGCPointerType(Type *T) {
+ // We fully support gc pointers
+ if (isGCPointerType(T))
+ return true;
+ // We partially support vectors of gc pointers. The code will assert if it
+ // can't handle something.
+ if (auto VT = dyn_cast<VectorType>(T))
+ if (isGCPointerType(VT->getElementType()))
+ return true;
+ return false;
+}
+
+#ifndef NDEBUG
+/// Returns true if this type contains a gc pointer whether we know how to
+/// handle that type or not.
+static bool containsGCPtrType(Type *Ty) {
+ if (isGCPointerType(Ty))
+ return true;
+ if (VectorType *VT = dyn_cast<VectorType>(Ty))
+ return isGCPointerType(VT->getScalarType());
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
+ return containsGCPtrType(AT->getElementType());
+ if (StructType *ST = dyn_cast<StructType>(Ty))
+ return std::any_of(
+ ST->subtypes().begin(), ST->subtypes().end(),
+ [](Type *SubType) { return containsGCPtrType(SubType); });
+ return false;
+}
+
+// Returns true if this is a type which a) is a gc pointer or contains a GC
+// pointer and b) is of a type which the code doesn't expect (i.e. first class
+// aggregates). Used to trip assertions.
+static bool isUnhandledGCPointerType(Type *Ty) {
+ return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
+}
+#endif
+
+static bool order_by_name(llvm::Value *a, llvm::Value *b) {
+ if (a->hasName() && b->hasName()) {
+ return -1 == a->getName().compare(b->getName());
+ } else if (a->hasName() && !b->hasName()) {
+ return true;
+ } else if (!a->hasName() && b->hasName()) {
+ return false;
+ } else {
+ // Better than nothing, but not stable
+ return a < b;
+ }
+}
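+// For example, a value named %alpha sorts before one named %beta, and any
+// named value sorts before any unnamed one.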
+
+// Conservatively identifies any definitions which might be live at the
+// given instruction. The analysis is performed immediately before the
+// given instruction. Values defined by that instruction are not considered
+// live. Values used by that instruction are considered live.
+static void analyzeParsePointLiveness(
+ DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
+ const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
+ Instruction *inst = CS.getInstruction();
+
+ StatepointLiveSetTy liveset;
+ findLiveSetAtInst(inst, OriginalLivenessData, liveset);
+
+ if (PrintLiveSet) {
+ // Note: This output is used by several of the test cases
+ // The order of elements in a set is not stable, so put them in a vector
+ // and sort by name.
+ SmallVector<Value *, 64> temp;
+ temp.insert(temp.end(), liveset.begin(), liveset.end());
+ std::sort(temp.begin(), temp.end(), order_by_name);
+ errs() << "Live Variables:\n";
+ for (Value *V : temp) {
+ errs() << " " << V->getName(); // no newline
+ V->dump();
+ }
+ }
+ if (PrintLiveSetSize) {
+ errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
+ errs() << "Number live values: " << liveset.size() << "\n";
+ }
+ result.liveset = liveset;
+}
+
+static Value *findBaseDefiningValue(Value *I);
+
+/// If we can trivially determine that the index specified in the given vector
+/// is a base pointer, return it. In cases where the entire vector is known to
+/// consist of base pointers, the entire vector will be returned. This
+/// indicates that the relevant extractelement is a valid base pointer and
+/// should be used directly.
+static Value *findBaseOfVector(Value *I, Value *Index) {
+ assert(I->getType()->isVectorTy() &&
+ cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
+ // Each case parallels findBaseDefiningValue below, see that code for
+ // detailed motivation.
+
+ if (isa<Argument>(I))
+ // An incoming argument to the function is a base pointer
+ return I;
+
+ // We shouldn't see the address of a global as a vector value?
+ assert(!isa<GlobalVariable>(I) &&
+ "unexpected global variable found in base of vector");
+
+ // Inlining could possibly introduce a phi node that contains
+ // undef if the callee has multiple returns.
+ if (isa<UndefValue>(I))
+ // utterly meaningless, but useful for dealing with partially optimized
+ // code.
+ return I;
+
+ // Due to inheritance, this must be _after_ the global variable and undef
+ // checks
+ if (Constant *Con = dyn_cast<Constant>(I)) {
+ assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
+ "order of checks wrong!");
+ assert(Con->isNullValue() && "null is the only case which makes sense");
+ return Con;
+ }
+
+ if (isa<LoadInst>(I))
+ return I;
+
+ // For an insert element, we might be able to look through it if we know
+ // something about the indexes, but if the indices are arbitrary values, we
+ // can't without much more extensive scalarization.
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
+ Value *InsertIndex = IEI->getOperand(2);
+ // The value is being inserted at the index we're extracting; look for its base.
+ if (InsertIndex == Index)
+ return findBaseDefiningValue(IEI->getOperand(1));
+ // Both are constants and can't be equal per the check above. This insert is
+ // definitely not relevant; look back at the rest of the vector and keep trying.
+ if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
+ return findBaseOfVector(IEI->getOperand(0), Index);
+ }
+
+ // Note: This code is currently rather incomplete. We are essentially only
+ // handling cases where the vector element is trivially a base pointer. We
+ // need to update the entire base pointer construction algorithm to know how
+ // to track vector elements and potentially scalarize, but the case which
+ // would motivate the work hasn't shown up in real workloads yet.
+ llvm_unreachable("no base found for vector element");
+}
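+// Illustrative sketch of the insertelement case above (types chosen only for
+// the example):
+//   %vec = insertelement <2 x i8 addrspace(1)*> %v, i8 addrspace(1)* %p, i32 0
+//   %e   = extractelement <2 x i8 addrspace(1)*> %vec, i32 0
+// Asking for the base of index 0 of %vec continues the search from %p; asking
+// for a different constant index keeps looking through %v instead.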
+
+/// Helper function for findBasePointer - Will return a value which either a)
+/// defines the base pointer for the input or b) blocks the simple search
+/// (i.e. a PHI or Select of two derived pointers)
+static Value *findBaseDefiningValue(Value *I) {
+ assert(I->getType()->isPointerTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
+ // This case is a bit of a hack - it only handles extracts from vectors which
+ // trivially contain only base pointers or cases where we can directly match
+ // the index of the original extract element to an insertion into the vector.
+ // See note inside the function for how to improve this.
+ if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
+ Value *VectorOperand = EEI->getVectorOperand();
+ Value *Index = EEI->getIndexOperand();
+ Value *VectorBase = findBaseOfVector(VectorOperand, Index);
+ // If the result returned is a vector, we know the entire vector must
+ // contain base pointers. In that case, the extractelement is a valid base
+ // for this value.
+ if (VectorBase->getType()->isVectorTy())
+ return EEI;
+ // Otherwise, we needed to look through the vector to find the base for
+ // this particular element.
+ assert(VectorBase->getType()->isPointerTy());
+ return VectorBase;
+ }
+
+ if (isa<Argument>(I))
+ // An incoming argument to the function is a base pointer
+ // We should never have reached here if this argument isn't a gc value
+ return I;
+
+ if (isa<GlobalVariable>(I))
+ // base case
+ return I;
+
+ // Inlining could possibly introduce a phi node that contains
+ // undef if the callee has multiple returns.
+ if (isa<UndefValue>(I))
+ // utterly meaningless, but useful for dealing with
+ // partially optimized code.
+ return I;
+
+ // Due to inheritance, this must be _after_ the global variable and undef
+ // checks
+ if (Constant *Con = dyn_cast<Constant>(I)) {
+ assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
+ "order of checks wrong!");
+ // Note: Finding a constant base for something marked for relocation
+ // doesn't really make sense. The most likely case is either a) someone
+ // screwed up the address space usage or b) you're validating against
+ // compiled C++ code without the proper separation. The only real exception
+ // is a null pointer. You could have generic code written to index off of
+ // a potentially null value and have proven it null. We also use
+ // null pointers in dead paths of relocation phis (which we might later
+ // want to find a base pointer for).
+ assert(isa<ConstantPointerNull>(Con) &&
+ "null is the only case which makes sense");
+ return Con;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ Value *Def = CI->stripPointerCasts();
+ // If we find a cast instruction here, it means we've found a cast which is
+ // not simply a pointer cast (i.e. an inttoptr). We don't know how to
+ // handle int->ptr conversion.
+ assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
+ return findBaseDefiningValue(Def);
+ }
+
+ if (isa<LoadInst>(I))
+ return I; // The value loaded is a gc base itself
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ // The base of this GEP is the base
+ return findBaseDefiningValue(GEP->getPointerOperand());
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::experimental_gc_result_ptr:
+ default:
+ // fall through to general call handling
+ break;
+ case Intrinsic::experimental_gc_statepoint:
+ case Intrinsic::experimental_gc_result_float:
+ case Intrinsic::experimental_gc_result_int:
+ llvm_unreachable("these don't produce pointers");
+ case Intrinsic::experimental_gc_relocate: {
+ // Rerunning safepoint insertion after safepoints are already
+ // inserted is not supported. It could probably be made to work,
+ // but why are you doing this? There's no good reason.
+ llvm_unreachable("repeat safepoint insertion is not supported");
+ }
+ case Intrinsic::gcroot:
+ // Currently, this mechanism hasn't been extended to work with gcroot.
+ // There's no reason it couldn't be, but I haven't thought about the
+ // implications much.
+ llvm_unreachable(
+ "interaction with the gcroot mechanism is not supported");
+ }
+ }
+ // We assume that functions in the source language only return base
+ // pointers. This should probably be generalized via attributes to support
+ // both source language and internal functions.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ return I;
+
+ // I have absolutely no idea how to implement this part yet. It's not
+ // necessarily hard, I just haven't really looked at it yet.
+ assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
+
+ if (isa<AtomicCmpXchgInst>(I))
+ // A CAS is effectively an atomic store and load combined under a
+ // predicate. From the perspective of base pointers, we just treat it
+ // like a load.
+ return I;
+
+ assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
+ "binary ops which don't apply to pointers");
+
+ // The aggregate ops. Aggregates can either be in the heap or on the
+ // stack, but in either case, this is simply a field load. As a result,
+ // this is a defining definition of the base just like a load is.
+ if (isa<ExtractValueInst>(I))
+ return I;
+
+ // We should never see an insert vector since that would require we be
+ // tracing back a struct value not a pointer value.
+ assert(!isa<InsertValueInst>(I) &&
+ "Base pointer for a struct is meaningless");
+
+ // The last two cases here don't return a base pointer. Instead, they
+ // return a value which dynamically selects from among several base
+ // derived pointers (each potentially with its own base). It's the job of
+ // the caller to resolve these.
+ assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
+ "missing instruction case in findBaseDefiningValing");
+ return I;
+}
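+// Illustrative sketch of the common GEP case above (types chosen only for the
+// example): for
+//   %derived = getelementptr i8, i8 addrspace(1)* %obj, i64 16
+// the search recurses on the pointer operand and eventually returns %obj (or
+// whatever defines it); a phi or select of derived pointers is returned as-is
+// for the caller to resolve.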
+
+/// Returns the base defining value for this value.
+static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
+ Value *&Cached = Cache[I];
+ if (!Cached) {
+ Cached = findBaseDefiningValue(I);
+ }
+ assert(Cache[I] != nullptr);
+
+ if (TraceLSP) {
+ dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
+ << "\n";
+ }
+ return Cached;
+}
+
+/// Return a base pointer for this value if known. Otherwise, return its
+/// base defining value.
+static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
+ Value *Def = findBaseDefiningValueCached(I, Cache);
+ auto Found = Cache.find(Def);
+ if (Found != Cache.end()) {
+ // Either a base-of relation, or a self reference. Caller must check.
+ return Found->second;
+ }
+ // Only a BDV available
+ return Def;
+}
+
+/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
+/// is it known to be a base pointer, or do we need to continue searching?
+static bool isKnownBaseResult(Value *V) {
+ if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
+ // no recursion possible
+ return true;
+ }
+ if (isa<Instruction>(V) &&
+ cast<Instruction>(V)->getMetadata("is_base_value")) {
+ // This is a previously inserted base phi or select. We know
+ // that this is a base value.
+ return true;
+ }
+
+ // We need to keep searching
+ return false;
+}
+
+// TODO: find a better name for this
+namespace {
+class PhiState {
+public:
+ enum Status { Unknown, Base, Conflict };
+
+ PhiState(Status s, Value *b = nullptr) : status(s), base(b) {
+ assert(status != Base || b);
+ }
+ PhiState(Value *b) : status(Base), base(b) {}
+ PhiState() : status(Unknown), base(nullptr) {}
+
+ Status getStatus() const { return status; }
+ Value *getBase() const { return base; }
+
+ bool isBase() const { return getStatus() == Base; }
+ bool isUnknown() const { return getStatus() == Unknown; }
+ bool isConflict() const { return getStatus() == Conflict; }
+
+ bool operator==(const PhiState &other) const {
+ return base == other.base && status == other.status;
+ }
+
+ bool operator!=(const PhiState &other) const { return !(*this == other); }
+
+ void dump() {
+ errs() << status << " (" << base << " - "
+ << (base ? base->getName() : "nullptr") << "): ";
+ }
+
+private:
+ Status status;
+ Value *base; // non null only if status == base
+};
+
+typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
+// Values of type PhiState form a lattice, and this is a helper
+// class that implementes the meet operation. The meat of the meet
+// operation is implemented in MeetPhiStates::pureMeet
+class MeetPhiStates {
+public:
+ // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
+ explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
+ : phiStates(phiStates) {}
+
+ // Destructively meet the current result with the base V. V can
+ // either be a merge instruction (SelectInst / PHINode), in which
+ // case its status is looked up in the phiStates map; or a regular
+ // SSA value, in which case it is assumed to be a base.
+ void meetWith(Value *V) {
+ PhiState otherState = getStateForBDV(V);
+ assert((MeetPhiStates::pureMeet(otherState, currentResult) ==
+ MeetPhiStates::pureMeet(currentResult, otherState)) &&
+ "math is wrong: meet does not commute!");
+ currentResult = MeetPhiStates::pureMeet(otherState, currentResult);
+ }
+
+ PhiState getResult() const { return currentResult; }
+
+private:
+ const ConflictStateMapTy &phiStates;
+ PhiState currentResult;
+
+ /// Return a phi state for a base defining value. We'll generate a new
+ /// base state for known bases and expect to find a cached state otherwise
+ PhiState getStateForBDV(Value *baseValue) {
+ if (isKnownBaseResult(baseValue)) {
+ return PhiState(baseValue);
+ } else {
+ return lookupFromMap(baseValue);
+ }
+ }
+
+ PhiState lookupFromMap(Value *V) {
+ auto I = phiStates.find(V);
+ assert(I != phiStates.end() && "lookup failed!");
+ return I->second;
+ }
+
+ static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) {
+ switch (stateA.getStatus()) {
+ case PhiState::Unknown:
+ return stateB;
+
+ case PhiState::Base:
+ assert(stateA.getBase() && "can't be null");
+ if (stateB.isUnknown())
+ return stateA;
+
+ if (stateB.isBase()) {
+ if (stateA.getBase() == stateB.getBase()) {
+ assert(stateA == stateB && "equality broken!");
+ return stateA;
+ }
+ return PhiState(PhiState::Conflict);
+ }
+ assert(stateB.isConflict() && "only three states!");
+ return PhiState(PhiState::Conflict);
+
+ case PhiState::Conflict:
+ return stateA;
+ }
+ llvm_unreachable("only three states!");
+ }
+};
+}
+/// For a given value or instruction, figure out what base ptr it's derived
+/// from. For gc objects, this is simply itself. On success, returns a value
+/// which is the base pointer. (This is reliable and can be used for
+/// relocation.) On failure, returns nullptr.
+static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
+ Value *def = findBaseOrBDV(I, cache);
+
+ if (isKnownBaseResult(def)) {
+ return def;
+ }
+
+ // Here's the rough algorithm:
+ // - For every SSA value, construct a mapping to either an actual base
+ // pointer or a PHI which obscures the base pointer.
+ // - Construct a mapping from PHI to unknown TOP state. Use an
+ // optimistic algorithm to propagate base pointer information. Lattice
+ // looks like:
+ // UNKNOWN
+ // b1 b2 b3 b4
+ // CONFLICT
+ // When algorithm terminates, all PHIs will either have a single concrete
+ // base or be in a conflict state.
+ // - For every conflict, insert a dummy PHI node without arguments. Add
+ // these to the base[Instruction] = BasePtr mapping. For every
+ // non-conflict, add the actual base.
+ // - For every conflict, add arguments for the base[a] of each input
+ // arguments.
+ //
+ // Note: A simpler form of this would be to add the conflict form of all
+ // PHIs without running the optimistic algorithm. This would be
+ // analogous to pessimistic data flow and would likely lead to an
+ // overall worse solution.
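+ //
+ // A few illustrative meets (per MeetPhiStates::pureMeet above):
+ //   meet(UNKNOWN, b1) == b1
+ //   meet(b1, b1)      == b1
+ //   meet(b1, b2)      == CONFLICT  (distinct bases)
+ //   meet(CONFLICT, x) == CONFLICT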
+
+ ConflictStateMapTy states;
+ states[def] = PhiState();
+ // Recursively fill in all phis & selects reachable from the initial one
+ // for which we don't already know a definite base value
+ // TODO: This should be rewritten with a worklist
+ bool done = false;
+ while (!done) {
+ done = true;
+ // Since we're adding elements to 'states' as we run, we can't keep
+ // iterators into the set.
+ SmallVector<Value *, 16> Keys;
+ Keys.reserve(states.size());
+ for (auto Pair : states) {
+ Value *V = Pair.first;
+ Keys.push_back(V);
+ }
+ for (Value *v : Keys) {
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+ if (PHINode *phi = dyn_cast<PHINode>(v)) {
+ assert(phi->getNumIncomingValues() > 0 &&
+ "zero input phis are illegal");
+ for (Value *InVal : phi->incoming_values()) {
+ Value *local = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
+ states[local] = PhiState();
+ done = false;
+ }
+ }
+ } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) {
+ Value *local = findBaseOrBDV(sel->getTrueValue(), cache);
+ if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
+ states[local] = PhiState();
+ done = false;
+ }
+ local = findBaseOrBDV(sel->getFalseValue(), cache);
+ if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
+ states[local] = PhiState();
+ done = false;
+ }
+ }
+ }
+ }
+
+ if (TraceLSP) {
+ errs() << "States after initialization:\n";
+ for (auto Pair : states) {
+ Instruction *v = cast<Instruction>(Pair.first);
+ PhiState state = Pair.second;
+ state.dump();
+ v->dump();
+ }
+ }
+
+ // TODO: come back and revisit the state transitions around inputs which
+ // have reached conflict state. The current version seems too conservative.
+
+ bool progress = true;
+ while (progress) {
+#ifndef NDEBUG
+ size_t oldSize = states.size();
+#endif
+ progress = false;
+ // We're only changing keys in this loop, thus safe to keep iterators
+ for (auto Pair : states) {
+ MeetPhiStates calculateMeet(states);
+ Value *v = Pair.first;
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+ if (SelectInst *select = dyn_cast<SelectInst>(v)) {
+ calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache));
+ calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache));
+ } else
+ for (Value *Val : cast<PHINode>(v)->incoming_values())
+ calculateMeet.meetWith(findBaseOrBDV(Val, cache));
+
+ PhiState oldState = states[v];
+ PhiState newState = calculateMeet.getResult();
+ if (oldState != newState) {
+ progress = true;
+ states[v] = newState;
+ }
+ }
+
+ assert(oldSize <= states.size());
+ assert(oldSize == states.size() || progress);
+ }
+
+ if (TraceLSP) {
+ errs() << "States after meet iteration:\n";
+ for (auto Pair : states) {
+ Instruction *v = cast<Instruction>(Pair.first);
+ PhiState state = Pair.second;
+ state.dump();
+ v->dump();
+ }
+ }
+
+ // Insert Phis for all conflicts
+ // We want to keep naming deterministic in the loop that follows, so
+ // sort the keys before iteration. This is useful in allowing us to
+ // write stable tests. Note that there is no invalidation issue here.
+ SmallVector<Value *, 16> Keys;
+ Keys.reserve(states.size());
+ for (auto Pair : states) {
+ Value *V = Pair.first;
+ Keys.push_back(V);
+ }
+ std::sort(Keys.begin(), Keys.end(), order_by_name);
+ // TODO: adjust naming patterns to avoid this order of iteration dependency
+ for (Value *V : Keys) {
+ Instruction *v = cast<Instruction>(V);
+ PhiState state = states[V];
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+ assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
+ if (!state.isConflict())
+ continue;
+
+ if (isa<PHINode>(v)) {
+ int num_preds =
+ std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
+ assert(num_preds > 0 && "how did we reach here");
+ PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
+ // Add metadata marking this as a base value
+ auto *const_1 = ConstantInt::get(
+ Type::getInt32Ty(
+ v->getParent()->getParent()->getParent()->getContext()),
+ 1);
+ auto MDConst = ConstantAsMetadata::get(const_1);
+ MDNode *md = MDNode::get(
+ v->getParent()->getParent()->getParent()->getContext(), MDConst);
+ phi->setMetadata("is_base_value", md);
+ states[v] = PhiState(PhiState::Conflict, phi);
+ } else {
+ SelectInst *sel = cast<SelectInst>(v);
+ // The undef will be replaced later
+ UndefValue *undef = UndefValue::get(sel->getType());
+ SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
+ undef, "base_select", sel);
+ // Add metadata marking this as a base value
+ auto *const_1 = ConstantInt::get(
+ Type::getInt32Ty(
+ v->getParent()->getParent()->getParent()->getContext()),
+ 1);
+ auto MDConst = ConstantAsMetadata::get(const_1);
+ MDNode *md = MDNode::get(
+ v->getParent()->getParent()->getParent()->getContext(), MDConst);
+ basesel->setMetadata("is_base_value", md);
+ states[v] = PhiState(PhiState::Conflict, basesel);
+ }
+ }
+
+ // Fixup all the inputs of the new PHIs
+ for (auto Pair : states) {
+ Instruction *v = cast<Instruction>(Pair.first);
+ PhiState state = Pair.second;
+
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+ assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
+ if (!state.isConflict())
+ continue;
+
+ if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
+ PHINode *phi = cast<PHINode>(v);
+ unsigned NumPHIValues = phi->getNumIncomingValues();
+ for (unsigned i = 0; i < NumPHIValues; i++) {
+ Value *InVal = phi->getIncomingValue(i);
+ BasicBlock *InBB = phi->getIncomingBlock(i);
+
+ // If we've already seen InBB, add the same incoming value
+ // we added for it earlier. The IR verifier requires phi
+ // nodes with multiple entries from the same basic block
+ // to have the same incoming value for each of those
+ // entries. If we don't do this check here and basephi
+ // has a different type than base, we'll end up adding two
+ // bitcasts (and hence two distinct values) as incoming
+ // values for the same basic block.
+
+ int blockIndex = basephi->getBasicBlockIndex(InBB);
+ if (blockIndex != -1) {
+ Value *oldBase = basephi->getIncomingValue(blockIndex);
+ basephi->addIncoming(oldBase, InBB);
+#ifndef NDEBUG
+ Value *base = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(base)) {
+ // Either conflict or base.
+ assert(states.count(base));
+ base = states[base].getBase();
+ assert(base != nullptr && "unknown PhiState!");
+ }
+
+ // In essence this assert states: the only way two
+ // values incoming from the same basic block may be
+ // different is by being different bitcasts of the same
+ // value. A cleanup that remains TODO is changing
+ // findBaseOrBDV to return an llvm::Value of the correct
+ // type (and still remain pure). This will remove the
+ // need to add bitcasts.
+ assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
+ "sanity -- findBaseOrBDV should be pure!");
+#endif
+ continue;
+ }
+
+ // Find either the defining value for the PHI or the normal base for
+ // a non-phi node
+ Value *base = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(base)) {
+ // Either conflict or base.
+ assert(states.count(base));
+ base = states[base].getBase();
+ assert(base != nullptr && "unknown PhiState!");
+ }
+ assert(base && "can't be null");
+ // Must use original input BB since base may not be Instruction
+ // The cast is needed since base traversal may strip away bitcasts
+ if (base->getType() != basephi->getType()) {
+ base = new BitCastInst(base, basephi->getType(), "cast",
+ InBB->getTerminator());
+ }
+ basephi->addIncoming(base, InBB);
+ }
+ assert(basephi->getNumIncomingValues() == NumPHIValues);
+ } else {
+ SelectInst *basesel = cast<SelectInst>(state.getBase());
+ SelectInst *sel = cast<SelectInst>(v);
+ // Operand 1 & 2 are true, false path respectively. TODO: refactor to
+ // something more safe and less hacky.
+ for (int i = 1; i <= 2; i++) {
+ Value *InVal = sel->getOperand(i);
+ // Find either the defining value for the PHI or the normal base for
+ // a non-phi node
+ Value *base = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(base)) {
+ // Either conflict or base.
+ assert(states.count(base));
+ base = states[base].getBase();
+ assert(base != nullptr && "unknown PhiState!");
+ }
+ assert(base && "can't be null");
+ // Must use original input BB since base may not be Instruction
+ // The cast is needed since base traversal may strip away bitcasts
+ if (base->getType() != basesel->getType()) {
+ base = new BitCastInst(base, basesel->getType(), "cast", basesel);
+ }
+ basesel->setOperand(i, base);
+ }
+ }
+ }
+
+ // Cache all of our results so we can cheaply reuse them
+ // NOTE: This is actually two caches: one of the base defining value
+ // relation and one of the base pointer relation! FIXME
+ for (auto item : states) {
+ Value *v = item.first;
+ Value *base = item.second.getBase();
+ assert(v && base);
+ assert(!isKnownBaseResult(v) && "why did it get added?");
+
+ if (TraceLSP) {
+ std::string fromstr =
+ cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "")
+ : "none";
+ errs() << "Updating base value cache"
+ << " for: " << (v->hasName() ? v->getName() : "")
+ << " from: " << fromstr
+ << " to: " << (base->hasName() ? base->getName() : "") << "\n";
+ }
+
+ assert(isKnownBaseResult(base) &&
+ "must be something we 'know' is a base pointer");
+ if (cache.count(v)) {
+ // Once we transition from the BDV relation being stored in the cache to
+ // the base relation being stored, it must be stable
+ assert((!isKnownBaseResult(cache[v]) || cache[v] == base) &&
+ "base relation should be stable");
+ }
+ cache[v] = base;
+ }
+ assert(cache.find(def) != cache.end());
+ return cache[def];
+}
+
+// For a set of live pointers (base and/or derived), identify the base
+// pointer of the object which they are derived from. This routine will
+// mutate the IR graph as needed to make the 'base' pointer live at the
+// definition site of 'derived'. This ensures that any use of 'derived' can
+// also use 'base'. This may involve the insertion of a number of
+// additional PHI nodes.
+//
+// preconditions: live is a set of pointer type Values
+//
+// side effects: may insert PHI nodes into the existing CFG, will preserve
+// CFG, will not remove or mutate any existing nodes
+//
+// post condition: PointerToBase contains one (derived, base) pair for every
+// pointer in live. Note that derived can be equal to base if the original
+// pointer was a base pointer.
+static void
+findBasePointers(const StatepointLiveSetTy &live,
+ DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
+ DominatorTree *DT, DefiningValueMapTy &DVCache) {
+ // For the naming of values inserted to be deterministic - which makes for
+ // much cleaner and more stable tests - we need to assign an order to the
+ // live values. DenseSets do not provide a deterministic order across runs.
+ SmallVector<Value *, 64> Temp;
+ Temp.insert(Temp.end(), live.begin(), live.end());
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
+ for (Value *ptr : Temp) {
+ Value *base = findBasePointer(ptr, DVCache);
+ assert(base && "failed to find base pointer");
+ PointerToBase[ptr] = base;
+ assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
+ DT->dominates(cast<Instruction>(base)->getParent(),
+ cast<Instruction>(ptr)->getParent())) &&
+ "The base we found better dominate the derived pointer");
+
+ // If you see this trip and like to live really dangerously, the code should
+ // be correct, just with idioms the verifier can't handle. You can try
+ // disabling the verifier at your own substantial risk.
+ assert(!isa<ConstantPointerNull>(base) &&
+ "the relocation code needs adjustment to handle the relocation of "
+ "a null pointer constant without causing false positives in the "
+ "safepoint ir verifier.");
+ }
+}
+
+/// Find the required base pointers (and adjust the live set) for the given
+/// parse point.
+static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &result) {
+ DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+ findBasePointers(result.liveset, PointerToBase, &DT, DVCache);
+
+ if (PrintBasePointers) {
+ // Note: Need to print these in a stable order since this is checked in
+ // some tests.
+ errs() << "Base Pairs (w/o Relocation):\n";
+ SmallVector<Value *, 64> Temp;
+ Temp.reserve(PointerToBase.size());
+ for (auto Pair : PointerToBase) {
+ Temp.push_back(Pair.first);
+ }
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
+ for (Value *Ptr : Temp) {
+ Value *Base = PointerToBase[Ptr];
+ errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
+ << "\n";
+ }
+ }
+
+ result.PointerToBase = PointerToBase;
+}
+
+/// Given an updated version of the dataflow liveness results, update the
+/// liveset and base pointer maps for the call site CS.
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &result);
+
+static void recomputeLiveInValues(
+ Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ // TODO-PERF: reuse the original liveness, then simply run the dataflow
+ // again. The old values are still live and will help it stabilize quickly.
+ GCPtrLivenessData RevisedLivenessData;
+ computeLiveInValues(DT, F, RevisedLivenessData);
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ const CallSite &CS = toUpdate[i];
+ recomputeLiveInValues(RevisedLivenessData, CS, info);
+ }
+}
+
+// When inserting gc.relocate calls, we need to ensure there are no uses
+// of the original value between the gc.statepoint and the gc.relocate call.
+// One case which can arise is a phi node at the start of one of the successor blocks.
+// We also need to be able to insert the gc.relocates only on the path which
+// goes through the statepoint. We might need to split an edge to make this
+// possible.
+static BasicBlock *
+normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) {
+ DominatorTree *DT = nullptr;
+ if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTP->getDomTree();
+
+ BasicBlock *Ret = BB;
+ if (!BB->getUniquePredecessor()) {
+ Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT);
+ }
+
+ // Now that 'ret' has unique predecessor we can safely remove all phi nodes
+ // from it
+ FoldSingleEntryPHINodes(Ret);
+ assert(!isa<PHINode>(Ret->begin()));
+
+ // At this point, we can safely insert a gc.relocate as the first instruction
+ // in Ret if needed.
+ return Ret;
+}
+
+static int find_index(ArrayRef<Value *> livevec, Value *val) {
+ auto itr = std::find(livevec.begin(), livevec.end(), val);
+ assert(livevec.end() != itr);
+ size_t index = std::distance(livevec.begin(), itr);
+ assert(index < livevec.size());
+ return index;
+}
+
+// Create a new attribute set containing only attributes which can be
+// transferred from the original call to the safepoint.
+static AttributeSet legalizeCallAttributes(AttributeSet AS) {
+ AttributeSet ret;
+
+ for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
+ unsigned index = AS.getSlotIndex(Slot);
+
+ if (index == AttributeSet::ReturnIndex ||
+ index == AttributeSet::FunctionIndex) {
+
+ for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end;
+ ++it) {
+ Attribute attr = *it;
+
+ // Do not allow certain attributes - just skip them
+ // Safepoint can not be read only or read none.
+ if (attr.hasAttribute(Attribute::ReadNone) ||
+ attr.hasAttribute(Attribute::ReadOnly))
+ continue;
+
+ ret = ret.addAttributes(
+ AS.getContext(), index,
+ AttributeSet::get(AS.getContext(), index, AttrBuilder(attr)));
+ }
+ }
+
+ // Just skip parameter attributes for now
+ }
+
+ return ret;
+}
+
+/// Helper function to place all gc relocates necessary for the given
+/// statepoint.
+/// Inputs:
+/// liveVariables - list of variables to be relocated.
+/// liveStart - index of the first live variable.
+/// basePtrs - base pointers.
+/// statepointToken - statepoint instruction to which relocates should be
+/// bound.
+/// Builder - LLVM IR builder to be used to construct new calls.
+static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables,
+ const int LiveStart,
+ ArrayRef<llvm::Value *> BasePtrs,
+ Instruction *StatepointToken,
+ IRBuilder<> Builder) {
+ SmallVector<Instruction *, 64> NewDefs;
+ NewDefs.reserve(LiveVariables.size());
+
+ Module *M = StatepointToken->getParent()->getParent()->getParent();
+
+ for (unsigned i = 0; i < LiveVariables.size(); i++) {
+ // We generate a (potentially) unique declaration for every pointer type
+ // combination. This results in some blowup in the function declarations in
+ // the IR, but removes the need for argument bitcasts which shrinks the IR
+ // greatly and makes it much more readable.
+ SmallVector<Type *, 1> Types; // one per 'any' type
+ // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid
+ // cases where the actual value's type mangling is not supported by llvm. A
+ // bitcast is added later to convert gc_relocate to the actual value's type.
+ Types.push_back(Type::getInt8PtrTy(M->getContext(), 1));
+ Value *GCRelocateDecl = Intrinsic::getDeclaration(
+ M, Intrinsic::experimental_gc_relocate, Types);
+
+ // Generate the gc.relocate call and save the result
+ Value *BaseIdx =
+ ConstantInt::get(Type::getInt32Ty(M->getContext()),
+ LiveStart + find_index(LiveVariables, BasePtrs[i]));
+ Value *LiveIdx = ConstantInt::get(
+ Type::getInt32Ty(M->getContext()),
+ LiveStart + find_index(LiveVariables, LiveVariables[i]));
+
+ // only specify a debug name if we can give a useful one
+ Value *Reloc = Builder.CreateCall(
+ GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
+ LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated"
+ : "");
+ // Trick CodeGen into thinking there are lots of free registers at this
+ // fake call.
+ cast<CallInst>(Reloc)->setCallingConv(CallingConv::Cold);
+
+ NewDefs.push_back(cast<Instruction>(Reloc));
+ }
+ assert(NewDefs.size() == LiveVariables.size() &&
+ "missing or extra redefinition at safepoint");
+}
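+// Illustrative shape of each emitted relocate (argument types elided; the
+// exact intrinsic name mangling may differ):
+//   %p.relocated = call coldcc @llvm.experimental.gc.relocate(token, BaseIdx, LiveIdx)
+// where BaseIdx and LiveIdx are i32 offsets into the statepoint's gc argument
+// section.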
+
+static void
+makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
+ const SmallVectorImpl<llvm::Value *> &basePtrs,
+ const SmallVectorImpl<llvm::Value *> &liveVariables,
+ Pass *P,
+ PartiallyConstructedSafepointRecord &result) {
+ assert(basePtrs.size() == liveVariables.size());
+ assert(isStatepoint(CS) &&
+ "This method expects to be rewriting a statepoint");
+
+ BasicBlock *BB = CS.getInstruction()->getParent();
+ assert(BB);
+ Function *F = BB->getParent();
+ assert(F && "must be set");
+ Module *M = F->getParent();
+ (void)M;
+ assert(M && "must be set");
+
+ // We're not changing the function signature of the statepoint since the gc
+ // arguments go into the var args section.
+ Function *gc_statepoint_decl = CS.getCalledFunction();
+
+ // Then go ahead and use the builder to actually do the inserts. We insert
+ // immediately before the previous instruction under the assumption that all
+ // arguments will be available here. We can't insert afterwards since we may
+ // be replacing a terminator.
+ Instruction *insertBefore = CS.getInstruction();
+ IRBuilder<> Builder(insertBefore);
+ // Copy all of the arguments from the original statepoint - this includes the
+ // target, call args, and deopt args
+ SmallVector<llvm::Value *, 64> args;
+ args.insert(args.end(), CS.arg_begin(), CS.arg_end());
+ // TODO: Clear the 'needs rewrite' flag
+
+ // add all the pointers to be relocated (gc arguments)
+ // Capture the start of the live variable list for use in the gc_relocates
+ const int live_start = args.size();
+ args.insert(args.end(), liveVariables.begin(), liveVariables.end());
+
+ // Create the statepoint given all the arguments
+ Instruction *token = nullptr;
+ AttributeSet return_attributes;
+ if (CS.isCall()) {
+ CallInst *toReplace = cast<CallInst>(CS.getInstruction());
+ CallInst *call =
+ Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token");
+ call->setTailCall(toReplace->isTailCall());
+ call->setCallingConv(toReplace->getCallingConv());
+
+ // Currently we will fail on parameter attributes and on certain
+ // function attributes.
+ AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
+ // If we can handle this set of attributes, set up the function attrs
+ // directly on the statepoint and return attrs later for the gc_result intrinsic.
+ call->setAttributes(new_attrs.getFnAttributes());
+ return_attributes = new_attrs.getRetAttributes();
+
+ token = call;
+
+ // Put the following gc_result and gc_relocate calls immediately after the
+ // old call (which we're about to delete)
+ BasicBlock::iterator next(toReplace);
+ assert(BB->end() != next && "not a terminator, must have next");
+ next++;
+ Instruction *IP = &*(next);
+ Builder.SetInsertPoint(IP);
+ Builder.SetCurrentDebugLocation(IP->getDebugLoc());
+
+ } else {
+ InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());
+
+ // Insert the new invoke into the old block. We'll remove the old one in a
+ // moment at which point this will become the new terminator for the
+ // original block.
+ InvokeInst *invoke = InvokeInst::Create(
+ gc_statepoint_decl, toReplace->getNormalDest(),
+ toReplace->getUnwindDest(), args, "", toReplace->getParent());
+ invoke->setCallingConv(toReplace->getCallingConv());
+
+ // Currently we will fail on parameter attributes and on certain
+ // function attributes.
+ AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
+ // If we can handle this set of attributes, set up the function attrs
+ // directly on the statepoint and return attrs later for the gc_result intrinsic.
+ invoke->setAttributes(new_attrs.getFnAttributes());
+ return_attributes = new_attrs.getRetAttributes();
+
+ token = invoke;
+
+ // Generate gc relocates in exceptional path
+ BasicBlock *unwindBlock = toReplace->getUnwindDest();
+ assert(!isa<PHINode>(unwindBlock->begin()) &&
+ unwindBlock->getUniquePredecessor() &&
+ "can't safely insert in this block!");
+
+ Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
+ Builder.SetInsertPoint(IP);
+ Builder.SetCurrentDebugLocation(toReplace->getDebugLoc());
+
+ // Extract second element from landingpad return value. We will attach
+ // exceptional gc relocates to it.
+ const unsigned idx = 1;
+ Instruction *exceptional_token =
+ cast<Instruction>(Builder.CreateExtractValue(
+ unwindBlock->getLandingPadInst(), idx, "relocate_token"));
+ result.UnwindToken = exceptional_token;
+
+ // Just throw away return value. We will use the one we got for normal
+ // block.
+ (void)CreateGCRelocates(liveVariables, live_start, basePtrs,
+ exceptional_token, Builder);
+
+ // Generate gc relocates and returns for normal block
+ BasicBlock *normalDest = toReplace->getNormalDest();
+ assert(!isa<PHINode>(normalDest->begin()) &&
+ normalDest->getUniquePredecessor() &&
+ "can't safely insert in this block!");
+
+ IP = &*(normalDest->getFirstInsertionPt());
+ Builder.SetInsertPoint(IP);
+
+ // gc relocates will be generated later as if it were a regular call
+ // statepoint
+ }
+ assert(token);
+
+ // Take the name of the original value call if it had one.
+ token->takeName(CS.getInstruction());
+
+// The GCResult is already inserted, we just need to find it
+#ifndef NDEBUG
+ Instruction *toReplace = CS.getInstruction();
+ assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
+ "only valid use before rewrite is gc.result");
+ assert(!toReplace->hasOneUse() ||
+ isGCResult(cast<Instruction>(*toReplace->user_begin())));
+#endif
+
+ // Update the gc.result of the original statepoint (if any) to use the newly
+ // inserted statepoint. This is safe to do here since the token can't be
+ // considered a live reference.
+ CS.getInstruction()->replaceAllUsesWith(token);
+
+ result.StatepointToken = token;
+
+ // Second, create a gc.relocate for every live variable
+ CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
+}
+
+namespace {
+struct name_ordering {
+ Value *base;
+ Value *derived;
+ bool operator()(name_ordering const &a, name_ordering const &b) {
+ return -1 == a.derived->getName().compare(b.derived->getName());
+ }
+};
+}
+static void stablize_order(SmallVectorImpl<Value *> &basevec,
+ SmallVectorImpl<Value *> &livevec) {
+ assert(basevec.size() == livevec.size());
+
+ SmallVector<name_ordering, 64> temp;
+ for (size_t i = 0; i < basevec.size(); i++) {
+ name_ordering v;
+ v.base = basevec[i];
+ v.derived = livevec[i];
+ temp.push_back(v);
+ }
+ std::sort(temp.begin(), temp.end(), name_ordering());
+ for (size_t i = 0; i < basevec.size(); i++) {
+ basevec[i] = temp[i].base;
+ livevec[i] = temp[i].derived;
+ }
+}
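+// For example, (base, derived) pairs {(b2, %y), (b1, %x)} are reordered by the
+// derived value's name into {(b1, %x), (b2, %y)}; each pair stays intact.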
+
+// Replace an existing gc.statepoint with a new one and a set of gc.relocates
+// which make the relocations happening at this safepoint explicit.
+//
+// WARNING: Does not do any fixup to adjust users of the original live
+// values. That's the caller's responsibility.
+static void
+makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
+ PartiallyConstructedSafepointRecord &result) {
+ auto liveset = result.liveset;
+ auto PointerToBase = result.PointerToBase;
+
+ // Convert to vector for efficient cross referencing.
+ SmallVector<Value *, 64> basevec, livevec;
+ livevec.reserve(liveset.size());
+ basevec.reserve(liveset.size());
+ for (Value *L : liveset) {
+ livevec.push_back(L);
+
+ assert(PointerToBase.find(L) != PointerToBase.end());
+ Value *base = PointerToBase[L];
+ basevec.push_back(base);
+ }
+ assert(livevec.size() == basevec.size());
+
+ // To make the output IR slightly more stable (for use in diffs), ensure a
+ // fixed order of the values in the safepoint (by sorting the value name).
+ // The order is otherwise meaningless.
+ stablize_order(basevec, livevec);
+
+ // Do the actual rewriting and delete the old statepoint
+ makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
+ CS.getInstruction()->eraseFromParent();
+}
+
+// Helper function for relocationViaAlloca.
+// It receives an iterator range over the statepoint gc relocates and emits a
+// store to the assigned location (via AllocaMap) for each one of them.
+// Visited values are added into the VisitedLiveValues set; we will later use
+// them for a sanity check.
+static void
+insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
+ DenseMap<Value *, Value *> &AllocaMap,
+ DenseSet<Value *> &VisitedLiveValues) {
+
+ for (User *U : GCRelocs) {
+ if (!isa<IntrinsicInst>(U))
+ continue;
+
+ IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U);
+
+ // We only care about relocates
+ if (RelocatedValue->getIntrinsicID() !=
+ Intrinsic::experimental_gc_relocate) {
+ continue;
+ }
+
+ GCRelocateOperands RelocateOperands(RelocatedValue);
+ Value *OriginalValue =
+ const_cast<Value *>(RelocateOperands.getDerivedPtr());
+ assert(AllocaMap.count(OriginalValue));
+ Value *Alloca = AllocaMap[OriginalValue];
+
+ // Emit store into the related alloca
+ // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to
+ // the correct type according to alloca.
+ assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator");
+ IRBuilder<> Builder(RelocatedValue->getNextNode());
+ Value *CastedRelocatedValue =
+ Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
+ RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : "");
+
+ StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
+ Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
+
+#ifndef NDEBUG
+ VisitedLiveValues.insert(OriginalValue);
+#endif
+ }
+}
+
+// Helper function for the "relocationViaAlloca". Similar to the
+// "insertRelocationStores" but works for rematerialized values.
+static void
+insertRematerializationStores(
+ RematerializedValueMapTy RematerializedValues,
+ DenseMap<Value *, Value *> &AllocaMap,
+ DenseSet<Value *> &VisitedLiveValues) {
+
+ for (auto RematerializedValuePair: RematerializedValues) {
+ Instruction *RematerializedValue = RematerializedValuePair.first;
+ Value *OriginalValue = RematerializedValuePair.second;
+
+ assert(AllocaMap.count(OriginalValue) &&
+ "Can not find alloca for rematerialized value");
+ Value *Alloca = AllocaMap[OriginalValue];
+
+ StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
+ Store->insertAfter(RematerializedValue);
+
+#ifndef NDEBUG
+ VisitedLiveValues.insert(OriginalValue);
+#endif
+ }
+}
+
+/// do all the relocation update via allocas and mem2reg
+static void relocationViaAlloca(
+ Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
+ ArrayRef<struct PartiallyConstructedSafepointRecord> Records) {
+#ifndef NDEBUG
+ // record initial number of (static) allocas; we'll check we have the same
+ // number when we get done.
+ int InitialAllocaNum = 0;
+ for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+ I++)
+ if (isa<AllocaInst>(*I))
+ InitialAllocaNum++;
+#endif
+
+ // TODO-PERF: change data structures, reserve
+ DenseMap<Value *, Value *> AllocaMap;
+ SmallVector<AllocaInst *, 200> PromotableAllocas;
+ // Used later to check that we have enough allocas to store all values
+ std::size_t NumRematerializedValues = 0;
+ PromotableAllocas.reserve(Live.size());
+
+ // Emit alloca for "LiveValue" and record it in "allocaMap" and
+ // "PromotableAllocas"
+ auto emitAllocaFor = [&](Value *LiveValue) {
+ AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
+ F.getEntryBlock().getFirstNonPHI());
+ AllocaMap[LiveValue] = Alloca;
+ PromotableAllocas.push_back(Alloca);
+ };
+
+ // emit alloca for each live gc pointer
+ for (unsigned i = 0; i < Live.size(); i++) {
+ emitAllocaFor(Live[i]);
+ }
+
+ // emit allocas for rematerialized values
+ for (size_t i = 0; i < Records.size(); i++) {
+ const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+
+ for (auto RematerializedValuePair : Info.RematerializedValues) {
+ Value *OriginalValue = RematerializedValuePair.second;
+ if (AllocaMap.count(OriginalValue) != 0)
+ continue;
+
+ emitAllocaFor(OriginalValue);
+ ++NumRematerializedValues;
+ }
+ }
+
+ // The next two loops are part of the same conceptual operation. We need to
+ // insert a store to the alloca after the original def and at each
+ // redefinition. We need to insert a load before each use. These are split
+ // into distinct loops for performance reasons.
+
+ // Update the gc pointer after each statepoint: either store a relocated
+ // value or null (if no relocated value was found for this gc pointer and it
+ // is not a gc_result).
+ // This must happen before we update the statepoint with a load of the
+ // alloca, otherwise we lose the link between the statepoint and the old def.
+ for (size_t i = 0; i < Records.size(); i++) {
+ const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+ Value *Statepoint = Info.StatepointToken;
+
+ // This will be used for consistency check
+ DenseSet<Value *> VisitedLiveValues;
+
+ // Insert stores for normal statepoint gc relocates
+ insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);
+
+ // In case if it was invoke statepoint
+ // we will insert stores for exceptional path gc relocates.
+ if (isa<InvokeInst>(Statepoint)) {
+ insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
+ VisitedLiveValues);
+ }
+
+ // Do similar thing with rematerialized values
+ insertRematerializationStores(Info.RematerializedValues, AllocaMap,
+ VisitedLiveValues);
+
+ if (ClobberNonLive) {
+ // As a debugging aid, pretend that an unrelocated pointer becomes null at
+ // the gc.statepoint. This will turn some subtle GC problems into
+ // slightly easier to debug SEGVs. Note that on large IR files with
+ // lots of gc.statepoints this is extremely costly in both memory and time.
+ SmallVector<AllocaInst *, 64> ToClobber;
+ for (auto Pair : AllocaMap) {
+ Value *Def = Pair.first;
+ AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
+
+ // This value was relocated
+ if (VisitedLiveValues.count(Def)) {
+ continue;
+ }
+ ToClobber.push_back(Alloca);
+ }
+
+ auto InsertClobbersAt = [&](Instruction *IP) {
+ for (auto *AI : ToClobber) {
+ auto AIType = cast<PointerType>(AI->getType());
+ auto PT = cast<PointerType>(AIType->getElementType());
+ Constant *CPN = ConstantPointerNull::get(PT);
+ StoreInst *Store = new StoreInst(CPN, AI);
+ Store->insertBefore(IP);
+ }
+ };
+
+ // Insert the clobbering stores. These may get intermixed with the
+ // gc.results and gc.relocates, but that's fine.
+ if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
+ InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
+ InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+ } else {
+ BasicBlock::iterator Next(cast<CallInst>(Statepoint));
+ Next++;
+ InsertClobbersAt(Next);
+ }
+ }
+ }
+ // Update each use with a load from the alloca and add a store for the original def
+ for (auto Pair : AllocaMap) {
+ Value *Def = Pair.first;
+ Value *Alloca = Pair.second;
+
+ // We pre-record the uses of the def so that we don't have to worry about
+ // later updates that change the user information.
+ SmallVector<Instruction *, 20> Uses;
+ // PERF: trade a linear scan for repeated reallocation
+ Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
+ for (User *U : Def->users()) {
+ if (!isa<ConstantExpr>(U)) {
+ // If the def has a ConstantExpr use, then the def is either a
+ // ConstantExpr use itself or null. In either case
+ // (recursively in the first, directly in the second), the oop
+ // it is ultimately dependent on is null and this particular
+ // use does not need to be fixed up.
+ Uses.push_back(cast<Instruction>(U));
+ }
+ }
+
+ std::sort(Uses.begin(), Uses.end());
+ auto Last = std::unique(Uses.begin(), Uses.end());
+ Uses.erase(Last, Uses.end());
+
+ for (Instruction *Use : Uses) {
+ if (isa<PHINode>(Use)) {
+ PHINode *Phi = cast<PHINode>(Use);
+ for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
+ if (Def == Phi->getIncomingValue(i)) {
+ LoadInst *Load = new LoadInst(
+ Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+ Phi->setIncomingValue(i, Load);
+ }
+ }
+ } else {
+ LoadInst *Load = new LoadInst(Alloca, "", Use);
+ Use->replaceUsesOfWith(Def, Load);
+ }
+ }
+
+ // emit store for the initial gc value
+ // store must be inserted after load, otherwise store will be in alloca's
+ // use list and an extra load will be inserted before it
+ StoreInst *Store = new StoreInst(Def, Alloca);
+ if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
+ if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
+ // InvokeInst is a TerminatorInst so the store need to be inserted
+ // into its normal destination block.
+ BasicBlock *NormalDest = Invoke->getNormalDest();
+ Store->insertBefore(NormalDest->getFirstNonPHI());
+ } else {
+ assert(!Inst->isTerminator() &&
+ "The only TerminatorInst that can produce a value is "
+ "InvokeInst which is handled above.");
+ Store->insertAfter(Inst);
+ }
+ } else {
+ assert(isa<Argument>(Def));
+ Store->insertAfter(cast<Instruction>(Alloca));
+ }
+ }
+
+ assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
+ "we must have the same allocas with lives");
+ if (!PromotableAllocas.empty()) {
+ // apply mem2reg to promote alloca to SSA
+ PromoteMemToReg(PromotableAllocas, DT);
+ }
+
+#ifndef NDEBUG
+ for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+ I++)
+ if (isa<AllocaInst>(*I))
+ InitialAllocaNum--;
+ assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
+#endif
+}
+
+/// Implement a unique function which doesn't require that we sort the input
+/// vector. Doing so has the effect of changing the output of a couple of
+/// tests in ways which make them less useful in testing fused safepoints.
+template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
+ DenseSet<T> Seen;
+ SmallVector<T, 128> TempVec;
+ TempVec.reserve(Vec.size());
+ for (auto Element : Vec)
+ TempVec.push_back(Element);
+ Vec.clear();
+ for (auto V : TempVec) {
+ if (Seen.insert(V).second) {
+ Vec.push_back(V);
+ }
+ }
+}
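+// For example, {a, b, a, c} becomes {a, b, c}: duplicates are dropped while
+// the first-seen order is preserved.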
+
+/// Insert holders so that each Value is obviously live through the entire
+/// lifetime of the call.
+static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
+ SmallVectorImpl<CallInst *> &Holders) {
+ if (Values.empty())
+ // No values to hold live, might as well not insert the empty holder
+ return;
+
+ Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
+ // Use a dummy vararg function to actually hold the values live
+ Function *Func = cast<Function>(M->getOrInsertFunction(
+ "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
+ if (CS.isCall()) {
+ // For call safepoints insert dummy calls right after safepoint
+ BasicBlock::iterator Next(CS.getInstruction());
+ Next++;
+ Holders.push_back(CallInst::Create(Func, Values, "", Next));
+ return;
+ }
+ // For invoke safepoints insert dummy calls both in normal and
+ // exceptional destination blocks
+ auto *II = cast<InvokeInst>(CS.getInstruction());
+ Holders.push_back(CallInst::Create(
+ Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
+ Holders.push_back(CallInst::Create(
+ Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
+}
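+// Illustrative shape of an inserted holder call (pointer types are examples
+// only):
+//   call void (...) @__tmp_use(i8 addrspace(1)* %p1, i8 addrspace(1)* %p2)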
+
+static void findLiveReferences(
+ Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+ MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ GCPtrLivenessData OriginalLivenessData;
+ computeLiveInValues(DT, F, OriginalLivenessData);
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ const CallSite &CS = toUpdate[i];
+ analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
+ }
+}
+
+/// Remove any vector of pointers from the liveset by scalarizing them over the
+/// statepoint instruction. Adds the scalarized pieces to the liveset. It
+/// would be preferable to include the vector in the statepoint itself, but
+/// the lowering code currently does not handle that. Extending it would be
+/// slightly non-trivial since it requires a format change. Given how rare
+/// such cases are (for the moment?) scalarizing is an acceptable compromise.
+static void splitVectorValues(Instruction *StatepointInst,
+ StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+ SmallVector<Value *, 16> ToSplit;
+ for (Value *V : LiveSet)
+ if (isa<VectorType>(V->getType()))
+ ToSplit.push_back(V);
+
+ if (ToSplit.empty())
+ return;
+
+ Function &F = *(StatepointInst->getParent()->getParent());
+
+ DenseMap<Value *, AllocaInst *> AllocaMap;
+ // First is normal return, second is exceptional return (invoke only)
+ DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
+ for (Value *V : ToSplit) {
+ LiveSet.erase(V);
+
+ AllocaInst *Alloca =
+ new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
+ AllocaMap[V] = Alloca;
+
+ VectorType *VT = cast<VectorType>(V->getType());
+ IRBuilder<> Builder(StatepointInst);
+ SmallVector<Value *, 16> Elements;
+ for (unsigned i = 0; i < VT->getNumElements(); i++)
+ Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
+ LiveSet.insert(Elements.begin(), Elements.end());
+
+ auto InsertVectorReform = [&](Instruction *IP) {
+ Builder.SetInsertPoint(IP);
+ Builder.SetCurrentDebugLocation(IP->getDebugLoc());
+ Value *ResultVec = UndefValue::get(VT);
+ for (unsigned i = 0; i < VT->getNumElements(); i++)
+ ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
+ Builder.getInt32(i));
+ return ResultVec;
+ };
+
+ if (isa<CallInst>(StatepointInst)) {
+ BasicBlock::iterator Next(StatepointInst);
+ Next++;
+ Instruction *IP = &*(Next);
+ Replacements[V].first = InsertVectorReform(IP);
+ Replacements[V].second = nullptr;
+ } else {
+ InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
+ // We've already normalized - check that we don't have shared destination
+ // blocks
+ BasicBlock *NormalDest = Invoke->getNormalDest();
+ assert(!isa<PHINode>(NormalDest->begin()));
+ BasicBlock *UnwindDest = Invoke->getUnwindDest();
+ assert(!isa<PHINode>(UnwindDest->begin()));
+ // Insert insert element sequences in both successors
+ Instruction *IP = &*(NormalDest->getFirstInsertionPt());
+ Replacements[V].first = InsertVectorReform(IP);
+ IP = &*(UnwindDest->getFirstInsertionPt());
+ Replacements[V].second = InsertVectorReform(IP);
+ }
+ }
+ for (Value *V : ToSplit) {
+ AllocaInst *Alloca = AllocaMap[V];
+
+ // Capture all users before we start mutating use lists
+ SmallVector<Instruction *, 16> Users;
+ for (User *U : V->users())
+ Users.push_back(cast<Instruction>(U));
+
+ for (Instruction *I : Users) {
+ if (auto Phi = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
+ if (V == Phi->getIncomingValue(i)) {
+ LoadInst *Load = new LoadInst(
+ Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+ Phi->setIncomingValue(i, Load);
+ }
+ } else {
+ LoadInst *Load = new LoadInst(Alloca, "", I);
+ I->replaceUsesOfWith(V, Load);
+ }
+ }
+
+ // Store the original value and the replacement value into the alloca
+ StoreInst *Store = new StoreInst(V, Alloca);
+ if (auto I = dyn_cast<Instruction>(V))
+ Store->insertAfter(I);
+ else
+ Store->insertAfter(Alloca);
+
+ // Normal return for invoke, or call return
+ Instruction *Replacement = cast<Instruction>(Replacements[V].first);
+ (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+ // Unwind return for invoke only
+ Replacement = cast_or_null<Instruction>(Replacements[V].second);
+ if (Replacement)
+ (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+ }
+
+ // apply mem2reg to promote alloca to SSA
+ SmallVector<AllocaInst *, 16> Allocas;
+ for (Value *V : ToSplit)
+ Allocas.push_back(AllocaMap[V]);
+ PromoteMemToReg(Allocas, DT);
+}
+
+// Helper function for "rematerializeLiveValues". It walks the use chain
+// starting from "CurrentValue" until it meets "BaseValue". Only "simple"
+// values are visited (currently GEPs and casts). Returns true if it
+// successfully reached "BaseValue" and false otherwise.
+// Fills "ChainToBase" array with all visited values. "BaseValue" is not
+// recorded.
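+// Illustrative example (hypothetical IR): for
+//   %c = bitcast i8 addrspace(1)* %base to i32 addrspace(1)*
+//   %p = getelementptr i32, i32 addrspace(1)* %c, i32 4
+// walking from %p towards %base records ChainToBase = [%p, %c] and returns
+// true; an unsupported instruction (e.g. a load) anywhere in the chain makes
+// it return false instead.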
+static bool findRematerializableChainToBasePointer(
+ SmallVectorImpl<Instruction*> &ChainToBase,
+ Value *CurrentValue, Value *BaseValue) {
+
+ // We have found a base value
+ if (CurrentValue == BaseValue) {
+ return true;
+ }
+
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
+ ChainToBase.push_back(GEP);
+ return findRematerializableChainToBasePointer(ChainToBase,
+ GEP->getPointerOperand(),
+ BaseValue);
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
+ Value *Def = CI->stripPointerCasts();
+
+ // These two checks are basically similar. The first one is here for
+ // consistency with the findBasePointers logic.
+ assert(!isa<CastInst>(Def) && "not a pointer cast found");
+ if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
+ return false;
+
+ ChainToBase.push_back(CI);
+ return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+ }
+
+ // Not supported instruction in the chain
+ return false;
+}
+
+// Helper function for the "rematerializeLiveValues". Compute cost of the use
+// chain we are going to rematerialize.
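+// For example, a chain consisting of one no-op bitcast and one GEP with a
+// variable index is modeled as getCastInstrCost(cast) +
+// getAddressComputationCost(pointee type) + 2.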
+static unsigned
+chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
+ TargetTransformInfo &TTI) {
+ unsigned Cost = 0;
+
+ for (Instruction *Instr : Chain) {
+ if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
+ assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
+ "non noop cast is found during rematerialization");
+
+ Type *SrcTy = CI->getOperand(0)->getType();
+ Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);
+
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
+ // Cost of the address calculation
+ Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
+ Cost += TTI.getAddressComputationCost(ValTy);
+
+ // And cost of the GEP itself
+ // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
+ // allowed for the external usage)
+ if (!GEP->hasAllConstantIndices())
+ Cost += 2;
+
+ } else {
+ llvm_unreachable("unsupported instruction type during rematerialization");
+ }
+ }
+
+ return Cost;
+}
+
+// From the statepoint liveset, pick values that are cheaper to recompute than
+// to relocate. Remove these values from the liveset, rematerialize them after
+// the statepoint and record them in the "Info" structure. Note that similarly
+// to relocated values we don't do any user adjustments here.
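+// For example, if %derived = gep %base, 16 is live across the statepoint and
+// %base is relocated anyway, re-executing the cheap GEP after the statepoint
+// can be preferable to relocating %derived as an additional gc pointer.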
+static void rematerializeLiveValues(CallSite CS,
+ PartiallyConstructedSafepointRecord &Info,
+ TargetTransformInfo &TTI) {
+ const unsigned int ChainLengthThreshold = 10;
+
+ // Record values we are going to delete from this statepoint live set.
+ // We cannot do this in the following loop due to iterator invalidation.
+ SmallVector<Value *, 32> LiveValuesToBeDeleted;
+
+ for (Value *LiveValue: Info.liveset) {
+ // For each live pointer, find its defining chain
+ SmallVector<Instruction *, 3> ChainToBase;
+ assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+ bool FoundChain =
+ findRematerializableChainToBasePointer(ChainToBase,
+ LiveValue,
+ Info.PointerToBase[LiveValue]);
+ // Nothing to do, or chain is too long
+ if (!FoundChain ||
+ ChainToBase.size() == 0 ||
+ ChainToBase.size() > ChainLengthThreshold)
+ continue;
+
+ // Compute cost of this chain
+ unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
+ // TODO: We can also account for cases when we will be able to remove some
+ // of the rematerialized values by later optimization passes, e.g. if
+ // we rematerialized several intersecting chains, or if the original values
+ // don't have any uses besides this statepoint.
+
+ // For invokes we need to rematerialize each chain twice - for normal and
+ // for unwind basic blocks. Model this by multiplying cost by two.
+ if (CS.isInvoke()) {
+ Cost *= 2;
+ }
+ // If it's too expensive - skip it
+ if (Cost >= RematerializationThreshold)
+ continue;
+
+ // Remove value from the live set
+ LiveValuesToBeDeleted.push_back(LiveValue);
+
+ // Clone instructions and record them inside the "Info" structure
+
+ // Walk backwards to visit top-most instructions first
+ std::reverse(ChainToBase.begin(), ChainToBase.end());
+
+ // Utility function which clones all instructions from "ChainToBase"
+ // and inserts them before "InsertBefore". Returns the rematerialized value
+ // which should be used after the statepoint.
+ auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
+ Instruction *LastClonedValue = nullptr;
+ Instruction *LastValue = nullptr;
+ for (Instruction *Instr: ChainToBase) {
+ // Only GEPs and casts are supported, as we need to be careful not to
+ // introduce any new uses of pointers not in the liveset.
+ // Note that it's fine to introduce new uses of pointers which were
+ // otherwise not used after this statepoint.
+ assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));
+
+ Instruction *ClonedValue = Instr->clone();
+ ClonedValue->insertBefore(InsertBefore);
+ ClonedValue->setName(Instr->getName() + ".remat");
+
+ // If it is not the first instruction in the chain, then it uses the
+ // previously cloned value. We should update it to use the cloned value.
+ if (LastClonedValue) {
+ assert(LastValue);
+ ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
+#ifndef NDEBUG
+ // Assert that cloned instruction does not use any instructions from
+ // this chain other than LastClonedValue
+ for (auto OpValue : ClonedValue->operand_values()) {
+ assert(std::find(ChainToBase.begin(), ChainToBase.end(), OpValue) ==
+ ChainToBase.end() &&
+ "incorrect use in rematerialization chain");
+ }
+#endif
+ }
+
+ LastClonedValue = ClonedValue;
+ LastValue = Instr;
+ }
+ assert(LastClonedValue);
+ return LastClonedValue;
+ };
+
+ // Different cases for calls and invokes. For invokes we need to clone
+ // instructions on both the normal and unwind paths.
+ if (CS.isCall()) {
+ Instruction *InsertBefore = CS.getInstruction()->getNextNode();
+ assert(InsertBefore);
+ Instruction *RematerializedValue = rematerializeChain(InsertBefore);
+ Info.RematerializedValues[RematerializedValue] = LiveValue;
+ } else {
+ InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
+
+ Instruction *NormalInsertBefore =
+ Invoke->getNormalDest()->getFirstInsertionPt();
+ Instruction *UnwindInsertBefore =
+ Invoke->getUnwindDest()->getFirstInsertionPt();
+
+ Instruction *NormalRematerializedValue =
+ rematerializeChain(NormalInsertBefore);
+ Instruction *UnwindRematerializedValue =
+ rematerializeChain(UnwindInsertBefore);
+
+ Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
+ Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
+ }
+ }
+
+ // Remove rematerialized values from the live set
+ for (auto LiveValue: LiveValuesToBeDeleted) {
+ Info.liveset.erase(LiveValue);
+ }
+}
+
+static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
+ SmallVectorImpl<CallSite> &toUpdate) {
+#ifndef NDEBUG
+ // sanity check the input
+ std::set<CallSite> uniqued;
+ uniqued.insert(toUpdate.begin(), toUpdate.end());
+ assert(uniqued.size() == toUpdate.size() && "no duplicates please!");
+
+ for (size_t i = 0; i < toUpdate.size(); i++) {
+ CallSite &CS = toUpdate[i];
+ assert(CS.getInstruction()->getParent()->getParent() == &F);
+ assert(isStatepoint(CS) && "expected to already be a deopt statepoint");
+ }
+#endif
+
+ // When inserting gc.relocates for invokes, we need to be able to insert at
+ // the top of the successor blocks. See the comment on
+ // normalizeForInvokeSafepoint for exactly what is needed. Note that this step
+ // may restructure the CFG.
+ for (CallSite CS : toUpdate) {
+ if (!CS.isInvoke())
+ continue;
+ InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
+ normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
+ P);
+ normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
+ P);
+ }
+
+ // A list of dummy calls added to the IR to keep various values obviously
+ // live in the IR. We'll remove all of these when done.
+ SmallVector<CallInst *, 64> holders;
+
+ // Insert a dummy call with all of the arguments to the vm_state we'll need
+ // for the actual safepoint insertion. This ensures reference arguments in
+ // the deopt argument list are considered live through the safepoint (and
+ // thus makes sure they get relocated.)
+ for (size_t i = 0; i < toUpdate.size(); i++) {
+ CallSite &CS = toUpdate[i];
+ Statepoint StatepointCS(CS);
+
+ SmallVector<Value *, 64> DeoptValues;
+ for (Use &U : StatepointCS.vm_state_args()) {
+ Value *Arg = cast<Value>(&U);
+ assert(!isUnhandledGCPointerType(Arg->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(Arg->getType()))
+ DeoptValues.push_back(Arg);
+ }
+ insertUseHolderAfter(CS, DeoptValues, holders);
+ }
+
+ SmallVector<struct PartiallyConstructedSafepointRecord, 64> records;
+ records.reserve(toUpdate.size());
+ for (size_t i = 0; i < toUpdate.size(); i++) {
+ struct PartiallyConstructedSafepointRecord info;
+ records.push_back(info);
+ }
+ assert(records.size() == toUpdate.size());
+
+ // A) Identify all gc pointers which are statically live at the given call
+ // site.
+ findLiveReferences(F, DT, P, toUpdate, records);
+
+ // Do a limited scalarization of any live at safepoint vector values which
+ // contain pointers. This enables this pass to run after vectorization at
+ // the cost of some possible performance loss. TODO: it would be nice to
+ // natively support vectors all the way through the backend so we don't need
+ // to scalarize here.
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ Instruction *statepoint = toUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
+ }
+
+ // B) Find the base pointers for each live pointer
+ /* scope for caching */ {
+ // Cache the 'defining value' relation used in the computation and
+ // insertion of base phis and selects. This ensures that we don't insert
+ // large numbers of duplicate base_phis.
+ DefiningValueMapTy DVCache;
+
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+ findBasePointers(DT, DVCache, CS, info);
+ }
+ } // end of cache scope
+
+ // The base phi insertion logic (for any safepoint) may have inserted new
+ // instructions which are now live at some safepoint. The simplest such
+ // example is:
+ // loop:
+ // phi a <-- will be a new base_phi here
+ // safepoint 1 <-- that needs to be live here
+ // gep a + 1
+ // safepoint 2
+ // br loop
+ // We insert some dummy calls after each safepoint to definitely hold live
+ // the base pointers which were identified for that safepoint. We'll then
+ // ask liveness for _every_ base inserted to see what is now live. Then we
+ // remove the dummy calls.
+ holders.reserve(holders.size() + records.size());
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+
+ SmallVector<Value *, 128> Bases;
+ for (auto Pair : info.PointerToBase) {
+ Bases.push_back(Pair.second);
+ }
+ insertUseHolderAfter(CS, Bases, holders);
+ }
+
+ // By selecting base pointers, we've effectively inserted new uses. Thus, we
+ // need to rerun liveness. We may *also* have inserted new defs, but that's
+ // not the key issue.
+ recomputeLiveInValues(F, DT, P, toUpdate, records);
+
+ if (PrintBasePointers) {
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ errs() << "Base Pairs: (w/Relocation)\n";
+ for (auto Pair : info.PointerToBase) {
+ errs() << " derived %" << Pair.first->getName() << " base %"
+ << Pair.second->getName() << "\n";
+ }
+ }
+ }
+ for (size_t i = 0; i < holders.size(); i++) {
+ holders[i]->eraseFromParent();
+ holders[i] = nullptr;
+ }
+ holders.clear();
+
+ // In order to reduce the live set of a statepoint we might choose to
+ // rematerialize some values instead of relocating them. This is purely an
+ // optimization and does not influence correctness.
+ TargetTransformInfo &TTI =
+ P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+
+ rematerializeLiveValues(CS, info, TTI);
+ }
+
+ // Now run through and replace the existing statepoints with new ones with
+ // the live variables listed. We do not yet update uses of the values being
+ // relocated. We have references to live variables that need to
+ // survive to the last iteration of this loop. (By construction, the
+ // previous statepoint cannot be a live variable, thus we can remove
+ // the old statepoint calls as we go.)
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ CallSite &CS = toUpdate[i];
+ makeStatepointExplicit(DT, CS, P, info);
+ }
+ toUpdate.clear(); // prevent accidental use of invalid CallSites
+
+ // Do all the fixups of the original live variables to their relocated selves
+ SmallVector<Value *, 128> live;
+ for (size_t i = 0; i < records.size(); i++) {
+ struct PartiallyConstructedSafepointRecord &info = records[i];
+ // We can't simply save the live set from the original insertion. One of
+ // the live values might be the result of a call which needs a safepoint.
+ // That Value* no longer exists and we need to use the new gc_result.
+ // Thankfully, the liveset is embedded in the statepoint (and updated), so
+ // we just grab that.
+ Statepoint statepoint(info.StatepointToken);
+ live.insert(live.end(), statepoint.gc_args_begin(),
+ statepoint.gc_args_end());
+#ifndef NDEBUG
+ // Do some basic sanity checks on our liveness results before performing
+ // relocation. Relocation can and will turn mistakes in liveness results
+ // into nonsensical code which is much harder to debug.
+ // TODO: It would be nice to test consistency as well
+ assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+ "statepoint must be reachable or liveness is meaningless");
+ for (Value *V : statepoint.gc_args()) {
+ if (!isa<Instruction>(V))
+ // Non-instruction values trivially dominate all possible uses
+ continue;
+ auto LiveInst = cast<Instruction>(V);
+ assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
+ "unreachable values should never be live");
+ assert(DT.dominates(LiveInst, info.StatepointToken) &&
+ "basic SSA liveness expectation violated by liveness analysis");
+ }
+#endif
+ }
+ unique_unsorted(live);
+
+#ifndef NDEBUG
+ // sanity check
+ for (auto ptr : live) {
+ assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");
+ }
+#endif
+
+ relocationViaAlloca(F, DT, live, records);
+ return !records.empty();
+}
+
+/// Returns true if this function should be rewritten by this pass. The main
+/// point of this function is as an extension point for custom logic.
+static bool shouldRewriteStatepointsIn(Function &F) {
+ // TODO: This should check the GCStrategy
+ if (F.hasGC()) {
+ const char *FunctionGCName = F.getGC();
+ const StringRef StatepointExampleName("statepoint-example");
+ const StringRef CoreCLRName("coreclr");
+ return (StatepointExampleName == FunctionGCName) ||
+ (CoreCLRName == FunctionGCName);
+ } else
+ return false;
+}
+
+bool RewriteStatepointsForGC::runOnFunction(Function &F) {
+ // Nothing to do for declarations.
+ if (F.isDeclaration() || F.empty())
+ return false;
+
+ // Policy choice says not to rewrite - the most common reason is that we're
+ // compiling code without a GCStrategy.
+ if (!shouldRewriteStatepointsIn(F))
+ return false;
+
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Gather all the statepoints which need to be rewritten. Be careful to only
+ // consider those in reachable code since we need to ask dominance queries
+ // when rewriting. We'll delete the unreachable ones in a moment.
+ SmallVector<CallSite, 64> ParsePointNeeded;
+ bool HasUnreachableStatepoint = false;
+ for (Instruction &I : inst_range(F)) {
+ // TODO: only the ones with the flag set!
+ if (isStatepoint(I)) {
+ if (DT.isReachableFromEntry(I.getParent()))
+ ParsePointNeeded.push_back(CallSite(&I));
+ else
+ HasUnreachableStatepoint = true;
+ }
+ }
+
+ bool MadeChange = false;
+
+ // Delete any unreachable statepoints so that we don't have unrewritten
+ // statepoints surviving this pass. This makes testing easier and the
+ // resulting IR less confusing to human readers. Rather than be fancy, we
+ // just reuse a utility function which removes the unreachable blocks.
+ if (HasUnreachableStatepoint)
+ MadeChange |= removeUnreachableBlocks(F);
+
+ // Return early if no work to do.
+ if (ParsePointNeeded.empty())
+ return MadeChange;
+
+ // As a prepass, go ahead and aggressively destroy single entry phi nodes.
+ // These are created by LCSSA. They have the effect of increasing the size
+ // of liveness sets for no good reason. It may be harder to do this post
+ // insertion since relocations and base phis can confuse things.
+ for (BasicBlock &BB : F)
+ if (BB.getUniquePredecessor()) {
+ MadeChange = true;
+ FoldSingleEntryPHINodes(&BB);
+ }
+
+ MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+ return MadeChange;
+}
+
+// liveness computation via standard dataflow
+// -------------------------------------------------------------------
+
+// TODO: Consider using bitvectors for liveness, the set of potentially
+// interesting values should be small and easy to pre-compute.
+
+/// Compute the live-in set for the location rbegin starting from
+/// the live-out set of the basic block
+static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
+ BasicBlock::reverse_iterator rend,
+ DenseSet<Value *> &LiveTmp) {
+
+ for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
+ Instruction *I = &*ritr;
+
+ // KILL/Def - Remove this definition from LiveIn
+ LiveTmp.erase(I);
+
+ // Don't consider *uses* in PHI nodes, we handle their contribution to
+ // predecessor blocks when we seed the LiveOut sets
+ if (isa<PHINode>(I))
+ continue;
+
+ // USE - Add to the LiveIn set for this instruction
+ for (Value *V : I->operands()) {
+ assert(!isUnhandledGCPointerType(V->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
+ // The choice to exclude all things constant here is slightly subtle.
+ // There are two independent reasons:
+ // - We assume that things which are constant (from LLVM's definition)
+ // do not move at runtime. For example, the address of a global
+ // variable is fixed, even though its contents may not be.
+ // - Second, we can't disallow arbitrary inttoptr constants even
+ // if the language frontend does. Optimization passes are free to
+ // locally exploit facts without respect to global reachability. This
+ // can create sections of code which are dynamically unreachable and
+ // contain just about anything. (see constants.ll in tests)
+ LiveTmp.insert(V);
+ }
+ }
+ }
+}
+
+static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
+
+ for (BasicBlock *Succ : successors(BB)) {
+ const BasicBlock::iterator E(Succ->getFirstNonPHI());
+ for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
+ PHINode *Phi = cast<PHINode>(&*I);
+ Value *V = Phi->getIncomingValueForBlock(BB);
+ assert(!isUnhandledGCPointerType(V->getType()) &&
+ "support for FCA unimplemented");
+ if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
+ LiveTmp.insert(V);
+ }
+ }
+ }
+}
+
+static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
+ DenseSet<Value *> KillSet;
+ for (Instruction &I : *BB)
+ if (isHandledGCPointerType(I.getType()))
+ KillSet.insert(&I);
+ return KillSet;
+}
+
+#ifndef NDEBUG
+/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
+/// sanity check for the liveness computation.
+static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
+ TerminatorInst *TI, bool TermOkay = false) {
+ for (Value *V : Live) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // The terminator can be a member of the LiveOut set. LLVM's definition
+ // of instruction dominance states that V does not dominate itself. As
+ // such, we need to special case this to allow it.
+ if (TermOkay && TI == I)
+ continue;
+ assert(DT.dominates(I, TI) &&
+ "basic SSA liveness expectation violated by liveness analysis");
+ }
+ }
+}
+
+/// Check that all the liveness sets used during the computation of liveness
+/// obey basic SSA properties. This is useful for finding cases where we miss
+/// a def.
+static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
+ BasicBlock &BB) {
+ checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
+ checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
+ checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
+}
+#endif
+
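+/// Compute per-block use/kill sets and iterate a standard backwards dataflow
+/// to a fixed point:
+///   LiveOut(BB) = PHI uses in successors contributed along edges from BB,
+///                 unioned with LiveIn(Succ) over all successors Succ
+///   LiveIn(BB)  = (LiveOut(BB) union Use(BB)) - Kill(BB)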
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+ GCPtrLivenessData &Data) {
+
+ SmallSetVector<BasicBlock *, 200> Worklist;
+ auto AddPredsToWorklist = [&](BasicBlock *BB) {
+ // We use a SetVector so that we don't have duplicates in the worklist.
+ Worklist.insert(pred_begin(BB), pred_end(BB));
+ };
+ auto NextItem = [&]() {
+ BasicBlock *BB = Worklist.back();
+ Worklist.pop_back();
+ return BB;
+ };
+
+ // Seed the liveness for each individual block
+ for (BasicBlock &BB : F) {
+ Data.KillSet[&BB] = computeKillSet(&BB);
+ Data.LiveSet[&BB].clear();
+ computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+
+#ifndef NDEBUG
+ for (Value *Kill : Data.KillSet[&BB])
+ assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
+#endif
+
+ Data.LiveOut[&BB] = DenseSet<Value *>();
+ computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+ Data.LiveIn[&BB] = Data.LiveSet[&BB];
+ set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
+ set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+ if (!Data.LiveIn[&BB].empty())
+ AddPredsToWorklist(&BB);
+ }
+
+ // Propagate that liveness until stable
+ while (!Worklist.empty()) {
+ BasicBlock *BB = NextItem();
+
+ // Compute our new liveout set, then exit early if it hasn't changed
+ // despite the contribution of our successor.
+ DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+ const auto OldLiveOutSize = LiveOut.size();
+ for (BasicBlock *Succ : successors(BB)) {
+ assert(Data.LiveIn.count(Succ));
+ set_union(LiveOut, Data.LiveIn[Succ]);
+ }
+ // assert: OldLiveOut is a subset of LiveOut
+ if (OldLiveOutSize == LiveOut.size()) {
+ // If the sets are the same size, then we didn't actually add anything
+ // when unioning our successors' LiveIn. Thus, the LiveOut of this block
+ // hasn't changed.
+ continue;
+ }
+ Data.LiveOut[BB] = LiveOut;
+
+ // Apply the effects of this basic block
+ DenseSet<Value *> LiveTmp = LiveOut;
+ set_union(LiveTmp, Data.LiveSet[BB]);
+ set_subtract(LiveTmp, Data.KillSet[BB]);
+
+ assert(Data.LiveIn.count(BB));
+ const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+ // assert: OldLiveIn is a subset of LiveTmp
+ if (OldLiveIn.size() != LiveTmp.size()) {
+ Data.LiveIn[BB] = LiveTmp;
+ AddPredsToWorklist(BB);
+ }
+ } // while( !worklist.empty() )
+
+#ifndef NDEBUG
+ // Sanity check our output against SSA properties. This helps catch any
+ // missing kills during the above iteration.
+ for (BasicBlock &BB : F) {
+ checkBasicSSA(DT, Data, BB);
+ }
+#endif
+}
+
+static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
+ StatepointLiveSetTy &Out) {
+
+ BasicBlock *BB = Inst->getParent();
+
+ // Note: The copy is intentional and required
+ assert(Data.LiveOut.count(BB));
+ DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+
+ // We want to handle the statepoint itself oddly. Its
+ // call result is not live (normal), nor are its arguments
+ // (unless they're used again later). This adjustment is
+ // specifically what we need to relocate
+ BasicBlock::reverse_iterator rend(Inst);
+ computeLiveInValues(BB->rbegin(), rend, LiveOut);
+ LiveOut.erase(Inst);
+ Out.insert(LiveOut.begin(), LiveOut.end());
+}
+
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+ const CallSite &CS,
+ PartiallyConstructedSafepointRecord &Info) {
+ Instruction *Inst = CS.getInstruction();
+ StatepointLiveSetTy Updated;
+ findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+
+#ifndef NDEBUG
+ DenseSet<Value *> Bases;
+ for (auto KVPair : Info.PointerToBase) {
+ Bases.insert(KVPair.second);
+ }
+#endif
+ // We may have base pointers which are now live that weren't before. We need
+ // to update the PointerToBase structure to reflect this.
+ for (auto V : Updated)
+ if (!Info.PointerToBase.count(V)) {
+ assert(Bases.count(V) && "can't find base for unexpected live value");
+ Info.PointerToBase[V] = V;
+ continue;
+ }
+
+#ifndef NDEBUG
+ for (auto V : Updated) {
+ assert(Info.PointerToBase.count(V) &&
+ "must be able to find base for live value");
+ }
+#endif
+
+ // Remove any stale base mappings - this can happen since our liveness is
+ // more precise than the one inherent in the base pointer analysis
+ DenseSet<Value *> ToErase;
+ for (auto KVPair : Info.PointerToBase)
+ if (!Updated.count(KVPair.first))
+ ToErase.insert(KVPair.first);
+ for (auto V : ToErase)
+ Info.PointerToBase.erase(V);
+
+#ifndef NDEBUG
+ for (auto KVPair : Info.PointerToBase)
+ assert(Updated.count(KVPair.first) && "record for non-live value");
+#endif
+
+ Info.liveset = Updated;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index cfc9a8e..bc068f7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -35,7 +36,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -154,7 +154,7 @@ namespace {
/// Constant Propagation.
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -206,8 +206,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const DataLayout *DL, const TargetLibraryInfo *tli)
- : DL(DL), TLI(tli) {}
+ SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli)
+ : DL(DL), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -1012,7 +1012,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
- markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
+ markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
+ Indices));
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1504,7 +1505,7 @@ namespace {
///
struct SCCP : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
@@ -1561,9 +1562,9 @@ bool SCCP::runOnFunction(Function &F) {
return false;
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SCCPSolver Solver(DL, TLI);
// Mark the first block of the function as being executable.
@@ -1637,7 +1638,7 @@ namespace {
///
struct IPSCCP : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static char ID;
IPSCCP() : ModulePass(ID) {
@@ -1651,7 +1652,7 @@ char IPSCCP::ID = 0;
INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
@@ -1690,9 +1691,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = M.getDataLayout();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SCCPSolver Solver(DL, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index ed161fd..056dd11 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -247,7 +247,7 @@ public:
/// hold.
void insert(ArrayRef<Slice> NewSlices) {
int OldSize = Slices.size();
- std::move(NewSlices.begin(), NewSlices.end(), std::back_inserter(Slices));
+ Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
std::sort(SliceI, Slices.end());
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
@@ -701,6 +701,7 @@ private:
// by writing out the code here where we have the underlying allocation
// size readily available.
APInt GEPOffset = Offset;
+ const DataLayout &DL = GEPI.getModule()->getDataLayout();
for (gep_type_iterator GTI = gep_type_begin(GEPI),
GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
@@ -750,6 +751,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
+ const DataLayout &DL = LI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(LI.getType());
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
@@ -761,6 +763,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ const DataLayout &DL = SI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
@@ -898,6 +901,7 @@ private:
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ const DataLayout &DL = Root->getModule()->getDataLayout();
// If there are no loads or stores, the access is dead. We mark that as
// a size zero access.
Size = 0;
@@ -1084,7 +1088,8 @@ class AllocaPromoter : public LoadAndStorePromoter {
SmallVector<DbgValueInst *, 4> DVIs;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
+ AllocaPromoter(ArrayRef<const Instruction *> Insts,
+ SSAUpdater &S,
AllocaInst &AI, DIBuilder &DIB)
: LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
@@ -1092,8 +1097,8 @@ public:
// Retain the debug information attached to the alloca for use when
// rewriting loads and stores.
if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
- if (auto *DebugNode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
- for (User *U : DebugNode->users())
+ if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
+ for (User *U : DINode->users())
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
@@ -1162,10 +1167,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal =
- DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
+ DVI->getExpression(), DVI->getDebugLoc(),
+ Inst);
}
}
};
@@ -1194,7 +1198,6 @@ class SROA : public FunctionPass {
const bool RequiresDomTree;
LLVMContext *C;
- const DataLayout *DL;
DominatorTree *DT;
AssumptionCache *AC;
@@ -1243,7 +1246,7 @@ class SROA : public FunctionPass {
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
- DL(nullptr), DT(nullptr) {
+ DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -1257,8 +1260,8 @@ private:
friend class AllocaSliceRewriter;
bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
- bool rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P);
+ AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ AllocaSlices::Partition &P);
bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
bool runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
@@ -1349,7 +1352,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
///
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
-static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
+static bool isSafePHIToSpeculate(PHINode &PN) {
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1381,6 +1384,8 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
if (!HaveLoad)
return false;
+ const DataLayout &DL = PN.getModule()->getDataLayout();
+
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
@@ -1402,8 +1407,8 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (InVal->isDereferenceablePointer(DL) ||
- isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
+ if (isDereferenceablePointer(InVal, DL) ||
+ isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
continue;
return false;
@@ -1468,12 +1473,12 @@ static void speculatePHINodeLoads(PHINode &PN) {
///
/// We can do this to a select if its only uses are loads and if the operand
/// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI,
- const DataLayout *DL = nullptr) {
+static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
- bool TDerefable = TValue->isDereferenceablePointer(DL);
- bool FDerefable = FValue->isDereferenceablePointer(DL);
+ const DataLayout &DL = SI.getModule()->getDataLayout();
+ bool TDerefable = isDereferenceablePointer(TValue, DL);
+ bool FDerefable = isDereferenceablePointer(FValue, DL);
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1484,10 +1489,10 @@ static bool isSafeSelectToSpeculate(SelectInst &SI,
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
if (!TDerefable &&
- !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment(), DL))
+ !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment()))
return false;
if (!FDerefable &&
- !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment(), DL))
+ !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment()))
return false;
}
@@ -1547,7 +1552,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
+ return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
+ NamePrefix + "sroa_idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1798,7 +1804,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
OffsetPtr = Int8PtrOffset == 0
? Int8Ptr
- : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
+ : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
+ IRB.getInt(Int8PtrOffset),
NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
@@ -3245,7 +3252,8 @@ private:
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
- Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *GEP =
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
@@ -3278,7 +3286,7 @@ private:
// Extract the single value and store it using the indices.
Value *Store = IRB.CreateStore(
IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
- IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
+ IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
}
@@ -3699,6 +3707,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// them to the alloca slices.
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
std::vector<LoadInst *> SplitLoads;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
for (LoadInst *LI : Loads) {
SplitLoads.clear();
@@ -3724,10 +3733,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
LoadInst *PLoad = IRB.CreateAlignedLoad(
- getAdjustedPtr(IRB, *DL, BasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ getAdjustedPtr(IRB, DL, BasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false,
+ getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
// Append this load onto the list of split loads so we can find it later
@@ -3777,10 +3786,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
(void)PStore;
DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
@@ -3857,20 +3866,20 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
} else {
IRB.SetInsertPoint(BasicBlock::iterator(LI));
PLoad = IRB.CreateAlignedLoad(
- getAdjustedPtr(IRB, *DL, LoadBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ getAdjustedPtr(IRB, DL, LoadBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, LoadBasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false,
+ getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
}
// And store this partition.
IRB.SetInsertPoint(BasicBlock::iterator(SI));
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
NewSlices.push_back(
@@ -3964,31 +3973,32 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
-bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- AllocaSlices::Partition &P) {
+AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ AllocaSlices::Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
- if (DL->getTypeAllocSize(CommonUseTy) >= P.size())
+ if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
SliceTy = CommonUseTy;
if (!SliceTy)
- if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
+ if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
- DL->isLegalInteger(P.size() * 8))
+ DL.isLegalInteger(P.size() * 8))
SliceTy = Type::getIntNTy(*C, P.size() * 8);
if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
- assert(DL->getTypeAllocSize(SliceTy) >= P.size());
+ assert(DL.getTypeAllocSize(SliceTy) >= P.size());
- bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, *DL);
+ bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
VectorType *VecTy =
- IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, *DL);
+ IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
if (VecTy)
SliceTy = VecTy;
@@ -4003,18 +4013,19 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
NewAI = &AI;
// FIXME: We should be able to bail at this point with "nothing changed".
// FIXME: We might want to defer PHI speculation until after here.
+ // FIXME: return nullptr;
} else {
unsigned Alignment = AI.getAlignment();
if (!Alignment) {
// The minimum alignment which users can rely on when the explicit
// alignment is omitted or zero is that required by the ABI for this
// type.
- Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
+ Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
}
Alignment = MinAlign(Alignment, P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
- if (Alignment <= DL->getABITypeAlignment(SliceTy))
+ if (Alignment <= DL.getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI = new AllocaInst(
SliceTy, nullptr, Alignment,
@@ -4034,7 +4045,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
SmallPtrSet<PHINode *, 8> PHIUsers;
SmallPtrSet<SelectInst *, 8> SelectUsers;
- AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, P.beginOffset(),
+ AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
PHIUsers, SelectUsers);
bool Promotable = true;
@@ -4056,7 +4067,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
E = PHIUsers.end();
I != E; ++I)
- if (!isSafePHIToSpeculate(**I, DL)) {
+ if (!isSafePHIToSpeculate(**I)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
@@ -4065,7 +4076,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
E = SelectUsers.end();
I != E; ++I)
- if (!isSafeSelectToSpeculate(**I, DL)) {
+ if (!isSafeSelectToSpeculate(**I)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
@@ -4098,7 +4109,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
PostPromotionWorklist.pop_back();
}
- return true;
+ return NewAI;
}
/// \brief Walks the slices of an alloca and form partitions based on them,
@@ -4109,6 +4120,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
unsigned NumPartitions = 0;
bool Changed = false;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
// First try to pre-split loads and stores.
Changed |= presplitLoadsAndStores(AI, AS);
@@ -4126,7 +4138,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// confident that the above handling of splittable loads and stores is
// completely sufficient before we forcibly disable the remaining handling.
if (S.beginOffset() == 0 &&
- S.endOffset() >= DL->getTypeAllocSize(AI.getAllocatedType()))
+ S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
continue;
if (isa<LoadInst>(S.getUse()->getUser()) ||
isa<StoreInst>(S.getUse()->getUser())) {
@@ -4137,9 +4149,29 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (!IsSorted)
std::sort(AS.begin(), AS.end());
+ /// \brief Describes the allocas introduced by rewritePartition
+ /// in order to migrate the debug info.
+ struct Piece {
+ AllocaInst *Alloca;
+ uint64_t Offset;
+ uint64_t Size;
+ Piece(AllocaInst *AI, uint64_t O, uint64_t S)
+ : Alloca(AI), Offset(O), Size(S) {}
+ };
+ SmallVector<Piece, 4> Pieces;
+
// Rewrite each partition.
for (auto &P : AS.partitions()) {
- Changed |= rewritePartition(AI, AS, P);
+ if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
+ Changed = true;
+ if (NewAI != &AI) {
+ uint64_t SizeOfByte = 8;
+ uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
+ // Don't include any padding.
+ uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
+ Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
+ }
+ }
++NumPartitions;
}
@@ -4147,6 +4179,42 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
MaxPartitionsPerAlloca =
std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
+ // Migrate debug information from the old alloca to the new alloca(s)
+ // and the individual partitions.
+ if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
+ auto *Var = DbgDecl->getVariable();
+ auto *Expr = DbgDecl->getExpression();
+ DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
+ /*AllowUnresolved*/ false);
+ bool IsSplit = Pieces.size() > 1;
+ for (auto Piece : Pieces) {
+ // Create a piece expression describing the new partition or reuse AI's
+ // expression if there is only one partition.
+ auto *PieceExpr = Expr;
+ if (IsSplit || Expr->isBitPiece()) {
+ // If this alloca is already a scalar replacement of a larger aggregate,
+ // Piece.Offset describes the offset inside the scalar.
+ uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
+ uint64_t Start = Offset + Piece.Offset;
+ uint64_t Size = Piece.Size;
+ if (Expr->isBitPiece()) {
+ uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
+ if (Start >= AbsEnd)
+ // No need to describe a SROAed padding.
+ continue;
+ Size = std::min(Size, AbsEnd - Start);
+ }
+ PieceExpr = DIB.createBitPieceExpression(Start, Size);
+ }
+
+ // Remove any existing dbg.declare intrinsic describing the same alloca.
+ if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
+ OldDDI->eraseFromParent();
+
+ DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
+ &AI);
+ }
+ }
return Changed;
}
@@ -4179,21 +4247,22 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
AI.eraseFromParent();
return true;
}
+ const DataLayout &DL = AI.getModule()->getDataLayout();
// Skip alloca forms that this analysis can't handle.
if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
return false;
bool Changed = false;
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter(*DL);
+ AggLoadStoreRewriter AggRewriter(DL);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
- AllocaSlices AS(*DL, AI);
+ AllocaSlices AS(DL, AI);
DEBUG(AS.print(dbgs()));
if (AS.isEscaped())
return Changed;
@@ -4258,8 +4327,11 @@ void SROA::deleteDeadInstructions(
DeadInsts.insert(U);
}
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
+ if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI))
+ DbgDecl->eraseFromParent();
+ }
++NumDeleted;
I->eraseFromParent();
@@ -4363,12 +4435,6 @@ bool SROA::runOnFunction(Function &F) {
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP) {
- DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
- return false;
- }
- DL = &DLP->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -4376,9 +4442,10 @@ bool SROA::runOnFunction(Function &F) {
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
- I != E; ++I)
+ I != E; ++I) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
Worklist.insert(AI);
+ }
bool Changed = false;
// A set of deleted alloca instruction pointers which should be removed from
diff --git a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
index 179bbf7..3480cd4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
@@ -95,7 +95,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTree>();
}
@@ -217,13 +217,16 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
/// \returns The profiled weight of I.
unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
DebugLoc DLoc = Inst.getDebugLoc();
+ if (!DLoc)
+ return 0;
+
unsigned Lineno = DLoc.getLine();
if (Lineno < HeaderLineno)
return 0;
- DILocation DIL(DLoc.getAsMDNode(*Ctx));
+ const DILocation *DIL = DLoc;
int LOffset = Lineno - HeaderLineno;
- unsigned Discriminator = DIL.getDiscriminator();
+ unsigned Discriminator = DIL->getDiscriminator();
unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
<< " (line offset: " << LOffset << "." << Discriminator
@@ -577,6 +580,10 @@ void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
unsigned i = 0;
+ // Add an entry count to the function using the samples gathered
+ // at the function entry.
+ F.setEntryCount(Samples->getHeadSamples());
+
// Before propagation starts, build, for each block, a list of
// unique predecessors and successors. This is necessary to handle
// identical edges in multiway branches. Since we visit all blocks and all
@@ -639,9 +646,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {
/// \returns the line number where \p F is defined. If it returns 0,
/// it means that there is no debug information available for \p F.
unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
- DISubprogram S = getDISubprogram(&F);
- if (S.isSubprogram())
- return S.getLineNumber();
+ if (DISubprogram *S = getDISubprogram(&F))
+ return S->getLine();
// If could not find the start of \p F, emit a diagnostic to inform the user
// about the missed opportunity.
@@ -731,7 +737,7 @@ INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
@@ -762,7 +768,7 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
PDT = &getAnalysis<PostDominatorTree>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
Ctx = &F.getParent()->getContext();
Samples = Reader->getSamplesFor(F);
if (!Samples->empty())
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index a16e9e2..d5d3605 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -20,7 +20,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
using namespace llvm;
@@ -28,6 +28,7 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
+ initializeBDCEPass(Registry);
initializeAlignmentFromAssumptionsPass(Registry);
initializeSampleProfileLoaderPass(Registry);
initializeConstantHoistingPass(Registry);
@@ -38,13 +39,16 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeScalarizerPass(Registry);
initializeDSEPass(Registry);
initializeGVNPass(Registry);
- initializeEarlyCSEPass(Registry);
+ initializeEarlyCSELegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
+ initializeInductiveRangeCheckEliminationPass(Registry);
initializeIndVarSimplifyPass(Registry);
initializeJumpThreadingPass(Registry);
initializeLICMPass(Registry);
initializeLoopDeletionPass(Registry);
+ initializeLoopAccessAnalysisPass(Registry);
initializeLoopInstSimplifyPass(Registry);
+ initializeLoopInterchangePass(Registry);
initializeLoopRotatePass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopRerollPass(Registry);
@@ -55,9 +59,11 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
initializeMergedLoadStoreMotionPass(Registry);
+ initializeNaryReassociatePass(Registry);
initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
+ initializeRewriteStatepointsForGCPass(Registry);
initializeSCCPPass(Registry);
initializeIPSCCPPass(Registry);
initializeSROAPass(Registry);
@@ -68,7 +74,13 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
initializeSeparateConstOffsetFromGEPPass(Registry);
+ initializeSpeculativeExecutionPass(Registry);
+ initializeStraightLineStrengthReducePass(Registry);
initializeLoadCombinePass(Registry);
+ initializePlaceBackedgeSafepointsImplPass(Registry);
+ initializePlaceSafepointsPass(Registry);
+ initializeFloat2IntPass(Registry);
+ initializeLoopDistributePass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -79,6 +91,10 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAggressiveDCEPass());
}
+void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBitTrackingDCEPass());
+}
+
void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAlignmentFromAssumptionsPass());
}
@@ -198,7 +214,6 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
- // FIXME: should this also add createDebugInfoVerifierPass()?
}
void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
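
The Scalar.cpp hunks above register several new passes and expose a C binding, LLVMAddBitTrackingDCEPass, for the new -bdce pass. A usage sketch under the assumption that the binding is declared in llvm-c/Transforms/Scalar.h alongside the other scalar-pass wrappers; the module handle M is a placeholder:

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Scalar.h"

// Run bit-tracking dead code elimination on a module through the C API.
static void runBDCE(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddBitTrackingDCEPass(PM);   // wrapper added by this change
  LLVMRunPassManager(PM, M);
  LLVMDisposePassManager(PM);
}
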
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 5c49a55..d955da7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -89,7 +89,6 @@ namespace {
private:
bool HasDomTree;
- const DataLayout *DL;
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
@@ -159,9 +158,10 @@ namespace {
void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
Type *MemOpType, bool isStore, AllocaInfo &Info,
Instruction *TheAccess, bool AllowWholeAccess);
- bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
- uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
- Type *&IdxTy);
+ bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
+ const DataLayout &DL);
+ uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
+ const DataLayout &DL);
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
@@ -699,9 +699,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &DL, 0));
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));
- if (GetUnderlyingObject(MTI->getSource(), &DL, 0) != OrigAI) {
+ if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
@@ -717,7 +717,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (GetUnderlyingObject(MTI->getDest(), &DL, 0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
@@ -1032,17 +1032,8 @@ bool SROA::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
bool Changed = performPromotion(F);
- // FIXME: ScalarRepl currently depends on DataLayout more than it
- // theoretically needs to. It should be refactored in order to support
- // target-independent IR. Until this is done, just skip the actual
- // scalar-replacement portion of this pass.
- if (!DL) return Changed;
-
while (1) {
bool LocalChange = performScalarRepl(F);
if (!LocalChange) break; // No need to repromote if no scalarrepl
@@ -1061,7 +1052,7 @@ class AllocaPromoter : public LoadAndStorePromoter {
SmallVector<DbgDeclareInst *, 4> DDIs;
SmallVector<DbgValueInst *, 4> DVIs;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ AllocaPromoter(ArrayRef<Instruction*> Insts, SSAUpdater &S,
DIBuilder *DB)
: LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
@@ -1069,8 +1060,8 @@ public:
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
if (auto *L = LocalAsMetadata::getIfExists(AI)) {
- if (auto *DebugNode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
- for (User *U : DebugNode->users())
+ if (auto *DINode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
+ for (User *U : DINode->users())
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
@@ -1126,10 +1117,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal = DIB->insertDbgValueIntrinsic(
- Arg, 0, DIVariable(DVI->getVariable()),
- DIExpression(DVI->getExpression()), Inst);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ DIB->insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
+ DVI->getExpression(), DVI->getDebugLoc(),
+ Inst);
}
}
};
@@ -1148,9 +1138,10 @@ public:
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
- bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL);
- bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL);
+static bool isSafeSelectToSpeculate(SelectInst *SI) {
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ bool TDerefable = isDereferenceablePointer(SI->getTrueValue(), DL);
+ bool FDerefable = isDereferenceablePointer(SI->getFalseValue(), DL);
for (User *U : SI->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1158,11 +1149,13 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
    // Both operands to the select need to be dereferenceable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
- if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
- LI->getAlignment(), DL))
+ if (!TDerefable &&
+ !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment()))
return false;
- if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
- LI->getAlignment(), DL))
+ if (!FDerefable &&
+ !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment()))
return false;
}
@@ -1185,7 +1178,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
+static bool isSafePHIToSpeculate(PHINode *PN) {
// For now, we can only do this promotion if the load is in the same block as
// the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1209,6 +1202,8 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
MaxAlign = std::max(MaxAlign, LI->getAlignment());
}
+ const DataLayout &DL = PN->getModule()->getDataLayout();
+
// Okay, we know that we have one or more loads in the same block as the PHI.
// We can transform this if it is safe to push the loads into the predecessor
// blocks. The only thing to watch out for is that we can't put a possibly
@@ -1233,8 +1228,8 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
- if (InVal->isDereferenceablePointer(DL) ||
- isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL))
+ if (isDereferenceablePointer(InVal, DL) ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
continue;
return false;
@@ -1248,7 +1243,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
/// direct (non-volatile) loads and stores to it. If the alloca is close but
/// not quite there, this will transform the code to allow promotion. As such,
/// it is a non-pure predicate.
-static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
SetVector<Instruction*, SmallVector<Instruction*, 4>,
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
for (User *U : AI->users()) {
@@ -1279,7 +1274,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
// If it is safe to turn "load (select c, AI, ptr)" into a select of two
// loads, then we can transform this by rewriting the select.
- if (!isSafeSelectToSpeculate(SI, DL))
+ if (!isSafeSelectToSpeculate(SI))
return false;
InstsToRewrite.insert(SI);
@@ -1294,7 +1289,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
// If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
// in the pred blocks, then we can transform this by rewriting the PHI.
- if (!isSafePHIToSpeculate(PN, DL))
+ if (!isSafePHIToSpeculate(PN))
return false;
InstsToRewrite.insert(PN);
@@ -1416,6 +1411,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
+ const DataLayout &DL = F.getParent()->getDataLayout();
DominatorTree *DT = nullptr;
if (HasDomTree)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1479,6 +1475,7 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
+ const DataLayout &DL = F.getParent()->getDataLayout();
// Scan the entry basic block, adding allocas to the worklist.
BasicBlock &BB = F.getEntryBlock();
@@ -1508,7 +1505,7 @@ bool SROA::performScalarRepl(Function &F) {
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
// value cannot be decomposed at all.
- uint64_t AllocaSize = DL->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
@@ -1531,8 +1528,9 @@ bool SROA::performScalarRepl(Function &F) {
// promoted itself. If so, we don't want to transform it needlessly. Note
// that we can't just check based on the type: the alloca may be of an i32
// but that has pointer arithmetic to set byte 3 of it or something.
- if (AllocaInst *NewAI = ConvertToScalarInfo(
- (unsigned)AllocaSize, *DL, ScalarLoadThreshold).TryConvert(AI)) {
+ if (AllocaInst *NewAI =
+ ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold)
+ .TryConvert(AI)) {
NewAI->takeName(AI);
AI->eraseFromParent();
++NumConverted;
@@ -1610,6 +1608,7 @@ void SROA::DeleteDeadInstructions() {
/// referenced by this instruction.
void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -1632,8 +1631,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
if (!LI->isSimple())
return MarkUnsafe(Info, User);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
- LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
+ LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -1642,8 +1641,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
- SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
+ SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
@@ -1675,6 +1674,7 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (!Info.CheckedPHIs.insert(PN).second)
return;
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
@@ -1691,8 +1691,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (!LI->isSimple())
return MarkUnsafe(Info, UI);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
- LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
+ LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
@@ -1701,8 +1701,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, UI);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
- SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
+ SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
isSafePHISelectUseForScalarRepl(UI, Offset, Info);
@@ -1746,9 +1746,11 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
// constant part of the offset.
if (NonConstant)
Indices.pop_back();
- Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
- if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
- NonConstantIdxSize))
+
+ const DataLayout &DL = GEPI->getModule()->getDataLayout();
+ Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize,
+ DL))
MarkUnsafe(Info, GEPI);
}
@@ -1803,9 +1805,10 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
Type *MemOpType, bool isStore,
AllocaInfo &Info, Instruction *TheAccess,
bool AllowWholeAccess) {
+ const DataLayout &DL = TheAccess->getModule()->getDataLayout();
// Check if this is a load/store of the entire alloca.
if (Offset == 0 && AllowWholeAccess &&
- MemSize == DL->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) {
// This can be safe for MemIntrinsics (where MemOpType is 0) and integer
// loads/stores (which are essentially the same as the MemIntrinsics with
// regard to copying padding between elements). But, if an alloca is
@@ -1828,7 +1831,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
}
// Check if the offset/size correspond to a component within the alloca type.
Type *T = Info.AI->getAllocatedType();
- if (TypeHasComponent(T, Offset, MemSize)) {
+ if (TypeHasComponent(T, Offset, MemSize, DL)) {
Info.hasSubelementAccess = true;
return;
}
@@ -1838,24 +1841,25 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
/// TypeHasComponent - Return true if T has a component type with the
/// specified offset and size. If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
+bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
+ const DataLayout &DL) {
Type *EltTy;
uint64_t EltSize;
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = DL->getStructLayout(ST);
+ const StructLayout *Layout = DL.getStructLayout(ST);
unsigned EltIdx = Layout->getElementContainingOffset(Offset);
EltTy = ST->getContainedType(EltIdx);
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
Offset -= Layout->getElementOffset(EltIdx);
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
EltTy = AT->getElementType();
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
if (Offset >= AT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
} else if (VectorType *VT = dyn_cast<VectorType>(T)) {
EltTy = VT->getElementType();
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
if (Offset >= VT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
@@ -1867,7 +1871,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
// Check if the component spans multiple elements.
if (Offset + Size > EltSize)
return false;
- return TypeHasComponent(EltTy, Offset, Size);
+ return TypeHasComponent(EltTy, Offset, Size, DL);
}
/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
@@ -1876,6 +1880,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVectorImpl<AllocaInst *> &NewElts) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
Use &TheUse = *UI++;
Instruction *User = cast<Instruction>(TheUse.getUser());
@@ -1893,8 +1898,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
- if (Offset == 0 &&
- MemSize == DL->getTypeAllocSize(AI->getAllocatedType()))
+ if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType()))
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
@@ -1930,8 +1934,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
} else if (LIType->isIntegerTy() &&
- DL->getTypeAllocSize(LIType) ==
- DL->getTypeAllocSize(AI->getAllocatedType())) {
+ DL.getTypeAllocSize(LIType) ==
+ DL.getTypeAllocSize(AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
@@ -1957,8 +1961,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
DeadInsts.push_back(SI);
} else if (SIType->isIntegerTy() &&
- DL->getTypeAllocSize(SIType) ==
- DL->getTypeAllocSize(AI->getAllocatedType())) {
+ DL.getTypeAllocSize(SIType) ==
+ DL.getTypeAllocSize(AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
@@ -2001,7 +2005,8 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
Type *T = AI->getAllocatedType();
uint64_t EltOffset = 0;
Type *IdxTy;
- uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy,
+ BC->getModule()->getDataLayout());
Instruction *Val = NewElts[Idx];
if (Val->getType() != BC->getDestTy()) {
Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
@@ -2016,11 +2021,12 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
/// Sets T to the type of the element and Offset to the offset within that
/// element. IdxTy is set to the type of the index result to be used in a
/// GEP instruction.
-uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
- Type *&IdxTy) {
+uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
+ const DataLayout &DL) {
uint64_t Idx = 0;
+
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = DL->getStructLayout(ST);
+ const StructLayout *Layout = DL.getStructLayout(ST);
Idx = Layout->getElementContainingOffset(Offset);
T = ST->getContainedType(Idx);
Offset -= Layout->getElementOffset(Idx);
@@ -2028,7 +2034,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
return Idx;
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
T = AT->getElementType();
- uint64_t EltSize = DL->getTypeAllocSize(T);
+ uint64_t EltSize = DL.getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2036,7 +2042,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
}
VectorType *VT = cast<VectorType>(T);
T = VT->getElementType();
- uint64_t EltSize = DL->getTypeAllocSize(T);
+ uint64_t EltSize = DL.getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2049,6 +2055,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVectorImpl<AllocaInst *> &NewElts) {
uint64_t OldOffset = Offset;
+ const DataLayout &DL = GEPI->getModule()->getDataLayout();
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
// If the GEP was dynamic then it must have been a dynamic vector lookup.
// In this case, it must be the last GEP operand which is dynamic so keep that
@@ -2057,19 +2064,19 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
Value* NonConstantIdx = nullptr;
if (!GEPI->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
- Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
Type *T = AI->getAllocatedType();
Type *IdxTy;
- uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL);
if (GEPI->getOperand(0) == AI)
OldIdx = ~0ULL; // Force the GEP to be rewritten.
T = AI->getAllocatedType();
uint64_t EltOffset = Offset;
- uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
// If this GEP does not move the pointer across elements of the alloca
  // being split, then it does not need to be rewritten.
@@ -2080,7 +2087,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<Value*, 8> NewArgs;
NewArgs.push_back(Constant::getNullValue(i32Ty));
while (EltOffset != 0) {
- uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
}
if (NonConstantIdx) {
@@ -2114,9 +2121,10 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
// Put matching lifetime markers on everything from Offset up to
// Offset+OldSize.
Type *AIType = AI->getAllocatedType();
+ const DataLayout &DL = II->getModule()->getDataLayout();
uint64_t NewOffset = Offset;
Type *IdxTy;
- uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL);
IRBuilder<> Builder(II);
uint64_t Size = OldSize->getLimitedValue();
@@ -2126,10 +2134,10 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
// split the alloca again later.
unsigned AS = AI->getType()->getAddressSpace();
Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
- V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
+ V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = DL->getTypeAllocSize(IdxTy) - NewOffset;
+ uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset;
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2145,7 +2153,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
for (; Idx != NewElts.size() && Size; ++Idx) {
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = DL->getTypeAllocSize(IdxTy);
+ uint64_t EltSize = DL.getTypeAllocSize(IdxTy);
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2221,6 +2229,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
bool SROADest = MI->getRawDest() == Inst;
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
+ const DataLayout &DL = MI->getModule()->getDataLayout();
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
@@ -2237,10 +2246,10 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
Type *OtherTy = OtherPtrTy->getElementType();
if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
- EltOffset = DL->getStructLayout(ST)->getElementOffset(i);
+ EltOffset = DL.getStructLayout(ST)->getElementOffset(i);
} else {
Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
- EltOffset = DL->getTypeAllocSize(EltTy)*i;
+ EltOffset = DL.getTypeAllocSize(EltTy) * i;
}
// The alignment of the other pointer is the guaranteed alignment of the
@@ -2281,7 +2290,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
Type *ValTy = EltTy->getScalarType();
// Construct an integer with the right value.
- unsigned EltSize = DL->getTypeSizeInBits(ValTy);
+ unsigned EltSize = DL.getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
APInt TotalVal(OneVal);
// Set each byte.
@@ -2311,7 +2320,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// this element.
}
- unsigned EltSize = DL->getTypeAllocSize(EltTy);
+ unsigned EltSize = DL.getTypeAllocSize(EltTy);
if (!EltSize)
continue;
@@ -2345,12 +2354,13 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
// Handle tail padding by extending the operand
- if (DL->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = Builder.CreateZExt(SrcVal,
IntegerType::get(SI->getContext(), AllocaSizeBits));
@@ -2360,15 +2370,15 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- const StructLayout *Layout = DL->getStructLayout(EltSTy);
+ const StructLayout *Layout = DL.getStructLayout(EltSTy);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
- if (DL->isBigEndian())
- Shift = AllocaSizeBits-Shift-DL->getTypeAllocSizeInBits(FieldTy);
+ if (DL.isBigEndian())
+ Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy);
Value *EltVal = SrcVal;
if (Shift) {
@@ -2377,7 +2387,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
// Truncate down to an integer of the right size.
- uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2402,12 +2412,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
} else {
ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
Type *ArrayEltTy = ATy->getElementType();
- uint64_t ElementOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
- uint64_t ElementSizeBits = DL->getTypeSizeInBits(ArrayEltTy);
+ uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy);
uint64_t Shift;
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift = AllocaSizeBits-ElementOffset;
else
Shift = 0;
@@ -2441,7 +2451,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
new StoreInst(EltVal, DestField, SI);
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift -= ElementOffset;
else
Shift += ElementOffset;
@@ -2459,7 +2469,8 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
@@ -2469,10 +2480,10 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const StructLayout *Layout = nullptr;
uint64_t ArrayEltBitOffset = 0;
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- Layout = DL->getStructLayout(EltSTy);
+ Layout = DL.getStructLayout(EltSTy);
} else {
Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
- ArrayEltBitOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
+ ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
}
Value *ResultVal =
@@ -2484,7 +2495,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
Value *SrcField = NewElts[i];
Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
- uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2515,7 +2526,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
else // Array case.
Shift = i*ArrayEltBitOffset;
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
if (Shift) {
@@ -2532,7 +2543,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
}
// Handle tail padding by truncating the result
- if (DL->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
LI->replaceAllUsesWith(ResultVal);
@@ -2589,13 +2600,15 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return false;
}
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+
// Okay, we know all the users are promotable. If the aggregate is a memcpy
// source and destination, we have to be careful. In particular, the memcpy
// could be moving around elements that live in structure padding of the LLVM
// types, but may actually be used. In these cases, we refuse to promote the
// struct.
if (Info.isMemCpySrc && Info.isMemCpyDst &&
- HasPadding(AI->getAllocatedType(), *DL))
+ HasPadding(AI->getAllocatedType(), DL))
return false;
// If the alloca never has an access to just *part* of it, but is accessed
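
Most of the ScalarReplAggregates.cpp churn above is a single mechanical change: DataLayout is no longer obtained from the removed DataLayoutPass as a nullable pointer but taken by reference from the owning Module, which also eliminates the old "skip scalarrepl when no DataLayout" bail-out. A minimal sketch of the new plumbing; the helper name is illustrative:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

// Query the allocation size of Ty relative to the module that owns I.
static uint64_t allocSizeNear(const llvm::Instruction *I, llvm::Type *Ty) {
  const llvm::DataLayout &DL = I->getModule()->getDataLayout();
  return DL.getTypeAllocSize(Ty);  // a reference: no null check is needed
}
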
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 6036c09..d55dc6a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -165,7 +165,7 @@ private:
void gather(Instruction *, const ValueVector &);
bool canTransferMetadata(unsigned Kind);
void transferMetadata(Instruction *, const ValueVector &);
- bool getVectorLayout(Type *, unsigned, VectorLayout &);
+ bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
bool finish();
template<typename T> bool splitBinary(Instruction &, const T &);
@@ -173,7 +173,6 @@ private:
ScatterMap Scattered;
GatherList Gathered;
unsigned ParallelLoopAccessMDKind;
- const DataLayout *DL;
bool ScalarizeLoadStore;
};
@@ -214,7 +213,7 @@ Value *Scatterer::operator[](unsigned I) {
CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
}
if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
+ CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
V->getName() + ".i" + Twine(I));
} else {
// Search through a chain of InsertElementInsts looking for element I.
@@ -248,8 +247,6 @@ bool Scalarizer::doInitialization(Module &M) {
}
bool Scalarizer::runOnFunction(Function &F) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
BasicBlock *BB = BBI;
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
@@ -345,10 +342,7 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
// Try to fill in Layout from Ty, returning true on success. Alignment is
// the alignment of the vector, or 0 if the ABI default should be used.
bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
- VectorLayout &Layout) {
- if (!DL)
- return false;
-
+ VectorLayout &Layout, const DataLayout &DL) {
// Make sure we're dealing with a vector.
Layout.VecTy = dyn_cast<VectorType>(Ty);
if (!Layout.VecTy)
@@ -356,15 +350,15 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
// Check that we're dealing with full-byte elements.
Layout.ElemTy = Layout.VecTy->getElementType();
- if (DL->getTypeSizeInBits(Layout.ElemTy) !=
- DL->getTypeStoreSizeInBits(Layout.ElemTy))
+ if (DL.getTypeSizeInBits(Layout.ElemTy) !=
+ DL.getTypeStoreSizeInBits(Layout.ElemTy))
return false;
if (Alignment)
Layout.VecAlign = Alignment;
else
- Layout.VecAlign = DL->getABITypeAlignment(Layout.VecTy);
- Layout.ElemSize = DL->getTypeStoreSize(Layout.ElemTy);
+ Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
+ Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
return true;
}
@@ -456,7 +450,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
Indices.resize(NumIndices);
for (unsigned J = 0; J < NumIndices; ++J)
Indices[J] = Ops[J][I];
- Res[I] = Builder.CreateGEP(Base[I], Indices,
+ Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
GEPI.getName() + ".i" + Twine(I));
if (GEPI.isInBounds())
if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
@@ -595,7 +589,8 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
return false;
VectorLayout Layout;
- if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
+ if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
+ LI.getModule()->getDataLayout()))
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
@@ -619,7 +614,8 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
VectorLayout Layout;
Value *FullValue = SI.getValueOperand();
- if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
+ if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
+ SI.getModule()->getDataLayout()))
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
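
The Scalarizer hunks above also show the updated IRBuilder GEP interfaces: CreateGEP and CreateConstGEP1_32 now take the source element type as an explicit leading argument, with nullptr still accepted while callers are being migrated. A small sketch with placeholder builder and operand names:

#include "llvm/IR/IRBuilder.h"

// Build two GEPs with the new explicit-element-type overloads.
static void gepExamples(llvm::IRBuilder<> &Builder, llvm::Value *Ptr,
                        llvm::Value *Idx) {
  // Explicit element type, as in visitGetElementPtrInst above.
  Builder.CreateGEP(Builder.getInt8Ty(), Ptr, Idx, "byte.idx");
  // nullptr lets the element type be derived from the pointer operand,
  // as in Scatterer::operator[] above.
  Builder.CreateConstGEP1_32(nullptr, Ptr, 1, "elt1");
}
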
diff --git a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 6157746..3a782d1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -160,6 +160,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -167,6 +168,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
@@ -177,6 +179,13 @@ static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false),
cl::desc("Do not separate the constant offset from a GEP instruction"),
cl::Hidden);
+// Setting this flag may emit false positives when the input module already
+// contains dead instructions. Therefore, we set it only in unit tests that are
+// free of dead code.
+static cl::opt<bool>
+ VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
+ cl::desc("Verify this pass produces no dead code"),
+ cl::Hidden);
namespace {
@@ -194,23 +203,26 @@ namespace {
/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), however in this case,
/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
class ConstantOffsetExtractor {
- public:
+public:
/// Extracts a constant offset from the given GEP index. It returns the
/// new index representing the remainder (equal to the original index minus
/// the constant offset), or nullptr if we cannot extract a constant offset.
- /// \p Idx The given GEP index
- /// \p DL The datalayout of the module
- /// \p GEP The given GEP
- static Value *Extract(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP);
+ /// \p Idx The given GEP index
+ /// \p GEP The given GEP
+ /// \p UserChainTail Outputs the tail of UserChain so that we can
+ /// garbage-collect unused instructions in UserChain.
+ static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
+ User *&UserChainTail, const DominatorTree *DT);
/// Looks for a constant offset from the given GEP index without extracting
/// it. It returns the numeric value of the extracted constant offset (0 if
    /// failed). The meanings of the arguments are the same as in Extract.
- static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
+ static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
+ const DominatorTree *DT);
- private:
- ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
- : DL(Layout), IP(InsertionPt) {}
+private:
+ ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
+ : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
+ }
/// Searches the expression that computes V for a non-zero constant C s.t.
/// V can be reassociated into the form V' + C. If the searching is
  /// successful, returns C and updates UserChain as a def-use chain from C to V;
@@ -268,12 +280,6 @@ class ConstantOffsetExtractor {
/// returns "sext i32 (zext i16 V to i32) to i64".
Value *applyExts(Value *V);
- /// Returns true if LHS and RHS have no bits in common, i.e., LHS | RHS == 0.
- bool NoCommonBits(Value *LHS, Value *RHS) const;
- /// Computes which bits are known to be one or zero.
- /// \p KnownOne Mask of all bits that are known to be one.
- /// \p KnownZero Mask of all bits that are known to be zero.
- void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
/// A helper function that returns whether we can trace into the operands
/// of binary operator BO for a constant offset.
///
@@ -294,39 +300,36 @@ class ConstantOffsetExtractor {
/// A data structure used in rebuildWithoutConstOffset. Contains all
/// sext/zext instructions along UserChain.
SmallVector<CastInst *, 16> ExtInsts;
- /// The data layout of the module. Used in ComputeKnownBits.
- const DataLayout *DL;
Instruction *IP; /// Insertion position of cloned instructions.
+ const DataLayout &DL;
+ const DominatorTree *DT;
};
/// \brief A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP : public FunctionPass {
- public:
+public:
static char ID;
SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr,
bool LowerGEP = false)
- : FunctionPass(ID), TM(TM), LowerGEP(LowerGEP) {
+ : FunctionPass(ID), DL(nullptr), DT(nullptr), TM(TM), LowerGEP(LowerGEP) {
initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
}
bool doInitialization(Module &M) override {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (DLP == nullptr)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ DL = &M.getDataLayout();
return false;
}
-
bool runOnFunction(Function &F) override;
- private:
+private:
/// Tries to split the given GEP into a variadic base and a constant offset,
/// and returns true if the splitting succeeds.
bool splitGEP(GetElementPtrInst *GEP);
@@ -370,8 +373,11 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
///
/// Verified in @i32_add in split-gep.ll
bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+ /// Verify F is free of dead code.
+ void verifyNoDeadCode(Function &F);
const DataLayout *DL;
+ const DominatorTree *DT;
const TargetMachine *TM;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
@@ -384,8 +390,8 @@ INITIALIZE_PASS_BEGIN(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
@@ -413,7 +419,8 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
// Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
// don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
- if (BO->getOpcode() == Instruction::Or && !NoCommonBits(LHS, RHS))
+ if (BO->getOpcode() == Instruction::Or &&
+ !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
return false;
// In addition, tracing into BO requires that its surrounding s/zext (if
@@ -498,9 +505,8 @@ APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
ConstantOffset = CI->getValue();
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
// Trace into subexpressions for more hoisting opportunities.
- if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) {
+ if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
- }
} else if (isa<SExtInst>(V)) {
ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
ZeroExtended, NonNegative).sext(BitWidth);
@@ -597,6 +603,11 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
}
BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
+ assert(BO->getNumUses() <= 1 &&
+ "distributeExtsAndCloneChain clones each BinaryOperator in "
+ "UserChain, so no one should be used more than "
+ "once");
+
unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
Value *NextInChain = removeConstOffset(ChainIndex - 1);
@@ -609,6 +620,7 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
return TheOther;
}
+ BinaryOperator::BinaryOps NewOp = BO->getOpcode();
if (BO->getOpcode() == Instruction::Or) {
// Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
@@ -623,68 +635,46 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
//
// Replacing the "or" with "add" is fine, because
// a | (b + 5) = a + (b + 5) = (a + b) + 5
- if (OpNo == 0) {
- return BinaryOperator::CreateAdd(NextInChain, TheOther, BO->getName(),
- IP);
- } else {
- return BinaryOperator::CreateAdd(TheOther, NextInChain, BO->getName(),
- IP);
- }
+ NewOp = Instruction::Add;
}
- // We can reuse BO in this case, because the new expression shares the same
- // instruction type and BO is used at most once.
- assert(BO->getNumUses() <= 1 &&
- "distributeExtsAndCloneChain clones each BinaryOperator in "
- "UserChain, so no one should be used more than "
- "once");
- BO->setOperand(OpNo, NextInChain);
- BO->setHasNoSignedWrap(false);
- BO->setHasNoUnsignedWrap(false);
- // Make sure it appears after all instructions we've inserted so far.
- BO->moveBefore(IP);
- return BO;
+ BinaryOperator *NewBO;
+ if (OpNo == 0) {
+ NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
+ } else {
+ NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
+ }
+ NewBO->takeName(BO);
+ return NewBO;
}
-Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP) {
- ConstantOffsetExtractor Extractor(DL, GEP);
+Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
+ User *&UserChainTail,
+ const DominatorTree *DT) {
+ ConstantOffsetExtractor Extractor(GEP, DT);
// Find a non-zero constant offset first.
APInt ConstantOffset =
Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds());
- if (ConstantOffset == 0)
+ if (ConstantOffset == 0) {
+ UserChainTail = nullptr;
return nullptr;
+ }
// Separates the constant offset from the GEP index.
- return Extractor.rebuildWithoutConstOffset();
+ Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
+ UserChainTail = Extractor.UserChain.back();
+ return IdxWithoutConstOffset;
}
-int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP) {
+int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
+ const DominatorTree *DT) {
// If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
- return ConstantOffsetExtractor(DL, GEP)
+ return ConstantOffsetExtractor(GEP, DT)
.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds())
.getSExtValue();
}
-void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
- APInt &KnownZero) const {
- IntegerType *IT = cast<IntegerType>(V->getType());
- KnownOne = APInt(IT->getBitWidth(), 0);
- KnownZero = APInt(IT->getBitWidth(), 0);
- llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
-}
-
-bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
- assert(LHS->getType() == RHS->getType() &&
- "LHS and RHS should have the same type");
- APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
- ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
- ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
- return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
-}
-
bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
GetElementPtrInst *GEP) {
bool Changed = false;
@@ -713,7 +703,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
if (isa<SequentialType>(*GTI)) {
// Tries to extract a constant offset from this GEP index.
int64_t ConstantOffset =
- ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
if (ConstantOffset != 0) {
NeedsExtraction = true;
// A GEP may have multiple indices. We accumulate the extracted
@@ -770,14 +760,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
}
}
// Create an ugly GEP with a single index for each index.
- ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep");
+ ResultPtr =
+ Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
}
}
// Create a GEP with the constant offset index.
if (AccumulativeByteOffset != 0) {
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
- ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep");
+ ResultPtr =
+ Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
}
if (ResultPtr->getType() != Variadic->getType())
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
@@ -857,7 +849,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// of variable indices. Therefore, we don't check for addressing modes in that
// case.
if (!LowerGEP) {
- TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *GEP->getParent()->getParent());
if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
/*BaseGV=*/nullptr, AccumulativeByteOffset,
/*HasBaseReg=*/true, /*Scale=*/0)) {
@@ -877,10 +871,17 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (isa<SequentialType>(*GTI)) {
// Splits this GEP index into a variadic part and a constant offset, and
// uses the variadic part as the new index.
+ Value *OldIdx = GEP->getOperand(I);
+ User *UserChainTail;
Value *NewIdx =
- ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP);
+ ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
if (NewIdx != nullptr) {
+ // Switches to the index with the constant offset removed.
GEP->setOperand(I, NewIdx);
+ // After switching to the new index, we can garbage-collect UserChain
+ // and the old index if they are not used.
+ RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
+ RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
}
}
}
@@ -910,7 +911,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (LowerGEP) {
// As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
// arithmetic operations if the target uses alias analysis in codegen.
- if (TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())
+ if (TM && TM->getSubtargetImpl(*GEP->getParent()->getParent())->useAA())
lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
else
lowerToArithmetics(GEP, AccumulativeByteOffset);
@@ -962,8 +963,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// Very likely. As long as %gep is natually aligned, the byte offset we
// extracted should be a multiple of sizeof(*%gep).
int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
- NewGEP = GetElementPtrInst::Create(
- NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+ NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
+ ConstantInt::get(IntPtrTy, Index, true),
+ GEP->getName(), GEP);
} else {
// Unlikely but possible. For example,
// #pragma pack(1)
@@ -983,8 +985,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
GEP->getPointerAddressSpace());
NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
NewGEP = GetElementPtrInst::Create(
- NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
- "uglygep", GEP);
+ Type::getInt8Ty(GEP->getContext()), NewGEP,
+ ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
+ GEP);
if (GEP->getType() != I8PtrTy)
NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
}
@@ -996,9 +999,14 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
}
bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
if (DisableSeparateConstOffsetFromGEP)
return false;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
bool Changed = false;
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
@@ -1009,5 +1017,22 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
// already.
}
}
+
+ if (VerifyNoDeadCode)
+ verifyNoDeadCode(F);
+
return Changed;
}
+
+void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
+ for (auto &B : F) {
+ for (auto &I : B) {
+ if (isInstructionTriviallyDead(&I)) {
+ std::string ErrMessage;
+ raw_string_ostream RSO(ErrMessage);
+ RSO << "Dead instruction detected!\n" << I << "\n";
+ llvm_unreachable(RSO.str().c_str());
+ }
+ }
+ }
+}
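
Two points in the SeparateConstOffsetFromGEP.cpp diff above are worth noting: the private NoCommonBits/ComputeKnownBits helpers are replaced by the shared ValueTracking utility haveNoCommonBitsSet(), which justifies treating (LHS | RHS) as (LHS + RHS) when the operands share no bits, and the pass now requires DominatorTreeWrapperPass so it can feed a DominatorTree into that query. A small sketch of the predicate, grounded in the call shown in CanTraceInto; the wrapper function itself is illustrative:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"

// An "or" can be traced into like an "add" only when its operands share no bits.
static bool orActsLikeAdd(llvm::BinaryOperator *BO,
                          const llvm::DominatorTree *DT) {
  const llvm::DataLayout &DL = BO->getModule()->getDataLayout();
  return BO->getOpcode() == llvm::Instruction::Or &&
         llvm::haveNoCommonBitsSet(BO->getOperand(0), BO->getOperand(1), DL,
                                   /*AC=*/nullptr, BO, DT);
}
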
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 2e317f9..8566cd9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -21,7 +21,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -37,6 +37,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "simplifycfg"
@@ -47,36 +48,6 @@ UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1),
STATISTIC(NumSimpl, "Number of blocks simplified");
-namespace {
-struct CFGSimplifyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- unsigned BonusInstThreshold;
- CFGSimplifyPass(int T = -1) : FunctionPass(ID) {
- BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
- initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfo>();
- }
-};
-}
-
-char CFGSimplifyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
- false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
- false)
-
-// Public interface to the CFGSimplification pass
-FunctionPass *llvm::createCFGSimplificationPass(int Threshold) {
- return new CFGSimplifyPass(Threshold);
-}
-
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
static bool mergeEmptyReturnBlocks(Function &F) {
@@ -156,7 +127,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
- const DataLayout *DL, AssumptionCache *AC,
+ AssumptionCache *AC,
unsigned BonusInstThreshold) {
bool Changed = false;
bool LocalChange = true;
@@ -166,7 +137,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, DL, AC)) {
+ if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
LocalChange = true;
++NumSimpl;
}
@@ -176,21 +147,11 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
return Changed;
}
-// It is possible that we may require multiple passes over the code to fully
-// simplify the CFG.
-//
-bool CFGSimplifyPass::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
-
- AssumptionCache *AC =
- &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
+static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
+ AssumptionCache *AC, int BonusInstThreshold) {
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -204,9 +165,66 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold);
+ EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
return true;
}
+
+SimplifyCFGPass::SimplifyCFGPass()
+ : BonusInstThreshold(UserBonusInstThreshold) {}
+
+SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold)
+ : BonusInstThreshold(BonusInstThreshold) {}
+
+PreservedAnalyses SimplifyCFGPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ auto &TTI = AM->getResult<TargetIRAnalysis>(F);
+ auto &AC = AM->getResult<AssumptionAnalysis>(F);
+
+ if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
+
+namespace {
+struct CFGSimplifyPass : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ unsigned BonusInstThreshold;
+ CFGSimplifyPass(int T = -1) : FunctionPass(ID) {
+ BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+};
+}
+
+char CFGSimplifyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
+ false)
+
+// Public interface to the CFGSimplification pass
+FunctionPass *llvm::createCFGSimplificationPass(int Threshold) {
+ return new CFGSimplifyPass(Threshold);
+}
+
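
For reference, a minimal sketch of how the legacy entry point above is typically
driven, assuming an existing Module M and the default threshold; this is an
illustration, not part of the patch:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    // Schedule simplifycfg over every defined function in M via the legacy
    // pass manager; -1 falls back to the -bonus-inst-threshold value.
    static bool runSimplifyCFGOnModule(Module &M) {
      legacy::FunctionPassManager FPM(&M);
      FPM.add(createCFGSimplificationPass(/*Threshold=*/-1));
      FPM.doInitialization();
      bool Changed = false;
      for (Function &F : M)
        if (!F.isDeclaration())
          Changed |= FPM.run(F);
      Changed |= FPM.doFinalization();
      return Changed;
    }
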
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index 903b675..b169d56 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -35,7 +36,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
AliasAnalysis *AA;
- const DataLayout *DL;
public:
static char ID; // Pass identification
@@ -50,9 +50,9 @@ namespace {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
}
private:
bool ProcessBlock(BasicBlock &BB);
@@ -64,7 +64,7 @@ namespace {
char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
@@ -98,10 +98,8 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
bool Sinking::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
AA = &getAnalysis<AliasAnalysis>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
bool MadeChange, EverMadeChange = false;
@@ -196,7 +194,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!isSafeToSpeculativelyExecute(Inst, DL))
+ if (!isSafeToSpeculativelyExecute(Inst))
return false;
// We don't want to sink across a critical edge if we don't dominate the
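
The Sink changes above follow the same analysis-wrapper migration seen
throughout this merge: LoopInfo is now obtained through LoopInfoWrapperPass and
isSafeToSpeculativelyExecute no longer takes a DataLayout. A minimal sketch of
that wrapper-pass idiom, using a hypothetical ExamplePass for illustration only:

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    // Analyses are requested through their *WrapperPass and unwrapped in
    // runOnFunction, as the updated Sink pass does.
    struct ExamplePass : public FunctionPass {
      static char ID;
      ExamplePass() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<DominatorTreeWrapperPass>();
        AU.addRequired<LoopInfoWrapperPass>();
        AU.setPreservesCFG();
      }

      bool runOnFunction(Function &F) override {
        DominatorTree &DT =
            getAnalysis<DominatorTreeWrapperPass>().getDomTree();
        LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
        (void)DT; (void)LI; // the analysis results would be used here
        return false;
      }
    };
    }
    char ExamplePass::ID = 0;
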
diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
new file mode 100644
index 0000000..ff3f00a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -0,0 +1,243 @@
+//===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass hoists instructions to enable speculative execution on
+// targets where branches are expensive. This is aimed at GPUs. It
+// currently works on simple if-then and if-then-else
+// patterns.
+//
+// Removing branches is not the only motivation for this
+// pass. E.g. consider this code and assume that there is no
+// addressing mode for multiplying by sizeof(*a):
+//
+// if (b > 0)
+// c = a[i + 1]
+// if (d > 0)
+// e = a[i + 2]
+//
+// turns into
+//
+// p = &a[i + 1];
+// if (b > 0)
+// c = *p;
+// q = &a[i + 2];
+// if (d > 0)
+// e = *q;
+//
+// which could later be optimized to
+//
+// r = &a[i];
+// if (b > 0)
+// c = r[1];
+// if (d > 0)
+// e = r[2];
+//
+// Later passes sink back much of the speculated code that did not enable
+// further optimization.
+//
+// This pass is more aggressive than the function SpeculativelyExecuteBB in
+// SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and
+// it will speculate at most one instruction. It also will not speculate if
+// there is a value defined in the if-block that is only used in the then-block.
+// These restrictions make sense since the speculation in SimplifyCFG seems
+// aimed at introducing cheap selects, while this pass is intended to do more
+// aggressive speculation while counting on later passes to either capitalize on
+// that or clean it up.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "speculative-execution"
+
+// The risk that speculation will not pay off increases with the
+// number of instructions speculated, so we put a limit on that.
+static cl::opt<unsigned> SpecExecMaxSpeculationCost(
+ "spec-exec-max-speculation-cost", cl::init(7), cl::Hidden,
+ cl::desc("Speculative execution is not applied to basic blocks where "
+ "the cost of the instructions to speculatively execute "
+ "exceeds this limit."));
+
+// Speculating just a few instructions from a larger block tends not
+// to be profitable and this limit prevents that. A reason for that is
+// that small basic blocks are more likely to be candidates for
+// further optimization.
+static cl::opt<unsigned> SpecExecMaxNotHoisted(
+ "spec-exec-max-not-hoisted", cl::init(5), cl::Hidden,
+ cl::desc("Speculative execution is not applied to basic blocks where the "
+ "number of instructions that would not be speculatively executed "
+ "exceeds this limit."));
+
+namespace {
+class SpeculativeExecution : public FunctionPass {
+ public:
+ static char ID;
+ SpeculativeExecution(): FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+
+ private:
+ bool runOnBasicBlock(BasicBlock &B);
+ bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
+
+ const TargetTransformInfo *TTI = nullptr;
+};
+} // namespace
+
+char SpeculativeExecution::ID = 0;
+INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution",
+ "Speculatively execute instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
+ "Speculatively execute instructions", false, false)
+
+void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+}
+
+bool SpeculativeExecution::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool Changed = false;
+ for (auto& B : F) {
+ Changed |= runOnBasicBlock(B);
+ }
+ return Changed;
+}
+
+bool SpeculativeExecution::runOnBasicBlock(BasicBlock &B) {
+ BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator());
+ if (BI == nullptr)
+ return false;
+
+ if (BI->getNumSuccessors() != 2)
+ return false;
+ BasicBlock &Succ0 = *BI->getSuccessor(0);
+ BasicBlock &Succ1 = *BI->getSuccessor(1);
+
+ if (&B == &Succ0 || &B == &Succ1 || &Succ0 == &Succ1) {
+ return false;
+ }
+
+ // Hoist from if-then (triangle).
+ if (Succ0.getSinglePredecessor() != nullptr &&
+ Succ0.getSingleSuccessor() == &Succ1) {
+ return considerHoistingFromTo(Succ0, B);
+ }
+
+ // Hoist from if-else (triangle).
+ if (Succ1.getSinglePredecessor() != nullptr &&
+ Succ1.getSingleSuccessor() == &Succ0) {
+ return considerHoistingFromTo(Succ1, B);
+ }
+
+ // Hoist from if-then-else (diamond), but only if it is equivalent to
+ // an if-else or if-then due to one of the branches doing nothing.
+ if (Succ0.getSinglePredecessor() != nullptr &&
+ Succ1.getSinglePredecessor() != nullptr &&
+ Succ1.getSingleSuccessor() != nullptr &&
+ Succ1.getSingleSuccessor() != &B &&
+ Succ1.getSingleSuccessor() == Succ0.getSingleSuccessor()) {
+    // If a block has only one instruction, then that instruction is the
+    // terminator, so the block does nothing. This does happen.
+ if (Succ1.size() == 1) // equivalent to if-then
+ return considerHoistingFromTo(Succ0, B);
+ if (Succ0.size() == 1) // equivalent to if-else
+ return considerHoistingFromTo(Succ1, B);
+ }
+
+ return false;
+}
+
+static unsigned ComputeSpeculationCost(const Instruction *I,
+ const TargetTransformInfo &TTI) {
+ switch (Operator::getOpcode(I)) {
+ case Instruction::GetElementPtr:
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Select:
+ case Instruction::Shl:
+ case Instruction::Sub:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Xor:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return TTI.getUserCost(I);
+
+ default:
+ return UINT_MAX; // Disallow anything not whitelisted.
+ }
+}
+
+bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
+ BasicBlock &ToBlock) {
+ SmallSet<const Instruction *, 8> NotHoisted;
+ const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) {
+ for (Value* V : U->operand_values()) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (NotHoisted.count(I) > 0)
+ return false;
+ }
+ }
+ return true;
+ };
+
+ unsigned TotalSpeculationCost = 0;
+ for (auto& I : FromBlock) {
+ const unsigned Cost = ComputeSpeculationCost(&I, *TTI);
+ if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) &&
+ AllPrecedingUsesFromBlockHoisted(&I)) {
+ TotalSpeculationCost += Cost;
+ if (TotalSpeculationCost > SpecExecMaxSpeculationCost)
+ return false; // too much to hoist
+ } else {
+ NotHoisted.insert(&I);
+ if (NotHoisted.size() > SpecExecMaxNotHoisted)
+ return false; // too much left behind
+ }
+ }
+
+ if (TotalSpeculationCost == 0)
+ return false; // nothing to hoist
+
+ for (auto I = FromBlock.begin(); I != FromBlock.end();) {
+ // We have to increment I before moving Current as moving Current
+ // changes the list that I is iterating through.
+ auto Current = I;
+ ++I;
+ if (!NotHoisted.count(Current)) {
+ Current->moveBefore(ToBlock.getTerminator());
+ }
+ }
+ return true;
+}
+
+namespace llvm {
+
+FunctionPass *createSpeculativeExecutionPass() {
+ return new SpeculativeExecution();
+}
+
+} // namespace llvm
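
The hoisting decision in considerHoistingFromTo above is governed by two caps:
the cumulative TTI cost of the hoisted instructions (spec-exec-max-speculation-cost,
default 7) and the number of instructions left behind (spec-exec-max-not-hoisted,
default 5). A standalone sketch of that bookkeeping, with per-instruction costs
assumed to be known up front and the operand-dependence check omitted; the real
pass gets its costs from TargetTransformInfo::getUserCost:

    #include <climits>
    #include <vector>

    // One entry per instruction in the "from" block: its cost and whether it
    // may be speculated (cost == UINT_MAX models "not whitelisted").
    struct InstModel {
      unsigned Cost;
      bool SafeToSpeculate;
    };

    // Mirrors the two-threshold decision used by the pass.
    static bool shouldHoist(const std::vector<InstModel> &Block,
                            unsigned MaxSpeculationCost = 7,
                            unsigned MaxNotHoisted = 5) {
      unsigned TotalCost = 0, NotHoisted = 0;
      for (const InstModel &I : Block) {
        if (I.Cost != UINT_MAX && I.SafeToSpeculate) {
          TotalCost += I.Cost;
          if (TotalCost > MaxSpeculationCost)
            return false; // too much to hoist
        } else if (++NotHoisted > MaxNotHoisted) {
          return false;   // too much left behind
        }
      }
      return TotalCost != 0; // hoisting nothing is not a change
    }
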
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
new file mode 100644
index 0000000..453503a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -0,0 +1,710 @@
+//===-- StraightLineStrengthReduce.cpp - ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements straight-line strength reduction (SLSR). Unlike loop
+// strength reduction, this algorithm is designed to reduce arithmetic
+// redundancy in straight-line code instead of loops. It has proven to be
+// effective in simplifying arithmetic statements derived from an unrolled loop.
+// It can also simplify the logic of SeparateConstOffsetFromGEP.
+//
+// There are many optimizations we can perform in the domain of SLSR. This file
+// for now contains only an initial step. Specifically, we look for strength
+// reduction candidates in the following forms:
+//
+// Form 1: B + i * S
+// Form 2: (B + i) * S
+// Form 3: &B[i * S]
+//
+// where S is an integer variable, and i is a constant integer. If we find two
+// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
+// in a simpler way with respect to S1. For example,
+//
+// S1: X = B + i * S
+// S2: Y = B + i' * S => X + (i' - i) * S
+//
+// S1: X = (B + i) * S
+// S2: Y = (B + i') * S => X + (i' - i) * S
+//
+// S1: X = &B[i * S]
+// S2: Y = &B[i' * S] => &X[(i' - i) * S]
+//
+// Note: (i' - i) * S is folded to the extent possible.
+//
+// This rewriting is in general a good idea. The code patterns we focus on
+// usually come from loop unrolling, so (i' - i) * S is likely the same
+// across iterations and can be reused. When that happens, the optimized form
+// takes only one add starting from the second iteration.
+//
+// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
+// multiple bases, we choose to rewrite S2 with respect to its "immediate"
+// basis, the basis that is the closest ancestor in the dominator tree.
+//
+// TODO:
+//
+//  - Floating point arithmetic when fast math is enabled.
+//
+// - SLSR may decrease ILP at the architecture level. Targets that are very
+// sensitive to ILP may want to disable it. Having SLSR to consider ILP is
+// left as future work.
+//
+// - When (i' - i) is constant but i and i' are not, we could still perform
+// SLSR.
+#include <vector>
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+namespace {
+
+class StraightLineStrengthReduce : public FunctionPass {
+public:
+ // SLSR candidate. Such a candidate must be in one of the forms described in
+ // the header comments.
+ struct Candidate : public ilist_node<Candidate> {
+ enum Kind {
+ Invalid, // reserved for the default constructor
+ Add, // B + i * S
+ Mul, // (B + i) * S
+ GEP, // &B[..][i * S][..]
+ };
+
+ Candidate()
+ : CandidateKind(Invalid), Base(nullptr), Index(nullptr),
+ Stride(nullptr), Ins(nullptr), Basis(nullptr) {}
+ Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+ Instruction *I)
+ : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I),
+ Basis(nullptr) {}
+ Kind CandidateKind;
+ const SCEV *Base;
+ // Note that Index and Stride of a GEP candidate do not necessarily have the
+ // same integer type. In that case, during rewriting, Stride will be
+ // sign-extended or truncated to Index's type.
+ ConstantInt *Index;
+ Value *Stride;
+ // The instruction this candidate corresponds to. It helps us to rewrite a
+ // candidate with respect to its immediate basis. Note that one instruction
+ // can correspond to multiple candidates depending on how you associate the
+ // expression. For instance,
+ //
+ // (a + 1) * (b + 2)
+ //
+ // can be treated as
+ //
+ // <Base: a, Index: 1, Stride: b + 2>
+ //
+ // or
+ //
+ // <Base: b, Index: 2, Stride: a + 1>
+ Instruction *Ins;
+ // Points to the immediate basis of this candidate, or nullptr if we cannot
+ // find any basis for this candidate.
+ Candidate *Basis;
+ };
+
+ static char ID;
+
+ StraightLineStrengthReduce()
+ : FunctionPass(ID), DL(nullptr), DT(nullptr), TTI(nullptr) {
+ initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ // We do not modify the shape of the CFG.
+ AU.setPreservesCFG();
+ }
+
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ // Returns true if Basis is a basis for C, i.e., Basis dominates C and they
+ // share the same base and stride.
+ bool isBasisFor(const Candidate &Basis, const Candidate &C);
+ // Returns whether the candidate can be folded into an addressing mode.
+ bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
+ const DataLayout *DL);
+ // Returns true if C is already in a simplest form and not worth being
+ // rewritten.
+ bool isSimplestForm(const Candidate &C);
+ // Checks whether I is in a candidate form. If so, adds all the matching forms
+ // to Candidates, and tries to find the immediate basis for each of them.
+ void allocateCandidatesAndFindBasis(Instruction *I);
+ // Allocate candidates and find bases for Add instructions.
+ void allocateCandidatesAndFindBasisForAdd(Instruction *I);
+ // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
+ // candidate.
+ void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
+ Instruction *I);
+ // Allocate candidates and find bases for Mul instructions.
+ void allocateCandidatesAndFindBasisForMul(Instruction *I);
+  // Splits LHS into Base + Index and, if that succeeds, calls
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
+ Instruction *I);
+ // Allocate candidates and find bases for GetElementPtr instructions.
+ void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
+ // A helper function that scales Idx with ElementSize before invoking
+ // allocateCandidatesAndFindBasis.
+ void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
+ Value *S, uint64_t ElementSize,
+ Instruction *I);
+ // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
+ // basis.
+ void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
+ ConstantInt *Idx, Value *S,
+ Instruction *I);
+ // Rewrites candidate C with respect to Basis.
+ void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
+ // A helper function that factors ArrayIdx to a product of a stride and a
+ // constant index, and invokes allocateCandidatesAndFindBasis with the
+ // factorings.
+ void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
+ GetElementPtrInst *GEP);
+ // Emit code that computes the "bump" from Basis to C. If the candidate is a
+ // GEP and the bump is not divisible by the element size of the GEP, this
+ // function sets the BumpWithUglyGEP flag to notify its caller to bump the
+ // basis using an ugly GEP.
+ static Value *emitBump(const Candidate &Basis, const Candidate &C,
+ IRBuilder<> &Builder, const DataLayout *DL,
+ bool &BumpWithUglyGEP);
+
+ const DataLayout *DL;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetTransformInfo *TTI;
+ ilist<Candidate> Candidates;
+ // Temporarily holds all instructions that are unlinked (but not deleted) by
+ // rewriteCandidateWithBasis. These instructions will be actually removed
+ // after all rewriting finishes.
+ std::vector<Instruction *> UnlinkedInstructions;
+};
+} // anonymous namespace
+
+char StraightLineStrengthReduce::ID = 0;
+INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
+ "Straight line strength reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
+ "Straight line strength reduction", false, false)
+
+FunctionPass *llvm::createStraightLineStrengthReducePass() {
+ return new StraightLineStrengthReduce();
+}
+
+bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
+ const Candidate &C) {
+ return (Basis.Ins != C.Ins && // skip the same instruction
+ // Basis must dominate C in order to rewrite C with respect to Basis.
+ DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
+ // They share the same base, stride, and candidate kind.
+ Basis.Base == C.Base &&
+ Basis.Stride == C.Stride &&
+ Basis.CandidateKind == C.CandidateKind);
+}
+
+static bool isGEPFoldable(GetElementPtrInst *GEP,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ GlobalVariable *BaseGV = nullptr;
+ int64_t BaseOffset = 0;
+ bool HasBaseReg = false;
+ int64_t Scale = 0;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
+ BaseGV = GV;
+ else
+ HasBaseReg = true;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
+ BaseOffset += ConstIdx->getSExtValue() * ElementSize;
+ } else {
+ // Needs scale register.
+ if (Scale != 0) {
+ // No addressing mode takes two scale registers.
+ return false;
+ }
+ Scale = ElementSize;
+ }
+ } else {
+ StructType *STy = cast<StructType>(*GTI);
+ uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
+ BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
+ }
+ }
+ return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
+ BaseOffset, HasBaseReg, Scale);
+}
+
+// Returns whether (Base + Index * Stride) can be folded to an addressing mode.
+static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
+ TargetTransformInfo *TTI) {
+ return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
+ Index->getSExtValue());
+}
+
+bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
+ TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ if (C.CandidateKind == Candidate::Add)
+ return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
+ if (C.CandidateKind == Candidate::GEP)
+ return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL);
+ return false;
+}
+
+// Returns true if GEP has zero or one non-zero index.
+static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) {
+ unsigned NumNonZeroIndices = 0;
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) {
+ ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
+ if (ConstIdx == nullptr || !ConstIdx->isZero())
+ ++NumNonZeroIndices;
+ }
+ return NumNonZeroIndices <= 1;
+}
+
+bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) {
+ if (C.CandidateKind == Candidate::Add) {
+ // B + 1 * S or B + (-1) * S
+ return C.Index->isOne() || C.Index->isMinusOne();
+ }
+ if (C.CandidateKind == Candidate::Mul) {
+ // (B + 0) * S
+ return C.Index->isZero();
+ }
+ if (C.CandidateKind == Candidate::GEP) {
+ // (char*)B + S or (char*)B - S
+ return ((C.Index->isOne() || C.Index->isMinusOne()) &&
+ hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));
+ }
+ return false;
+}
+
+// TODO: We currently implement an algorithm whose time complexity is linear in
+// the number of existing candidates. However, we could do better by using
+// ScopedHashTable. Specifically, while traversing the dominator tree, we could
+// maintain all the candidates that dominate the basic block being traversed in
+// a ScopedHashTable. This hash table is indexed by the base and the stride of
+// a candidate. Therefore, finding the immediate basis of a candidate boils down
+// to one hash-table lookup.
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+ Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+ Instruction *I) {
+ Candidate C(CT, B, Idx, S, I);
+ // SLSR can complicate an instruction in two cases:
+ //
+ // 1. If we can fold I into an addressing mode, computing I is likely free or
+ // takes only one instruction.
+ //
+ // 2. I is already in a simplest form. For example, when
+ // X = B + 8 * S
+ // Y = B + S,
+ // rewriting Y to X - 7 * S is probably a bad idea.
+ //
+ // In the above cases, we still add I to the candidate list so that I can be
+ // the basis of other candidates, but we leave I's basis blank so that I
+ // won't be rewritten.
+ if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
+ // Try to compute the immediate basis of C.
+ unsigned NumIterations = 0;
+    // Limit the scan radius to avoid running in quadratic time.
+ static const unsigned MaxNumIterations = 50;
+ for (auto Basis = Candidates.rbegin();
+ Basis != Candidates.rend() && NumIterations < MaxNumIterations;
+ ++Basis, ++NumIterations) {
+ if (isBasisFor(*Basis, C)) {
+ C.Basis = &(*Basis);
+ break;
+ }
+ }
+ }
+ // Regardless of whether we find a basis for C, we need to push C to the
+ // candidate list so that it can be the basis of other candidates.
+ Candidates.push_back(C);
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+ Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ allocateCandidatesAndFindBasisForAdd(I);
+ break;
+ case Instruction::Mul:
+ allocateCandidatesAndFindBasisForMul(I);
+ break;
+ case Instruction::GetElementPtr:
+ allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
+ break;
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+ Instruction *I) {
+ // Try matching B + i * S.
+ if (!isa<IntegerType>(I->getType()))
+ return;
+
+ assert(I->getNumOperands() == 2 && "isn't I an add?");
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
+ if (LHS != RHS)
+ allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+ Value *LHS, Value *RHS, Instruction *I) {
+ Value *S = nullptr;
+ ConstantInt *Idx = nullptr;
+ if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
+ // I = LHS + RHS = LHS + Idx * S
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
+ } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
+ // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
+ APInt One(Idx->getBitWidth(), 1);
+ Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
+ } else {
+ // At least, I = LHS + 1 * RHS
+ ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
+ allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
+ I);
+ }
+}
+
+// Returns true if A matches B + C where C is constant.
+static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) {
+ return (match(A, m_Add(m_Value(B), m_ConstantInt(C))) ||
+ match(A, m_Add(m_ConstantInt(C), m_Value(B))));
+}
+
+// Returns true if A matches B | C where C is constant.
+static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) {
+ return (match(A, m_Or(m_Value(B), m_ConstantInt(C))) ||
+ match(A, m_Or(m_ConstantInt(C), m_Value(B))));
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
+ Value *LHS, Value *RHS, Instruction *I) {
+ Value *B = nullptr;
+ ConstantInt *Idx = nullptr;
+ if (matchesAdd(LHS, B, Idx)) {
+ // If LHS is in the form of "Base + Index", then I is in the form of
+ // "(Base + Index) * RHS".
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
+ } else if (matchesOr(LHS, B, Idx) && haveNoCommonBitsSet(B, Idx, *DL)) {
+ // If LHS is in the form of "Base | Index" and Base and Index have no common
+ // bits set, then
+ // Base | Index = Base + Index
+ // and I is thus in the form of "(Base + Index) * RHS".
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
+ } else {
+ // Otherwise, at least try the form (LHS + 0) * RHS.
+ ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
+ allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
+ I);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
+ Instruction *I) {
+ // Try matching (B + i) * S.
+ // TODO: we could extend SLSR to float and vector types.
+ if (!isa<IntegerType>(I->getType()))
+ return;
+
+ assert(I->getNumOperands() == 2 && "isn't I a mul?");
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
+ if (LHS != RHS) {
+ // Symmetrically, try to split RHS to Base + Index.
+ allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
+ const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
+ Instruction *I) {
+ // I = B + sext(Idx *nsw S) * ElementSize
+ // = B + (sext(Idx) * sext(S)) * ElementSize
+ // = B + (sext(Idx) * ElementSize) * sext(S)
+ // Casting to IntegerType is safe because we skipped vector GEPs.
+ IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
+ ConstantInt *ScaledIdx = ConstantInt::get(
+ IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
+ allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
+}
+
+void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
+ const SCEV *Base,
+ uint64_t ElementSize,
+ GetElementPtrInst *GEP) {
+ // At least, ArrayIdx = ArrayIdx *nsw 1.
+ allocateCandidatesAndFindBasisForGEP(
+ Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
+ ArrayIdx, ElementSize, GEP);
+ Value *LHS = nullptr;
+ ConstantInt *RHS = nullptr;
+ // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
+ // itself. This would allow us to handle the shl case for free. However,
+ // matching SCEVs has two issues:
+ //
+ // 1. this would complicate rewriting because the rewriting procedure
+ // would have to translate SCEVs back to IR instructions. This translation
+ // is difficult when LHS is further evaluated to a composite SCEV.
+ //
+ // 2. ScalarEvolution is designed to be control-flow oblivious. It tends
+ // to strip nsw/nuw flags which are critical for SLSR to trace into
+ // sext'ed multiplication.
+ if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
+ // SLSR is currently unsafe if i * S may overflow.
+ // GEP = Base + sext(LHS *nsw RHS) * ElementSize
+ allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+ } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
+ // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
+ // = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
+ APInt One(RHS->getBitWidth(), 1);
+ ConstantInt *PowerOf2 =
+ ConstantInt::get(RHS->getContext(), One << RHS->getValue());
+ allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
+ GetElementPtrInst *GEP) {
+ // TODO: handle vector GEPs
+ if (GEP->getType()->isVectorTy())
+ return;
+
+ SmallVector<const SCEV *, 4> IndexExprs;
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
+ IndexExprs.push_back(SE->getSCEV(*I));
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
+ if (!isa<SequentialType>(*GTI++))
+ continue;
+
+ const SCEV *OrigIndexExpr = IndexExprs[I - 1];
+ IndexExprs[I - 1] = SE->getConstant(OrigIndexExpr->getType(), 0);
+
+ // The base of this candidate is GEP's base plus the offsets of all
+ // indices except this current one.
+ const SCEV *BaseExpr = SE->getGEPExpr(GEP->getSourceElementType(),
+ SE->getSCEV(GEP->getPointerOperand()),
+ IndexExprs, GEP->isInBounds());
+ Value *ArrayIdx = GEP->getOperand(I);
+ uint64_t ElementSize = DL->getTypeAllocSize(*GTI);
+ factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
+ // When ArrayIdx is the sext of a value, we try to factor that value as
+ // well. Handling this case is important because array indices are
+ // typically sign-extended to the pointer size.
+ Value *TruncatedArrayIdx = nullptr;
+ if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))))
+ factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP);
+
+ IndexExprs[I - 1] = OrigIndexExpr;
+ }
+}
+
+// A helper function that unifies the bitwidth of A and B.
+static void unifyBitWidth(APInt &A, APInt &B) {
+ if (A.getBitWidth() < B.getBitWidth())
+ A = A.sext(B.getBitWidth());
+ else if (A.getBitWidth() > B.getBitWidth())
+ B = B.sext(A.getBitWidth());
+}
+
+Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
+ const Candidate &C,
+ IRBuilder<> &Builder,
+ const DataLayout *DL,
+ bool &BumpWithUglyGEP) {
+ APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
+ unifyBitWidth(Idx, BasisIdx);
+ APInt IndexOffset = Idx - BasisIdx;
+
+ BumpWithUglyGEP = false;
+ if (Basis.CandidateKind == Candidate::GEP) {
+ APInt ElementSize(
+ IndexOffset.getBitWidth(),
+ DL->getTypeAllocSize(
+ cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType()));
+ APInt Q, R;
+ APInt::sdivrem(IndexOffset, ElementSize, Q, R);
+ if (R.getSExtValue() == 0)
+ IndexOffset = Q;
+ else
+ BumpWithUglyGEP = true;
+ }
+
+ // Compute Bump = C - Basis = (i' - i) * S.
+ // Common case 1: if (i' - i) is 1, Bump = S.
+ if (IndexOffset.getSExtValue() == 1)
+ return C.Stride;
+ // Common case 2: if (i' - i) is -1, Bump = -S.
+ if (IndexOffset.getSExtValue() == -1)
+ return Builder.CreateNeg(C.Stride);
+
+ // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
+ // have different bit widths.
+ IntegerType *DeltaType =
+ IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
+ Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
+ if (IndexOffset.isPowerOf2()) {
+ // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
+ ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
+ return Builder.CreateShl(ExtendedStride, Exponent);
+ }
+ if ((-IndexOffset).isPowerOf2()) {
+ // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
+ ConstantInt *Exponent =
+ ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
+ return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent));
+ }
+ Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);
+ return Builder.CreateMul(ExtendedStride, Delta);
+}
+
+void StraightLineStrengthReduce::rewriteCandidateWithBasis(
+ const Candidate &C, const Candidate &Basis) {
+ assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
+ C.Stride == Basis.Stride);
+  // We run rewriteCandidateWithBasis on all candidates in post-order, so the
+ // basis of a candidate cannot be unlinked before the candidate.
+ assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");
+
+ // An instruction can correspond to multiple candidates. Therefore, instead of
+ // simply deleting an instruction when we rewrite it, we mark its parent as
+ // nullptr (i.e. unlink it) so that we can skip the candidates whose
+ // instruction is already rewritten.
+ if (!C.Ins->getParent())
+ return;
+
+ IRBuilder<> Builder(C.Ins);
+ bool BumpWithUglyGEP;
+ Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
+ Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
+ switch (C.CandidateKind) {
+ case Candidate::Add:
+ case Candidate::Mul:
+ // C = Basis + Bump
+ if (BinaryOperator::isNeg(Bump)) {
+ // If Bump is a neg instruction, emit C = Basis - (-Bump).
+ Reduced =
+ Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump));
+ // We only use the negative argument of Bump, and Bump itself may be
+ // trivially dead.
+ RecursivelyDeleteTriviallyDeadInstructions(Bump);
+ } else {
+ Reduced = Builder.CreateAdd(Basis.Ins, Bump);
+ }
+ break;
+ case Candidate::GEP:
+ {
+ Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
+ bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
+ if (BumpWithUglyGEP) {
+ // C = (char *)Basis + Bump
+ unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
+ Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
+ Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
+ if (InBounds)
+ Reduced =
+ Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
+ else
+ Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
+ Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
+ } else {
+ // C = gep Basis, Bump
+ // Canonicalize bump to pointer size.
+ Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
+ if (InBounds)
+ Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
+ else
+ Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
+ }
+ }
+ break;
+ default:
+ llvm_unreachable("C.CandidateKind is invalid");
+ };
+ Reduced->takeName(C.Ins);
+ C.Ins->replaceAllUsesWith(Reduced);
+ // Unlink C.Ins so that we can skip other candidates also corresponding to
+ // C.Ins. The actual deletion is postponed to the end of runOnFunction.
+ C.Ins->removeFromParent();
+ UnlinkedInstructions.push_back(C.Ins);
+}
+
+bool StraightLineStrengthReduce::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolution>();
+  // Traverse the dominator tree in depth-first order. This order makes sure
+ // all bases of a candidate are in Candidates when we process it.
+ for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+ node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {
+ for (auto &I : *node->getBlock())
+ allocateCandidatesAndFindBasis(&I);
+ }
+
+  // Rewrite candidates in reverse depth-first order. This order makes sure
+ // a candidate being rewritten is not a basis for any other candidate.
+ while (!Candidates.empty()) {
+ const Candidate &C = Candidates.back();
+ if (C.Basis != nullptr) {
+ rewriteCandidateWithBasis(C, *C.Basis);
+ }
+ Candidates.pop_back();
+ }
+
+  // Delete all unlinked instructions.
+ for (auto *UnlinkedInst : UnlinkedInstructions) {
+ for (unsigned I = 0, E = UnlinkedInst->getNumOperands(); I != E; ++I) {
+ Value *Op = UnlinkedInst->getOperand(I);
+ UnlinkedInst->setOperand(I, nullptr);
+ RecursivelyDeleteTriviallyDeadInstructions(Op);
+ }
+ delete UnlinkedInst;
+ }
+ bool Ret = !UnlinkedInstructions.empty();
+ UnlinkedInstructions.clear();
+ return Ret;
+}
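
As a worked example of Form 1: with X = B + 5 * S as the basis and
Y = B + 7 * S as the candidate, emitBump computes IndexOffset = 7 - 5 = 2,
a power of two, so the bump becomes S << 1 and Y is rewritten as X + (S << 1).
A source-level sketch of the same arithmetic; the variable names are invented:

    // Before SLSR each statement recomputes b + i * s from scratch; after
    // SLSR the second statement is expressed relative to its basis.
    long slsrExample(long b, long s) {
      long x = b + 5 * s;
      long bump = s << 1; // what emitBump produces for IndexOffset == 2
      long y = x + bump;  // equivalent to b + 7 * s
      return x + y;
    }
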
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 7fe87f9..4f23e20 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -9,6 +9,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
@@ -16,6 +17,8 @@
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -249,7 +252,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
}
@@ -281,11 +284,65 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
/// \brief Build up the general order of nodes
void StructurizeCFG::orderNodes() {
- scc_iterator<Region *> I = scc_begin(ParentRegion);
- for (Order.clear(); !I.isAtEnd(); ++I) {
- const std::vector<RegionNode *> &Nodes = *I;
- Order.append(Nodes.begin(), Nodes.end());
+ RNVector TempOrder;
+ ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
+ TempOrder.append(RPOT.begin(), RPOT.end());
+
+ std::map<Loop*, unsigned> LoopBlocks;
+
+
+ // The reverse post-order traversal of the list gives us an ordering close
+ // to what we want. The only problem with it is that sometimes backedges
+ // for outer loops will be visited before backedges for inner loops.
+ for (RegionNode *RN : TempOrder) {
+ BasicBlock *BB = RN->getEntry();
+ Loop *Loop = LI->getLoopFor(BB);
+ if (!LoopBlocks.count(Loop)) {
+ LoopBlocks[Loop] = 1;
+ continue;
+ }
+ LoopBlocks[Loop]++;
+ }
+
+ unsigned CurrentLoopDepth = 0;
+ Loop *CurrentLoop = nullptr;
+ BBSet TempVisited;
+ for (RNVector::iterator I = TempOrder.begin(), E = TempOrder.end(); I != E; ++I) {
+ BasicBlock *BB = (*I)->getEntry();
+ unsigned LoopDepth = LI->getLoopDepth(BB);
+
+ if (std::find(Order.begin(), Order.end(), *I) != Order.end())
+ continue;
+
+ if (LoopDepth < CurrentLoopDepth) {
+ // Make sure we have visited all blocks in this loop before moving back to
+ // the outer loop.
+
+ RNVector::iterator LoopI = I;
+ while(LoopBlocks[CurrentLoop]) {
+ LoopI++;
+ BasicBlock *LoopBB = (*LoopI)->getEntry();
+ if (LI->getLoopFor(LoopBB) == CurrentLoop) {
+ LoopBlocks[CurrentLoop]--;
+ Order.push_back(*LoopI);
+ }
+ }
+ }
+
+ CurrentLoop = LI->getLoopFor(BB);
+ if (CurrentLoop) {
+ LoopBlocks[CurrentLoop]--;
+ }
+
+ CurrentLoopDepth = LoopDepth;
+ Order.push_back(*I);
}
+
+ // This pass originally used a post-order traversal and then operated on
+  // the list in reverse. Now that we are using a reverse post-order
+  // traversal, rather than rework the whole pass to operate on the list in
+  // order, we simply reverse the list and continue to operate on it in
+  // reverse.
+ std::reverse(Order.begin(), Order.end());
}
/// \brief Determine the end of the loops
@@ -304,7 +361,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
BasicBlock *Succ = Term->getSuccessor(i);
- if (Visited.count(Succ) && LI->isLoopHeader(Succ) ) {
+ if (Visited.count(Succ)) {
Loops[Succ] = BB;
}
}
@@ -441,6 +498,10 @@ void StructurizeCFG::collectInfos() {
for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
OI != OE; ++OI) {
+ DEBUG(dbgs() << "Visiting: " <<
+ ((*OI)->isSubRegion() ? "SubRegion with entry: " : "") <<
+ (*OI)->getEntry()->getName() << " Loop Depth: " << LI->getLoopDepth((*OI)->getEntry()) << "\n");
+
// Analyze all the conditions leading to a node
gatherPredicates(*OI);
@@ -826,7 +887,7 @@ void StructurizeCFG::createFlow() {
/// no longer dominate all their uses. Not sure if this is really necessary
void StructurizeCFG::rebuildSSA() {
SSAUpdater Updater;
- for (const auto &BB : ParentRegion->blocks())
+ for (auto *BB : ParentRegion->blocks())
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
@@ -866,7 +927,7 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
ParentRegion = R;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
orderNodes();
collectInfos();
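
The new orderNodes above starts from a reverse post-order traversal of the
region and then delays outer-loop blocks until all previously seen blocks of
the inner loop have been emitted, using the per-loop counters in LoopBlocks. A
minimal sketch of the underlying traversal primitive, shown on a plain Function
for illustration; the pass itself walks RegionNodes:

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // ReversePostOrderTraversal visits each block before its successors along
    // forward edges, which is "close to what we want" before the loop-aware
    // fix-up performed by orderNodes().
    static void visitInRPO(Function &F) {
      ReversePostOrderTraversal<Function *> RPOT(&F);
      for (BasicBlock *BB : RPOT) {
        (void)BB; // process BB here in reverse post-order
      }
    }
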
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index f3c3e30..9eef132 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -54,8 +54,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -87,7 +87,6 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
const TargetTransformInfo *TTI;
- const DataLayout *DL;
static char ID; // Pass identification, replacement for typeid
TailCallElim() : FunctionPass(ID) {
@@ -126,7 +125,7 @@ namespace {
char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
"Tail Call Elimination", false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(TailCallElim, "tailcallelim",
"Tail Call Elimination", false, false)
@@ -136,7 +135,7 @@ FunctionPass *llvm::createTailCallEliminationPass() {
}
void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
/// \brief Scan the specified function for alloca instructions.
@@ -159,8 +158,6 @@ bool TailCallElim::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- DL = F.getParent()->getDataLayout();
-
bool AllCallsAreTailCalls = false;
bool Modified = markTails(F, AllCallsAreTailCalls);
if (AllCallsAreTailCalls)
@@ -386,16 +383,15 @@ bool TailCallElim::runTRE(Function &F) {
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
- TTI = &getAnalysis<TargetTransformInfo>();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
BasicBlock *OldEntry = nullptr;
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
- // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
- // marked with the 'tail' attribute, because doing so would cause the stack
- // size to increase (real TRE would deallocate variable sized allocas, TRE
- // doesn't).
+ // If false, we cannot perform TRE on tail calls marked with the 'tail'
+ // attribute, because doing so would cause the stack size to increase (real
+ // TRE would deallocate variable sized allocas, TRE doesn't).
bool CanTRETailMarkedCall = CanTRE(F);
// Change any tail recursive calls to loops.
@@ -404,28 +400,19 @@ bool TailCallElim::runTRE(Function &F) {
// alloca' is changed from being a static alloca to being a dynamic alloca.
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
- SmallVector<BasicBlock*, 8> BBToErase;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
+ BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret) {
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
TailCallsAreMarkedTail, ArgumentPHIs,
!CanTRETailMarkedCall);
- // FoldReturnAndProcessPred may have emptied some BB. Remember to
- // erase them.
- if (Change && BB->empty())
- BBToErase.push_back(BB);
-
- }
MadeChange |= Change;
}
}
- for (auto BB: BBToErase)
- BB->eraseFromParent();
-
// If we eliminated any tail recursions, it's possible that we inserted some
// silly PHI nodes which just merge an initial value (the incoming operand)
// with themselves. Check to see if we did and clean up our mess if so. This
@@ -435,7 +422,7 @@ bool TailCallElim::runTRE(Function &F) {
PHINode *PN = ArgumentPHIs[i];
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN)) {
+ if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
@@ -445,7 +432,7 @@ bool TailCallElim::runTRE(Function &F) {
}
-/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// Return true if it is safe to move the specified
/// instruction from after the call to before the call, assuming that all
/// instructions between the call and this instruction are movable.
///
@@ -464,7 +451,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// being loaded from.
if (CI->mayWriteToMemory() ||
!isSafeToLoadUnconditionally(L->getPointerOperand(), L,
- L->getAlignment(), DL))
+ L->getAlignment()))
return false;
}
}
@@ -480,13 +467,11 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
return true;
}
-// isDynamicConstant - Return true if the specified value is the same when the
-// return would exit as it was when the initial iteration of the recursive
-// function was executed.
-//
-// We currently handle static constants and arguments that are not modified as
-// part of the recursion.
-//
+/// Return true if the specified value is the same when the return would exit
+/// as it was when the initial iteration of the recursive function was executed.
+///
+/// We currently handle static constants and arguments that are not modified as
+/// part of the recursion.
static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
if (isa<Constant>(V)) return true; // Static constants are always dyn consts
@@ -518,10 +503,9 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
return false;
}
-// getCommonReturnValue - Check to see if the function containing the specified
-// tail call consistently returns the same runtime-constant value at all exit
-// points except for IgnoreRI. If so, return the returned value.
-//
+/// Check to see if the function containing the specified tail call consistently
+/// returns the same runtime-constant value at all exit points except for
+/// IgnoreRI. If so, return the returned value.
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
Value *ReturnedValue = nullptr;
@@ -545,10 +529,9 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
return ReturnedValue;
}
-/// CanTransformAccumulatorRecursion - If the specified instruction can be
-/// transformed using accumulator recursion elimination, return the constant
-/// which is the start of the accumulator value. Otherwise return null.
-///
+/// If the specified instruction can be transformed using accumulator recursion
+/// elimination, return the constant which is the start of the accumulator
+/// value. Otherwise return null.
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
if (!I->isAssociative() || !I->isCommutative()) return nullptr;
@@ -836,14 +819,11 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
// Cleanup: if all predecessors of BB have been eliminated by
- // FoldReturnIntoUncondBranch, we would like to delete it, but we
- // can not just nuke it as it is being used as an iterator by our caller.
- // Just empty it, and the caller will erase it when it is safe to do so.
- // It is important to empty it, because the ret instruction in there is
- // still using a value which EliminateRecursiveTailCall will attempt
- // to remove.
+ // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
+ // because the ret instruction in there is still using a value which
+ // EliminateRecursiveTailCall will attempt to remove.
if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- BB->getInstList().clear();
+ BB->eraseFromParent();
EliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs,
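
runTRE now erases a block emptied by FoldReturnAndProcessPred immediately
instead of queuing it on BBToErase; that is only safe because the loop advances
its iterator before the current block can go away (BasicBlock *BB = BBI++). A
generic sketch of that iterate-and-maybe-erase idiom, using std::list as a
stand-in for the function's block list:

    #include <list>

    // Advance the iterator past the current element before possibly erasing
    // it, the same discipline the updated loop in runTRE() follows.
    static void eraseNegatives(std::list<int> &L) {
      for (auto It = L.begin(), E = L.end(); It != E; /* in loop */) {
        auto Cur = It++; // It already points past *Cur, so erasing is safe
        if (*Cur < 0)
          L.erase(Cur);
      }
    }
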
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index cce016a..03c3a80 100644
--- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MathExtras.h"
#include <algorithm>
namespace llvm {
@@ -33,11 +34,6 @@ static inline bool CompareVars(const ASanStackVariableDescription &a,
// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
static const size_t kMinAlignment = 16;
-static size_t RoundUpTo(size_t X, size_t RoundTo) {
- assert((RoundTo & (RoundTo - 1)) == 0);
- return (X + RoundTo - 1) & ~(RoundTo - 1);
-}
-
// The larger the variable Size, the larger the redzone.
// The resulting frame size is a multiple of Alignment.
static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
@@ -48,7 +44,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
else if (Size <= 512) Res = Size + 64;
else if (Size <= 4096) Res = Size + 128;
else Res = Size + 256;
- return RoundUpTo(Res, Alignment);
+ return RoundUpToAlignment(Res, Alignment);
}
void
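
RoundUpToAlignment from Support/MathExtras.h has the same power-of-two rounding
semantics as the removed local helper, so, for example, a 200-byte variable in
the 64-byte redzone class with 16-byte alignment rounds up as
RoundUpToAlignment(200 + 64, 16) == 272. A sketch of that identity, assuming a
power-of-two alignment as the removed assert required:

    #include <cassert>
    #include <cstddef>

    // Round X up to the next multiple of a power-of-two alignment; this is the
    // behavior relied on by VarAndRedzoneSize() above.
    static size_t roundUpToAlignment(size_t X, size_t Align) {
      assert((Align & (Align - 1)) == 0 && "alignment must be a power of two");
      return (X + Align - 1) & ~(Align - 1);
    }
    // e.g. roundUpToAlignment(200 + 64, 16) == 272
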
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 820544b..e9f6239 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -174,42 +174,51 @@ bool AddDiscriminators::runOnFunction(Function &F) {
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *B = I;
TerminatorInst *Last = B->getTerminator();
- DebugLoc LastLoc = Last->getDebugLoc();
- if (LastLoc.isUnknown()) continue;
- DILocation LastDIL(LastLoc.getAsMDNode(Ctx));
+ const DILocation *LastDIL = Last->getDebugLoc();
+ if (!LastDIL)
+ continue;
for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
BasicBlock *Succ = Last->getSuccessor(I);
Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
- DebugLoc FirstLoc = First->getDebugLoc();
- if (FirstLoc.isUnknown()) continue;
- DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx));
+ const DILocation *FirstDIL = First->getDebugLoc();
+ if (!FirstDIL)
+ continue;
// If the first instruction (First) of Succ is at the same file
// location as B's last instruction (Last), add a new
// discriminator for First's location and all the instructions
// in Succ that share the same location with First.
- if (FirstDIL.atSameLineAs(LastDIL)) {
+ if (!FirstDIL->canDiscriminate(*LastDIL)) {
// Create a new lexical scope and compute a new discriminator
// number for it.
- StringRef Filename = FirstDIL.getFilename();
- DIScope Scope = FirstDIL.getScope();
- DIFile File = Builder.createFile(Filename, Scope.getDirectory());
- unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx);
- DILexicalBlockFile NewScope =
+ StringRef Filename = FirstDIL->getFilename();
+ auto *Scope = FirstDIL->getScope();
+ auto *File = Builder.createFile(Filename, Scope->getDirectory());
+
+ // FIXME: Calculate the discriminator here, based on local information,
+ // and delete DILocation::computeNewDiscriminator(). The current
+ // solution gives different results depending on other modules in the
+ // same context. All we really need is to discriminate between
+ // FirstDIL and LastDIL -- a local map would suffice.
+ unsigned Discriminator = FirstDIL->computeNewDiscriminator();
+ auto *NewScope =
Builder.createLexicalBlockFile(Scope, File, Discriminator);
- DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope);
- DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL);
+ auto *NewDIL =
+ DILocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
+ NewScope, FirstDIL->getInlinedAt());
+ DebugLoc newDebugLoc = NewDIL;
// Attach this new debug location to First and every
// instruction following First that shares the same location.
for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
++I1) {
- if (I1->getDebugLoc() != FirstLoc) break;
+ if (I1->getDebugLoc().get() != FirstDIL)
+ break;
I1->setDebugLoc(newDebugLoc);
- DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber()
- << ":" << NewDIL.getColumnNumber() << ":"
- << NewDIL.getDiscriminator() << *I1 << "\n");
+ DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
+ << ":" << NewDIL->getColumn() << ":"
+ << NewDIL->getDiscriminator() << *I1 << "\n");
}
DEBUG(dbgs() << "\n");
Changed = true;
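The FIXME above notes that computeNewDiscriminator() only needs to tell FirstDIL and LastDIL apart, so a per-function map would do. A hypothetical sketch of such a local allocator (not the in-tree implementation), keyed by the (filename, line) pair the pass is discriminating on:

  // Hypothetical per-function discriminator allocator. Each call for the same
  // (filename, line) location hands out the next unused discriminator value.
  #include <map>
  #include <string>
  #include <utility>

  class LocalDiscriminatorAllocator {
    std::map<std::pair<std::string, unsigned>, unsigned> NextValue;

  public:
    unsigned next(const std::string &Filename, unsigned Line) {
      // Discriminator 0 means "none", so start handing out values at 1.
      return ++NextValue[{Filename, Line}];
    }
  };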
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 983f025..f3c8013 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -65,16 +65,10 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// any single-entry PHI nodes in it, fold them away. This handles the case
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA,
+ MemoryDependenceAnalysis *MemDep) {
if (!isa<PHINode>(BB->begin())) return;
- AliasAnalysis *AA = nullptr;
- MemoryDependenceAnalysis *MemDep = nullptr;
- if (P) {
- AA = P->getAnalysisIfAvailable<AliasAnalysis>();
- MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
- }
-
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
PN->replaceAllUsesWith(PN->getIncomingValue(0));
@@ -113,7 +107,9 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
-bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
+bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
+ LoopInfo *LI, AliasAnalysis *AA,
+ MemoryDependenceAnalysis *MemDep) {
// Don't merge away blocks who have their address taken.
if (BB->hasAddressTaken()) return false;
@@ -140,8 +136,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Can't merge if there is PHI loop.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == PN)
+ for (Value *IncValue : PN->incoming_values())
+ if (IncValue == PN)
return false;
} else
break;
@@ -149,7 +145,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Begin by getting rid of unneeded PHIs.
if (isa<PHINode>(BB->front()))
- FoldSingleEntryPHINodes(BB, P);
+ FoldSingleEntryPHINodes(BB, AA, MemDep);
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -166,28 +162,23 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
PredBB->takeName(BB);
// Finally, erase the old block and update dominator info.
- if (P) {
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
- if (DomTreeNode *DTN = DT.getNode(BB)) {
- DomTreeNode *PredDTN = DT.getNode(PredBB);
- SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
- for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
- DE = Children.end(); DI != DE; ++DI)
- DT.changeImmediateDominator(*DI, PredDTN);
-
- DT.eraseNode(BB);
- }
+ if (DT)
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
+ for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
+ DE = Children.end();
+ DI != DE; ++DI)
+ DT->changeImmediateDominator(*DI, PredDTN);
+
+ DT->eraseNode(BB);
+ }
- if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
- LI->removeBlock(BB);
+ if (LI)
+ LI->removeBlock(BB);
- if (MemoryDependenceAnalysis *MD =
- P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
- MD->invalidateCachedPredecessors();
- }
- }
+ if (MemDep)
+ MemDep->invalidateCachedPredecessors();
BB->eraseFromParent();
return true;
@@ -240,12 +231,14 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
/// SplitEdge - Split the edge connecting the specified blocks, updating the
/// DominatorTree and LoopInfo analyses when they are supplied (non-null).
-BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
+ LoopInfo *LI) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
TerminatorInst *LatchTerm = BB->getTerminator();
- if (SplitCriticalEdge(LatchTerm, SuccNum, P))
+ if (SplitCriticalEdge(LatchTerm, SuccNum, CriticalEdgeSplittingOptions(DT, LI)
+ .setPreserveLCSSA()))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
@@ -255,23 +248,25 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
// block.
assert(SP == BB && "CFG broken");
SP = nullptr;
- return SplitBlock(Succ, Succ->begin(), P);
+ return SplitBlock(Succ, Succ->begin(), DT, LI);
}
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
"Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), P);
+ return SplitBlock(BB, BB->getTerminator(), DT, LI);
}
-unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) {
+unsigned
+llvm::SplitAllCriticalEdges(Function &F,
+ const CriticalEdgeSplittingOptions &Options) {
unsigned NumBroken = 0;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
TerminatorInst *TI = I->getTerminator();
if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (SplitCriticalEdge(TI, i, P))
+ if (SplitCriticalEdge(TI, i, Options))
++NumBroken;
}
return NumBroken;
@@ -282,7 +277,8 @@ unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) {
/// to a new block. The two blocks are joined by an unconditional branch and
/// the loop info is updated.
///
-BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt,
+ DominatorTree *DT, LoopInfo *LI) {
BasicBlock::iterator SplitIt = SplitPt;
while (isa<PHINode>(SplitIt) || isa<LandingPadInst>(SplitIt))
++SplitIt;
@@ -290,26 +286,23 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
// The new block lives in whichever loop the old one did. This preserves
// LCSSA as well, because we force the split point to be after any PHI nodes.
- if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (LI)
if (Loop *L = LI->getLoopFor(Old))
- L->addBasicBlockToLoop(New, LI->getBase());
+ L->addBasicBlockToLoop(New, *LI);
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
+ if (DT)
// Old dominates New. New node dominates all other nodes dominated by Old.
- if (DomTreeNode *OldNode = DT.getNode(Old)) {
+ if (DomTreeNode *OldNode = DT->getNode(Old)) {
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
I != E; ++I)
Children.push_back(*I);
- DomTreeNode *NewNode = DT.addNewBlock(New, Old);
+ DomTreeNode *NewNode = DT->addNewBlock(New, Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
E = Children.end(); I != E; ++I)
- DT.changeImmediateDominator(*I, NewNode);
+ DT->changeImmediateDominator(*I, NewNode);
}
- }
return New;
}
@@ -318,45 +311,46 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
/// analysis information.
static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
ArrayRef<BasicBlock *> Preds,
- Pass *P, bool &HasLoopExit) {
- if (!P) return;
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA, bool &HasLoopExit) {
+ // Update dominator tree if available.
+ if (DT)
+ DT->splitBlock(NewBB);
+
+ // The rest of the logic is only relevant for updating the loop structures.
+ if (!LI)
+ return;
- LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
- Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr;
+ Loop *L = LI->getLoopFor(OldBB);
// If we need to preserve loop analyses, collect some information about how
// this split will affect loops.
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
- if (LI) {
- bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);
- for (ArrayRef<BasicBlock*>::iterator
- i = Preds.begin(), e = Preds.end(); i != e; ++i) {
- BasicBlock *Pred = *i;
-
- // If we need to preserve LCSSA, determine if any of the preds is a loop
- // exit.
- if (PreserveLCSSA)
- if (Loop *PL = LI->getLoopFor(Pred))
- if (!PL->contains(OldBB))
- HasLoopExit = true;
-
- // If we need to preserve LoopInfo, note whether any of the preds crosses
- // an interesting loop boundary.
- if (!L) continue;
- if (L->contains(Pred))
- IsLoopEntry = false;
- else
- SplitMakesNewLoopHeader = true;
- }
+ for (ArrayRef<BasicBlock *>::iterator i = Preds.begin(), e = Preds.end();
+ i != e; ++i) {
+ BasicBlock *Pred = *i;
+
+ // If we need to preserve LCSSA, determine if any of the preds is a loop
+ // exit.
+ if (PreserveLCSSA)
+ if (Loop *PL = LI->getLoopFor(Pred))
+ if (!PL->contains(OldBB))
+ HasLoopExit = true;
+
+ // If we need to preserve LoopInfo, note whether any of the preds crosses
+ // an interesting loop boundary.
+ if (!L)
+ continue;
+ if (L->contains(Pred))
+ IsLoopEntry = false;
+ else
+ SplitMakesNewLoopHeader = true;
}
- // Update dominator tree if available.
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
- DTWP->getDomTree().splitBlock(NewBB);
-
- if (!L) return;
+ // Unless we have a loop for OldBB, nothing else to do here.
+ if (!L)
+ return;
if (IsLoopEntry) {
// Add the new block to the nearest enclosing loop (and not an adjacent
@@ -382,9 +376,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
if (InnermostPredLoop)
- InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ InnermostPredLoop->addBasicBlockToLoop(NewBB, *LI);
} else {
- L->addBasicBlockToLoop(NewBB, LI->getBase());
+ L->addBasicBlockToLoop(NewBB, *LI);
if (SplitMakesNewLoopHeader)
L->moveToHeader(NewBB);
}
@@ -393,10 +387,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
/// UpdatePHINodes - Update the PHI nodes in OrigBB to include the values coming
/// from NewBB. This also updates AliasAnalysis, if available.
static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
- ArrayRef<BasicBlock*> Preds, BranchInst *BI,
- Pass *P, bool HasLoopExit) {
+ ArrayRef<BasicBlock *> Preds, BranchInst *BI,
+ AliasAnalysis *AA, bool HasLoopExit) {
// Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
- AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr;
SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I++);
@@ -461,11 +454,15 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
}
}
-/// SplitBlockPredecessors - This method transforms BB by introducing a new
-/// basic block into the function, and moving some of the predecessors of BB to
-/// be predecessors of the new block. The new predecessors are indicated by the
-/// Preds array, which has NumPreds elements in it. The new block is given a
-/// suffix of 'Suffix'.
+/// SplitBlockPredecessors - This method introduces at least one new basic block
+/// into the function and moves some of the predecessors of BB to be
+/// predecessors of the new block. The new predecessors are indicated by the
+/// Preds array. The new block is given a suffix of 'Suffix'. Returns the new
+/// basic block to which the predecessors from Preds now point.
+///
+/// If BB is a landingpad block, an additional basic block may be introduced.
+/// It will have the suffix 'Suffix'+".split-lp".
+/// See SplitLandingPadPredecessors for more details on this case.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// LoopInfo, and LCSSA but no other analyses. In particular, it does not
@@ -473,8 +470,21 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- ArrayRef<BasicBlock*> Preds,
- const char *Suffix, Pass *P) {
+ ArrayRef<BasicBlock *> Preds,
+ const char *Suffix, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
+ // For the landingpads we need to act a bit differently.
+ // Delegate this work to the SplitLandingPadPredecessors.
+ if (BB->isLandingPad()) {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ std::string NewName = std::string(Suffix) + ".split-lp";
+
+ SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(),
+ NewBBs, AA, DT, LI, PreserveLCSSA);
+ return NewBBs[0];
+ }
+
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -505,10 +515,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
  // Update DominatorTree, LoopInfo, and LCSSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,
+ HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit);
return NewBB;
}
@@ -526,10 +537,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
/// exits).
///
void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
- ArrayRef<BasicBlock*> Preds,
+ ArrayRef<BasicBlock *> Preds,
const char *Suffix1, const char *Suffix2,
- Pass *P,
- SmallVectorImpl<BasicBlock*> &NewBBs) {
+ SmallVectorImpl<BasicBlock *> &NewBBs,
+ AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");
// Create a new basic block for OrigBB's predecessors listed in Preds. Insert
@@ -552,12 +564,12 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
}
- // Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA,
+ HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB1.
- UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit);
// Move the remaining edges from OrigBB to point to NewBB2.
SmallVector<BasicBlock*, 8> NewBB2Preds;
@@ -589,10 +601,11 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
  // Update DominatorTree, LoopInfo, and LCSSA analysis information.
HasLoopExit = false;
- UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);
+ UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI,
+ PreserveLCSSA, HasLoopExit);
// Update the PHI nodes in OrigBB with the values coming from NewBB2.
- UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
+ UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit);
}
LandingPadInst *LPad = OrigBB->getLandingPadInst();
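Throughout this file the utilities stop digging analyses out of a Pass* and instead take them as explicit parameters, with nullptr meaning "don't update this analysis". A rough caller sketch under that convention (the helper name and loop are illustrative, not part of the patch):

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Function.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  using namespace llvm;

  // Illustrative helper: merge every trivially mergeable block, forwarding
  // whichever analyses the caller happens to have (null means "not updated").
  static bool mergeTrivialBlocks(Function &F, DominatorTree *DT, LoopInfo *LI,
                                 AliasAnalysis *AA,
                                 MemoryDependenceAnalysis *MemDep) {
    bool Changed = false;
    for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
      BasicBlock *BB = &*I++; // advance first; BB may be erased by the merge
      Changed |= MergeBlockIntoPredecessor(BB, DT, LI, AA, MemDep);
    }
    return Changed;
  }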
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index eda22cf..7e83c9e 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -18,6 +18,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
@@ -41,14 +42,19 @@ namespace {
}
bool runOnFunction(Function &F) override {
- unsigned N = SplitAllCriticalEdges(F, this);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ unsigned N =
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
NumBroken += N;
return N > 0;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
// No loop canonicalization guarantees are broken by this pass.
AU.addPreservedID(LoopSimplifyID);
@@ -125,10 +131,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
/// to.
///
BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
- Pass *P, bool MergeIdenticalEdges,
- bool DontDeleteUselessPhis,
- bool SplitLandingPads) {
- if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr;
+ const CriticalEdgeSplittingOptions &Options) {
+ if (!isCriticalEdge(TI, SuccNum, Options.MergeIdenticalEdges))
+ return nullptr;
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
@@ -179,29 +184,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If there are any other edges from TIBB to DestBB, update those to go
// through the split block, making those edges non-critical as well (and
// reducing the number of phi entries in the DestBB if relevant).
- if (MergeIdenticalEdges) {
+ if (Options.MergeIdenticalEdges) {
for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
if (TI->getSuccessor(i) != DestBB) continue;
// Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB, DontDeleteUselessPhis);
+ DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
// We found another edge to DestBB, go to NewBB instead.
TI->setSuccessor(i, NewBB);
}
}
-
-
- // If we don't have a pass object, we can't update anything...
- if (!P) return NewBB;
-
- DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
-
// If we have nothing to update, just return.
+ auto *AA = Options.AA;
+ auto *DT = Options.DT;
+ auto *LI = Options.LI;
if (!DT && !LI)
return NewBB;
@@ -268,13 +266,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
if (TIL == DestLoop) {
// Both in the same loop, the NewBB joins loop.
- DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
} else if (TIL->contains(DestLoop)) {
// Edge from an outer loop to an inner loop. Add to the outer loop.
- TIL->addBasicBlockToLoop(NewBB, LI->getBase());
+ TIL->addBasicBlockToLoop(NewBB, *LI);
} else if (DestLoop->contains(TIL)) {
// Edge from an inner loop to an outer loop. Add to the outer loop.
- DestLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ DestLoop->addBasicBlockToLoop(NewBB, *LI);
} else {
// Edge from two loops with no containment relation. Because these
// are natural loops, we know that the destination block must be the
@@ -283,19 +281,20 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
assert(DestLoop->getHeader() == DestBB &&
"Should not create irreducible loops!");
if (Loop *P = DestLoop->getParentLoop())
- P->addBasicBlockToLoop(NewBB, LI->getBase());
+ P->addBasicBlockToLoop(NewBB, *LI);
}
}
+
// If TIBB is in a loop and DestBB is outside of that loop, we may need
// to update LoopSimplify form and LCSSA form.
- if (!TIL->contains(DestBB) &&
- P->mustPreserveAnalysisID(LoopSimplifyID)) {
+ if (!TIL->contains(DestBB)) {
assert(!TIL->contains(NewBB) &&
"Split point for loop exit is contained in loop!");
// Update LCSSA form in the newly created exit block.
- if (P->mustPreserveAnalysisID(LCSSAID))
+ if (Options.PreserveLCSSA) {
createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
+ }
    // The only way that we can break LoopSimplify form by splitting a critical
// edge is if after the split there exists some edge from TIL to DestBB
@@ -322,20 +321,12 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (!LoopPreds.empty()) {
assert(!DestBB->isLandingPad() &&
"We don't split edges to landing pads!");
- BasicBlock *NewExitBB =
- SplitBlockPredecessors(DestBB, LoopPreds, "split", P);
- if (P->mustPreserveAnalysisID(LCSSAID))
+ BasicBlock *NewExitBB = SplitBlockPredecessors(
+ DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA);
+ if (Options.PreserveLCSSA)
createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
}
- // LCSSA form was updated above for the case where LoopSimplify is
- // available, which means that all predecessors of loop exit blocks
- // are within the loop. Without LoopSimplify form, it would be
- // necessary to insert a new phi.
- assert((!P->mustPreserveAnalysisID(LCSSAID) ||
- P->mustPreserveAnalysisID(LoopSimplifyID)) &&
- "SplitCriticalEdge doesn't know how to update LCCSA form "
- "without LoopSimplify!");
}
}
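SplitCriticalEdge and SplitAllCriticalEdges now take a single CriticalEdgeSplittingOptions value instead of a Pass* plus boolean flags. A small sketch of a caller that preserves the dominator tree, loop info, and LCSSA, mirroring the usage shown above (helper name illustrative):

  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Function.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  using namespace llvm;

  // Null analyses are simply left un-updated by the splitting utility.
  static unsigned splitAllEdgesPreservingLCSSA(Function &F, DominatorTree *DT,
                                               LoopInfo *LI) {
    return SplitAllCriticalEdges(
        F, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
  }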
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 322485d..8aa7b2a 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -21,7 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
@@ -33,7 +33,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strlen))
return nullptr;
@@ -45,12 +45,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrLen = M->getOrInsertFunction("strlen",
- AttributeSet::get(M->getContext(),
- AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- nullptr);
+ Constant *StrLen = M->getOrInsertFunction(
+ "strlen", AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(), nullptr);
CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -62,7 +59,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
/// specified pointer. Ptr is required to be some pointer type, MaxLen must
/// be of size_t type, and the return value has 'intptr_t' type.
Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strnlen))
return nullptr;
@@ -73,14 +70,11 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrNLen = M->getOrInsertFunction("strnlen",
- AttributeSet::get(M->getContext(),
- AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- nullptr);
- CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
+ Constant *StrNLen =
+ M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), nullptr);
+ CallInst *CI = B.CreateCall(StrNLen, {CastToCStr(Ptr, B), MaxLen}, "strnlen");
if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -91,7 +85,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strchr))
return nullptr;
@@ -106,17 +100,16 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
AttributeSet::get(M->getContext(),
AS),
I8Ptr, I8Ptr, I32Ty, nullptr);
- CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
- ConstantInt::get(I32Ty, C), "strchr");
+ CallInst *CI = B.CreateCall(
+ StrChr, {CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
-Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
- IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strncmp))
return nullptr;
@@ -128,15 +121,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *StrNCmp = M->getOrInsertFunction("strncmp",
- AttributeSet::get(M->getContext(),
- AS),
- B.getInt32Ty(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context), nullptr);
- CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
- CastToCStr(Ptr2, B), Len, "strncmp");
+ Value *StrNCmp = M->getOrInsertFunction(
+ "strncmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
+ CallInst *CI = B.CreateCall(
+ StrNCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "strncmp");
if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -147,8 +136,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI,
- StringRef Name) {
+ const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strcpy))
return nullptr;
@@ -161,8 +149,8 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
Value *StrCpy = M->getOrInsertFunction(Name,
AttributeSet::get(M->getContext(), AS),
I8Ptr, I8Ptr, I8Ptr, nullptr);
- CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
- Name);
+ CallInst *CI =
+ B.CreateCall(StrCpy, {CastToCStr(Dst, B), CastToCStr(Src, B)}, Name);
if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -170,8 +158,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments.
-Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
- IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strncpy))
return nullptr;
@@ -187,8 +174,8 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
AS),
I8Ptr, I8Ptr, I8Ptr,
Len->getType(), nullptr);
- CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
- Len, "strncpy");
+ CallInst *CI = B.CreateCall(
+ StrNCpy, {CastToCStr(Dst, B), CastToCStr(Src, B), Len}, "strncpy");
if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -198,7 +185,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const DataLayout *TD,
+ IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcpy_chk))
return nullptr;
@@ -208,16 +195,13 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
- AttributeSet::get(M->getContext(), AS),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context), nullptr);
+ Value *MemCpy = M->getOrInsertFunction(
+ "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), nullptr);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
- CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+ CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -225,9 +209,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
-Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
- Value *Len, IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memchr))
return nullptr;
@@ -236,14 +219,10 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemChr = M->getOrInsertFunction("memchr",
- AttributeSet::get(M->getContext(), AS),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- B.getInt32Ty(),
- TD->getIntPtrType(Context),
- nullptr);
- CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+ Value *MemChr = M->getOrInsertFunction(
+ "memchr", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context), nullptr);
+ CallInst *CI = B.CreateCall(MemChr, {CastToCStr(Ptr, B), Val, Len}, "memchr");
if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -252,9 +231,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
}
/// EmitMemCmp - Emit a call to the memcmp function.
-Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
- Value *Len, IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcmp))
return nullptr;
@@ -266,14 +244,11 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCmp = M->getOrInsertFunction("memcmp",
- AttributeSet::get(M->getContext(), AS),
- B.getInt32Ty(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context), nullptr);
- CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
- Len, "memcmp");
+ Value *MemCmp = M->getOrInsertFunction(
+ "memcmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
+ CallInst *CI = B.CreateCall(
+ MemCmp, {CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len}, "memcmp");
if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -329,7 +304,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op1->getType(),
Op1->getType(), Op2->getType(), nullptr);
- CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name);
+ CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -339,7 +314,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::putchar))
return nullptr;
@@ -361,7 +336,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
-Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::puts))
return nullptr;
@@ -386,7 +361,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
/// an integer and File is a pointer to FILE.
Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputc))
return nullptr;
@@ -409,7 +384,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
File->getType(), nullptr);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
"chari");
- CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+ CallInst *CI = B.CreateCall(F, {Char, File}, "fputc");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
@@ -419,7 +394,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
/// pointer and File is a pointer to FILE.
Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputs))
return nullptr;
@@ -441,7 +416,7 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), nullptr);
- CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+ CallInst *CI = B.CreateCall(F, {CastToCStr(Str, B), File}, "fputs");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
@@ -450,9 +425,8 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fwrite))
return nullptr;
@@ -466,21 +440,18 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(FWriteName,
- AttributeSet::get(M->getContext(), AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
- File->getType(), nullptr);
+ F = M->getOrInsertFunction(
+ FWriteName, AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), File->getType(), nullptr);
else
- F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
- File->getType(), nullptr);
- CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+ F = M->getOrInsertFunction(FWriteName, DL.getIntPtrType(Context),
+ B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), File->getType(),
+ nullptr);
+ CallInst *CI =
+ B.CreateCall(F, {CastToCStr(Ptr, B), Size,
+ ConstantInt::get(DL.getIntPtrType(Context), 1), File});
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
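Two changes run through this file: the emitters take const DataLayout& (the layout is now always available from the Module) rather than a possibly-null DataLayout*, and the CreateCall2/3/4 helpers give way to a single CreateCall taking a braced argument list. A caller sketch under the new signatures (helper name illustrative):

  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/Utils/BuildLibCalls.h"
  using namespace llvm;

  // Emit strlen(Str) at the builder's insertion point; returns null if the
  // target environment has no strlen according to TLI.
  static Value *emitLengthOf(Value *Str, IRBuilder<> &B,
                             const TargetLibraryInfo *TLI) {
    const DataLayout &DL = B.GetInsertBlock()->getModule()->getDataLayout();
    return EmitStrLen(Str, B, DL, TLI);
  }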
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 96a763f..4f8d1df 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -34,7 +34,7 @@
#include <map>
using namespace llvm;
-// CloneBasicBlock - See comments in Cloning.h
+/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
@@ -154,23 +154,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
TypeMapper, Materializer);
}
-// Find the MDNode which corresponds to the DISubprogram data that described F.
-static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
- for (DISubprogram Subprogram : Finder.subprograms()) {
- if (Subprogram.describes(F)) return Subprogram;
+// Find the MDNode which corresponds to the subprogram data that described F.
+static DISubprogram *FindSubprogram(const Function *F,
+ DebugInfoFinder &Finder) {
+ for (DISubprogram *Subprogram : Finder.subprograms()) {
+ if (Subprogram->describes(F))
+ return Subprogram;
}
return nullptr;
}
// Add an operand to an existing MDNode. The new operand will be added at the
// back of the operand list.
-static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) {
+static void AddOperand(DICompileUnit *CU, DISubprogramArray SPs,
+ Metadata *NewSP) {
SmallVector<Metadata *, 16> NewSPs;
- NewSPs.reserve(SPs->getNumOperands() + 1);
- for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I)
- NewSPs.push_back(SPs->getOperand(I));
+ NewSPs.reserve(SPs.size() + 1);
+ for (auto *SP : SPs)
+ NewSPs.push_back(SP);
NewSPs.push_back(NewSP);
- CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs)));
+ CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
}
// Clone the module-level debug info associated with OldFunc. The cloned data
@@ -180,21 +183,21 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
DebugInfoFinder Finder;
Finder.processModule(*OldFunc->getParent());
- const MDNode *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
+ const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
if (!OldSubprogramMDNode) return;
// Ensure that OldFunc appears in the map.
// (if it's already there it must point to NewFunc anyway)
VMap[OldFunc] = NewFunc;
- DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap));
-
- for (DICompileUnit CU : Finder.compile_units()) {
- DIArray Subprograms(CU.getSubprograms());
+ auto *NewSubprogram =
+ cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap));
+ for (auto *CU : Finder.compile_units()) {
+ auto Subprograms = CU->getSubprograms();
// If the compile unit's function list contains the old function, it should
// also contain the new one.
- for (unsigned i = 0; i < Subprograms.getNumElements(); i++) {
- if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) {
+ for (auto *SP : Subprograms) {
+ if (SP == OldSubprogramMDNode) {
AddOperand(CU, Subprograms, NewSubprogram);
break;
}
@@ -202,7 +205,7 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
}
}
-/// CloneFunction - Return a copy of the specified function, but without
+/// Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
/// in the VMap are changed to refer to their mapped value instead of the
/// original one. If any of the arguments to the function are in the VMap,
@@ -250,8 +253,7 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
namespace {
- /// PruningFunctionCloner - This class is a private class used to implement
- /// the CloneAndPruneFunctionInto method.
+ /// This is a private class used to implement CloneAndPruneFunctionInto.
struct PruningFunctionCloner {
Function *NewFunc;
const Function *OldFunc;
@@ -259,29 +261,40 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- const DataLayout *DL;
+ CloningDirector *Director;
+ ValueMapTypeRemapper *TypeMapper;
+ ValueMaterializer *Materializer;
+
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
- ValueToValueMapTy &valueMap,
- bool moduleLevelChanges,
- const char *nameSuffix,
- ClonedCodeInfo *codeInfo,
- const DataLayout *DL)
- : NewFunc(newFunc), OldFunc(oldFunc),
- VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) {
+ ValueToValueMapTy &valueMap, bool moduleLevelChanges,
+ const char *nameSuffix, ClonedCodeInfo *codeInfo,
+ CloningDirector *Director)
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
+ ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
+ CodeInfo(codeInfo), Director(Director) {
+ // These are optional components. The Director may return null.
+ if (Director) {
+ TypeMapper = Director->getTypeRemapper();
+ Materializer = Director->getValueMaterializer();
+ } else {
+ TypeMapper = nullptr;
+ Materializer = nullptr;
+ }
}
- /// CloneBlock - The specified block is found to be reachable, clone it and
+ /// The specified block is found to be reachable, clone it and
/// anything that it can reach.
- void CloneBlock(const BasicBlock *BB,
+ void CloneBlock(const BasicBlock *BB,
+ BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone);
};
}
-/// CloneBlock - The specified block is found to be reachable, clone it and
+/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+ BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone){
WeakVH &BBEntry = VMap[BB];
@@ -307,26 +320,45 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
const_cast<BasicBlock*>(BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
}
-
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
-
+
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
- for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+ for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
II != IE; ++II) {
+ // If the "Director" remaps the instruction, don't clone it.
+ if (Director) {
+ CloningDirector::CloningAction Action
+ = Director->handleInstruction(VMap, II, NewBB);
+ // If the cloning director says stop, we want to stop everything, not
+ // just break out of the loop (which would cause the terminator to be
+ // cloned). The cloning director is responsible for inserting a proper
+ // terminator into the new basic block in this case.
+ if (Action == CloningDirector::StopCloningBB)
+ return;
+ // If the cloning director says skip, continue to the next instruction.
+ // In this case, the cloning director is responsible for mapping the
+ // skipped instruction to some value that is defined in the new
+ // basic block.
+ if (Action == CloningDirector::SkipInstruction)
+ continue;
+ }
+
Instruction *NewInst = II->clone();
// Eagerly remap operands to the newly cloned instruction, except for PHI
// nodes for which we defer processing until we update the CFG.
if (!isa<PHINode>(NewInst)) {
RemapInstruction(NewInst, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
// the basic block.
- if (Value *V = SimplifyInstruction(NewInst, DL)) {
+ if (Value *V =
+ SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
if (Value *MappedV = VMap.lookup(V))
@@ -354,6 +386,26 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
+ if (Director) {
+ CloningDirector::CloningAction Action
+ = Director->handleInstruction(VMap, OldTI, NewBB);
+ // If the cloning director says stop, we want to stop everything, not
+ // just break out of the loop (which would cause the terminator to be
+ // cloned). The cloning director is responsible for inserting a proper
+ // terminator into the new basic block in this case.
+ if (Action == CloningDirector::StopCloningBB)
+ return;
+ if (Action == CloningDirector::CloneSuccessors) {
+ // If the director says to skip with a terminate instruction, we still
+ // need to clone this block's successors.
+ const TerminatorInst *TI = NewBB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ return;
+ }
+ assert(Action != CloningDirector::SkipInstruction &&
+ "SkipInstruction is not valid for terminators.");
+ }
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
// If the condition was a known constant in the callee...
@@ -409,39 +461,53 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
}
-/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
-/// except that it does some simple constant prop and DCE on the fly. The
-/// effect of this is to copy significantly less code in cases where (for
-/// example) a function call with constant arguments is inlined, and those
-/// constant arguments cause a significant amount of code in the callee to be
-/// dead. Since this doesn't produce an exact copy of the input, it can't be
-/// used for things like CloneFunction or CloneModule.
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+/// This works like CloneAndPruneFunctionInto, except that it does not clone the
+/// entire function. Instead it starts at an instruction provided by the caller
+/// and copies (and prunes) only the code reachable from that instruction.
+void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
+ const Instruction *StartingInst,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
+ SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const DataLayout *DL,
- Instruction *TheCall) {
+ CloningDirector *Director) {
assert(NameSuffix && "NameSuffix cannot be null!");
-
+
+ ValueMapTypeRemapper *TypeMapper = nullptr;
+ ValueMaterializer *Materializer = nullptr;
+
+ if (Director) {
+ TypeMapper = Director->getTypeRemapper();
+ Materializer = Director->getValueMaterializer();
+ }
+
#ifndef NDEBUG
- for (Function::const_arg_iterator II = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); II != E; ++II)
- assert(VMap.count(II) && "No mapping from source argument specified!");
+  // If the cloning starts at the beginning of the function, verify that
+ // the function arguments are mapped.
+ if (!StartingInst)
+ for (Function::const_arg_iterator II = OldFunc->arg_begin(),
+ E = OldFunc->arg_end(); II != E; ++II)
+ assert(VMap.count(II) && "No mapping from source argument specified!");
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, DL);
+ NameSuffix, CodeInfo, Director);
+ const BasicBlock *StartingBB;
+ if (StartingInst)
+ StartingBB = StartingInst->getParent();
+ else {
+ StartingBB = &OldFunc->getEntryBlock();
+ StartingInst = StartingBB->begin();
+ }
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
- CloneWorklist.push_back(&OldFunc->getEntryBlock());
+ PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
CloneWorklist.pop_back();
- PFC.CloneBlock(BB, CloneWorklist);
+ PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
}
// Loop over all of the basic blocks in the old function. If the block was
@@ -461,16 +527,24 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
- if (const PHINode *PN = dyn_cast<PHINode>(I))
- PHIToResolve.push_back(PN);
- else
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
+ // PHI nodes may have been remapped to non-PHI nodes by the caller or
+ // during the cloning process.
+ if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (isa<PHINode>(VMap[PN]))
+ PHIToResolve.push_back(PN);
+ else
+ break;
+ } else {
break;
+ }
+ }
// Finally, remap the terminator instructions, as those can't be remapped
// until all BBs are mapped.
RemapInstruction(NewBB->getTerminator(), VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer);
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -563,13 +637,13 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// node).
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
- recursivelySimplifyInstruction(PN, DL);
+ recursivelySimplifyInstruction(PN);
// Now that the inlined function body has been fully constructed, go through
- // and zap unconditional fall-through branches. This happen all the time when
+ // and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]);
Function::iterator I = Begin;
while (I != NewFunc->end()) {
// Check if this block has become dead during inlining or other
@@ -617,12 +691,32 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Do not increment I, iteratively merge all things this block branches to.
}
- // Make a final pass over the basic blocks from theh old function to gather
+ // Make a final pass over the basic blocks from the old function to gather
// any return instructions which survived folding. We have to do this here
// because we can iteratively remove and merge returns above.
- for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]),
+ for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]),
E = NewFunc->end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
Returns.push_back(RI);
}
+
+
+/// This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst*> &Returns,
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo,
+ Instruction *TheCall) {
+ CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap,
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
+ nullptr);
+}
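CloneAndPruneFunctionInto is now a thin wrapper over CloneAndPruneIntoFromInst, which can start the pruned clone at an arbitrary instruction and defer per-instruction decisions to an optional CloningDirector. A hypothetical caller that clones only the region reachable from a given instruction, with no director, might look like this (the helper name and name suffix are illustrative):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/Transforms/Utils/Cloning.h"
  #include "llvm/Transforms/Utils/ValueMapper.h"
  using namespace llvm;

  // VMap must already map everything the cloned region uses from outside
  // itself (arguments and values defined before StartInst).
  static void clonePartialBody(Function *NewF, const Function *OldF,
                               const Instruction *StartInst,
                               ValueToValueMapTy &VMap) {
    SmallVector<ReturnInst *, 8> Returns;
    CloneAndPruneIntoFromInst(NewF, OldF, StartInst, VMap,
                              /*ModuleLevelChanges=*/false, Returns,
                              /*NameSuffix=*/".partial", /*CodeInfo=*/nullptr,
                              /*Director=*/nullptr);
  }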
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index fae9ff5..2693322 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -69,9 +69,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
auto *PTy = cast<PointerType>(I->getType());
- auto *GA =
- GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- I->getLinkage(), I->getName(), New);
+ auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New);
GA->copyAttributesFrom(I);
VMap[I] = GA;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index e70a7d6..ab89b41 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -332,11 +332,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
DEBUG(dbgs() << **i << ", ");
DEBUG(dbgs() << ")\n");
+ StructType *StructTy;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- PointerType *StructPtr =
- PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+ StructTy = StructType::get(M->getContext(), paramTy);
paramTy.clear();
- paramTy.push_back(StructPtr);
+ paramTy.push_back(PointerType::getUnqual(StructTy));
}
FunctionType *funcType =
FunctionType::get(RetTy, paramTy, false);
@@ -364,8 +364,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
TerminatorInst *TI = newFunction->begin()->getTerminator();
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
} else
RewriteVal = AI++;
@@ -447,6 +447,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
}
+ StructType *StructArgTy = nullptr;
AllocaInst *Struct = nullptr;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
std::vector<Type*> ArgTypes;
@@ -455,7 +456,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
ArgTypes.push_back((*v)->getType());
// Allocate a struct at the beginning of this function
- Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
Struct =
new AllocaInst(StructArgTy, nullptr, "structArg",
codeReplacer->getParent()->begin()->begin());
@@ -465,9 +466,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(Struct, Idx,
- "gep_" + StructValues[i]->getName());
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
codeReplacer->getInstList().push_back(GEP);
StoreInst *SI = new StoreInst(StructValues[i], GEP);
codeReplacer->getInstList().push_back(SI);
@@ -491,9 +491,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
- GetElementPtrInst *GEP
- = GetElementPtrInst::Create(Struct, Idx,
- "gep_reload_" + outputs[i]->getName());
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
codeReplacer->getInstList().push_back(GEP);
Output = GEP;
} else {
@@ -606,10 +605,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
FirstOut+out);
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(OAI, Idx,
- "gep_" + outputs[out]->getName(),
- NTRet);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(),
+ NTRet);
new StoreInst(outputs[out], GEP, NTRet);
} else {
new StoreInst(outputs[out], OAI, NTRet);
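
The CodeExtractor hunks above all follow the same shape: the struct type used for the aggregated arguments is now kept in a named variable and handed to GetElementPtrInst::Create explicitly, since in this revision the GEP constructor takes the source element type rather than recovering it from the pointer operand. A minimal sketch of the new form; STy, ArgPtr, InsertPt and the field types are illustrative assumptions:

    LLVMContext &Ctx = ArgPtr->getContext();
    StructType *STy =
        StructType::get(Ctx, {Type::getInt8Ty(Ctx), Type::getInt32Ty(Ctx)});
    Value *Idx[] = {ConstantInt::get(Type::getInt32Ty(Ctx), 0),
                    ConstantInt::get(Type::getInt32Ty(Ctx), 1)};
    // The pointee type is passed explicitly; it is no longer derived from
    // ArgPtr->getType()->getElementType().
    GetElementPtrInst *GEP =
        GetElementPtrInst::Create(STy, ArgPtr, Idx, "field1", InsertPt);
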
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index 26875e8..dc95089 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/BitVector.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "ctor_utils"
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 9972b22..003da58 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -39,6 +39,19 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
F->getEntryBlock().begin());
}
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and create a basic block
+ // into which the store can be inserted.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ if (!II->getNormalDest()->getSinglePredecessor()) {
+ unsigned SuccNum = GetSuccessorNumber(II->getParent(), II->getNormalDest());
+ assert(isCriticalEdge(II, SuccNum) && "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(II, SuccNum);
+ assert(BB && "Unable to split critical edge.");
+ (void)BB;
+ }
+ }
+
// Change all of the users of the instruction to read from the stack slot.
while (!I.use_empty()) {
Instruction *U = cast<Instruction>(I.user_back());
@@ -71,7 +84,6 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
}
}
-
// Insert stores of the computed value into the stack slot. We have to be
// careful if I is an invoke instruction, because we can't insert the store
// AFTER the terminator instruction.
@@ -79,27 +91,13 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
if (!isa<TerminatorInst>(I)) {
InsertPt = &I;
++InsertPt;
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
} else {
InvokeInst &II = cast<InvokeInst>(I);
- if (II.getNormalDest()->getSinglePredecessor())
- InsertPt = II.getNormalDest()->getFirstInsertionPt();
- else {
- // We cannot demote invoke instructions to the stack if their normal edge
- // is critical. Therefore, split the critical edge and insert the store
- // in the newly created basic block.
- unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest());
- TerminatorInst *TI = &cast<TerminatorInst>(I);
- assert (isCriticalEdge(TI, SuccNum) &&
- "Expected a critical edge!");
- BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
- assert (BB && "Unable to split critical edge.");
- InsertPt = BB->getFirstInsertionPt();
- }
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
}
- for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
- /* empty */; // Don't insert before PHI nodes or landingpad instrs.
-
new StoreInst(&I, Slot, InsertPt);
return Slot;
}
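
The rewrite above hoists the critical-edge handling to the front of DemoteRegToStack: once the invoke's normal edge has been split, the later insertion-point logic can rely on getFirstInsertionPt() of the normal destination being a safe place for the spill store. A compressed sketch of that invariant, reusing the names from the patch:

    if (auto *II = dyn_cast<InvokeInst>(&I)) {
      if (!II->getNormalDest()->getSinglePredecessor()) {
        // Splitting gives the normal destination a unique predecessor block.
        unsigned SuccNum =
            GetSuccessorNumber(II->getParent(), II->getNormalDest());
        SplitCriticalEdge(II, SuccNum);
      }
    }
    // Later, unconditionally:
    //   InsertPt = II.getNormalDest()->getFirstInsertionPt();
    //   new StoreInst(&I, Slot, InsertPt);
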
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 52e2d59..44b7d25 100644
--- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -150,7 +150,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (MSI->isVolatile())
return true;
GS.StoredType = GlobalStatus::Stored;
- } else if (ImmutableCallSite C = I) {
+ } else if (auto C = ImmutableCallSite(I)) {
if (!C.isCallee(&U))
return true;
GS.IsLoaded = true;
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 2a86eb5..ddeaff0 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -88,7 +89,7 @@ namespace {
CallerLPad = cast<LandingPadInst>(I);
}
- /// getOuterResumeDest - The outer unwind destination is the target of
+ /// The outer unwind destination is the target of
/// unwind edges introduced for calls within the inlined function.
BasicBlock *getOuterResumeDest() const {
return OuterResumeDest;
@@ -98,17 +99,16 @@ namespace {
LandingPadInst *getLandingPadInst() const { return CallerLPad; }
- /// forwardResume - Forward the 'resume' instruction to the caller's landing
- /// pad block. When the landing pad block has only one predecessor, this is
+ /// Forward the 'resume' instruction to the caller's landing pad block.
+ /// When the landing pad block has only one predecessor, this is
/// a simple branch. When there is more than one predecessor, we need to
/// split the landing pad block after the landingpad instruction and jump
/// to there.
void forwardResume(ResumeInst *RI,
SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
- /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
- /// destination block for the given basic block, using the values for the
- /// original invoke's source block.
+ /// Add incoming-PHI values to the unwind destination block for the given
+ /// basic block, using the values for the original invoke's source block.
void addIncomingPHIValuesFor(BasicBlock *BB) const {
addIncomingPHIValuesForInto(BB, OuterResumeDest);
}
@@ -123,7 +123,7 @@ namespace {
};
}
-/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts.
+/// Get or create a target for the branch from ResumeInsts.
BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
if (InnerResumeDest) return InnerResumeDest;
@@ -158,8 +158,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
return InnerResumeDest;
}
-/// forwardResume - Forward the 'resume' instruction to the caller's landing pad
-/// block. When the landing pad block has only one predecessor, this is a simple
+/// Forward the 'resume' instruction to the caller's landing pad block.
+/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void InvokeInliningInfo::forwardResume(ResumeInst *RI,
@@ -177,9 +177,9 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
RI->eraseFromParent();
}
-/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
-/// an invoke, we have to turn all of the calls that can throw into
-/// invokes. This function analyze BB to see if there are any calls, and if so,
+/// When we inline a basic block into an invoke,
+/// we have to turn all of the calls that can throw into invokes.
+/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
@@ -227,7 +227,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
}
}
-/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
@@ -278,8 +278,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
InvokeDest->removePredecessor(II->getParent());
}
-/// CloneAliasScopeMetadata - When inlining a function that contains noalias
-/// scope metadata, this metadata needs to be cloned so that the inlined blocks
+/// When inlining a function that contains noalias scope metadata,
+/// this metadata needs to be cloned so that the inlined blocks
/// have different "unqiue scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
@@ -319,13 +319,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
// Now we have a complete set of all metadata in the chains used to specify
// the noalias scopes and the lists of those scopes.
- SmallVector<MDNode *, 16> DummyNodes;
+ SmallVector<TempMDTuple, 16> DummyNodes;
DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
I != IE; ++I) {
- MDNode *Dummy = MDNode::getTemporary(CalledFunc->getContext(), None);
- DummyNodes.push_back(Dummy);
- MDMap[*I].reset(Dummy);
+ DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
+ MDMap[*I].reset(DummyNodes.back().get());
}
// Create new metadata nodes to replace the dummy nodes, replacing old
@@ -343,7 +342,8 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
}
MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
- MDNodeFwdDecl *TempM = cast<MDNodeFwdDecl>(MDMap[*I]);
+ MDTuple *TempM = cast<MDTuple>(MDMap[*I]);
+ assert(TempM->isTemporary() && "Expected temporary node");
TempM->replaceAllUsesWith(NewM);
}
@@ -388,18 +388,14 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
NI->setMetadata(LLVMContext::MD_noalias, M);
}
}
-
- // Now that everything has been replaced, delete the dummy nodes.
- for (unsigned i = 0, ie = DummyNodes.size(); i != ie; ++i)
- MDNode::deleteTemporary(DummyNodes[i]);
}
-/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then
-/// add new alias scopes for each noalias argument, tag the mapped noalias
+/// If the inlined function has noalias arguments,
+/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
- const DataLayout *DL, AliasAnalysis *AA) {
+ const DataLayout &DL, AliasAnalysis *AA) {
if (!EnableNoAliasConversion)
return;
@@ -625,8 +621,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
- if (!PreserveAlignmentAssumptions || !IFI.DL)
+ if (!PreserveAlignmentAssumptions)
return;
+ auto &DL = CS.getCaller()->getParent()->getDataLayout();
// To avoid inserting redundant assumptions, we should check for assumptions
// already in the caller. To do this, we might need a DT of the caller.
@@ -648,20 +645,20 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
// If we can already prove the asserted alignment in the context of the
// caller, then don't bother inserting the assumption.
Value *Arg = CS.getArgument(I->getArgNo());
- if (getKnownAlignment(Arg, IFI.DL,
+ if (getKnownAlignment(Arg, DL, CS.getInstruction(),
&IFI.ACT->getAssumptionCache(*CalledFunc),
- CS.getInstruction(), &DT) >= Align)
+ &DT) >= Align)
continue;
- IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg,
- Align);
+ IRBuilder<>(CS.getInstruction())
+ .CreateAlignmentAssumption(DL, Arg, Align);
}
}
}
-/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
-/// into the caller, update the specified callgraph to reflect the changes we
-/// made. Note that it's possible that not all code was copied over, so only
+/// Once we have cloned code over from a callee into the caller,
+/// update the specified callgraph to reflect the changes we made.
+/// Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
@@ -696,8 +693,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
- if (!NewCall) continue;
+ if (!NewCall)
+ continue;
+ // We do not treat intrinsic calls like real function calls because we
+ // expect them to become inline code; do not add an edge for an intrinsic.
+ CallSite CS = CallSite(NewCall);
+ if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
+ continue;
+
// Remember that this call site got inlined for the client of
// InlineFunction.
IFI.InlinedCalls.push_back(NewCall);
@@ -729,11 +733,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
IRBuilder<> Builder(InsertBlock->begin());
- Value *Size;
- if (IFI.DL == nullptr)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = Builder.getInt64(IFI.DL->getTypeStoreSize(AggTy));
+ Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
@@ -741,7 +741,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1);
}
-/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
@@ -762,11 +762,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
return Arg;
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
- if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.DL,
- &IFI.ACT->getAssumptionCache(*Caller),
- TheCall) >= ByValAlignment)
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall,
+ &IFI.ACT->getAssumptionCache(*Caller)) >=
+ ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
@@ -774,10 +776,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
}
// Create the alloca. If we have DataLayout, use nice alignment.
- unsigned Align = 1;
- if (IFI.DL)
- Align = IFI.DL->getPrefTypeAlignment(AggTy);
-
+ unsigned Align =
+ Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy);
+
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
// pointer inside the callee).
@@ -792,8 +793,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
return NewAlloca;
}
-// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
-// intrinsic.
+// Check whether this Value is used by a lifetime intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
for (User *U : V->users()) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
@@ -808,7 +808,7 @@ static bool isUsedByLifetimeMarker(Value *V) {
return false;
}
-// hasLifetimeMarkers - Check whether the given alloca already has
+// Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
Type *Ty = AI->getType();
@@ -827,35 +827,69 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
return false;
}
-/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
-/// recursively update InlinedAtEntry of a DebugLoc.
-static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
- const DebugLoc &InlinedAtDL,
- LLVMContext &Ctx) {
- if (MDNode *IA = DL.getInlinedAt(Ctx)) {
- DebugLoc NewInlinedAtDL
- = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
- return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
- NewInlinedAtDL.getAsMDNode(Ctx));
+/// Rebuild the entire inlined-at chain for this instruction so that the top of
+/// the chain now is inlined-at the new call site.
+static DebugLoc
+updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx,
+ DenseMap<const DILocation *, DILocation *> &IANodes) {
+ SmallVector<DILocation *, 3> InlinedAtLocations;
+ DILocation *Last = InlinedAtNode;
+ DILocation *CurInlinedAt = DL;
+
+ // Gather all the inlined-at nodes
+ while (DILocation *IA = CurInlinedAt->getInlinedAt()) {
+ // Skip any we've already built nodes for
+ if (DILocation *Found = IANodes[IA]) {
+ Last = Found;
+ break;
+ }
+
+ InlinedAtLocations.push_back(IA);
+ CurInlinedAt = IA;
}
- return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
- InlinedAtDL.getAsMDNode(Ctx));
+ // Starting from the top, rebuild the nodes to point to the new inlined-at
+ // location (then rebuilding the rest of the chain behind it) and update the
+ // map of already-constructed inlined-at nodes.
+ for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend();
+ I != E; ++I) {
+ const DILocation *MD = *I;
+ Last = IANodes[MD] = DILocation::getDistinct(
+ Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last);
+ }
+
+ // And finally create the normal location for this instruction, referring to
+ // the new inlined-at chain.
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
}
-/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// Update inlined instructions' line numbers
/// to encode location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
Instruction *TheCall) {
DebugLoc TheCallDL = TheCall->getDebugLoc();
- if (TheCallDL.isUnknown())
+ if (!TheCallDL)
return;
+ auto &Ctx = Fn->getContext();
+ DILocation *InlinedAtNode = TheCallDL;
+
+ // Create a unique call site, not to be confused with any other call from the
+ // same location.
+ InlinedAtNode = DILocation::getDistinct(
+ Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
+ InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
+
+ // Cache the inlined-at nodes as they're built so they are reused; without
+ // this, every instruction's inlined-at chain would become distinct from the
+ // others.
+ DenseMap<const DILocation *, DILocation *> IANodes;
+
for (; FI != Fn->end(); ++FI) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
DebugLoc DL = BI->getDebugLoc();
- if (DL.isUnknown()) {
+ if (!DL) {
// If the inlined instruction has no line number, make it look as if it
// originates from the call location. This is important for
// ((__always_inline__, __nodebug__)) functions which must use caller
@@ -868,29 +902,15 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
BI->setDebugLoc(TheCallDL);
} else {
- BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
- LLVMContext &Ctx = BI->getContext();
- MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
- DVI->setOperand(2, MetadataAsValue::get(
- Ctx, createInlinedVariable(DVI->getVariable(),
- InlinedAt, Ctx)));
- } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
- LLVMContext &Ctx = BI->getContext();
- MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
- DDI->setOperand(1, MetadataAsValue::get(
- Ctx, createInlinedVariable(DDI->getVariable(),
- InlinedAt, Ctx)));
- }
+ BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
}
}
}
}
-/// InlineFunction - This function inlines the called function into the basic
-/// block of the caller. This returns false if it is not possible to inline
-/// this call. The program is still in a well defined state if this occurs
-/// though.
+/// This function inlines the called function into the basic block of the
+/// caller. This returns false if it is not possible to inline this call.
+/// The program is still in a well defined state if this occurs though.
///
/// Note that this only does one level of inlining. For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
@@ -975,6 +995,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Keep a list of pair (dst, src) to emit byval initializations.
SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
+ auto &DL = Caller->getParent()->getDataLayout();
+
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -1009,9 +1031,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, IFI.DL, TheCall);
+ &InlinedFunctionInfo, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
@@ -1032,7 +1054,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CloneAliasScopeMetadata(CS, VMap);
// Add noalias metadata if necessary.
- AddAliasScopeMetadata(CS, VMap, IFI.DL, IFI.AA);
+ AddAliasScopeMetadata(CS, VMap, DL, IFI.AA);
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
@@ -1079,6 +1101,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
FirstNewBlock->getInstList(),
AI, I);
}
+ // Move any dbg.declares describing the allocas into the entry basic block.
+ DIBuilder DIB(*Caller->getParent());
+ for (auto &AI : IFI.StaticAllocas)
+ replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false);
}
bool InlinedMustTailCalls = false;
@@ -1136,18 +1162,21 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
ConstantInt *AllocaSize = nullptr;
if (ConstantInt *AIArraySize =
dyn_cast<ConstantInt>(AI->getArraySize())) {
- if (IFI.DL) {
- Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType);
- uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
- assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
- // Check that array size doesn't saturate uint64_t and doesn't
- // overflow when it's multiplied by type size.
- if (AllocaArraySize != ~0ULL &&
- UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
- AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
- AllocaArraySize * AllocaTypeSize);
- }
+ auto &DL = Caller->getParent()->getDataLayout();
+ Type *AllocaType = AI->getAllocatedType();
+ uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+
+ // Don't add markers for zero-sized allocas.
+ if (AllocaArraySize == 0)
+ continue;
+
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (AllocaArraySize != ~0ULL &&
+ UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
}
}
@@ -1173,7 +1202,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Insert the llvm.stacksave.
CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
- .CreateCall(StackSave, "savedstack");
+ .CreateCall(StackSave, {}, "savedstack");
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
@@ -1408,7 +1437,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
- if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr,
+ auto &DL = Caller->getParent()->getDataLayout();
+ if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr,
&IFI.ACT->getAssumptionCache(*Caller))) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
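
One detail worth spelling out from the CloneAliasScopeMetadata hunks above: MDNode::getTemporary is replaced by MDTuple::getTemporary, which returns an owning TempMDTuple, so the explicit deleteTemporary() cleanup loop at the end of the function could simply be dropped. A minimal sketch of the new ownership pattern; Ctx and RealNode are illustrative names, and this is not the patch verbatim:

    SmallVector<TempMDTuple, 16> DummyNodes;
    DummyNodes.push_back(MDTuple::getTemporary(Ctx, None));
    MDTuple *Dummy = DummyNodes.back().get();   // non-owning view, e.g. for a map

    // Once the real node exists, forward all uses of the placeholder:
    Dummy->replaceAllUsesWith(RealNode);

    // No manual MDNode::deleteTemporary() pass is needed: the temporaries are
    // released when DummyNodes goes out of scope.
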
diff --git a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index 0ae746c..30edf3b 100644
--- a/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -252,8 +252,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
- Value *Tmp0 = Builder.CreateCall2(CTLZ, Divisor, True);
- Value *Tmp1 = Builder.CreateCall2(CTLZ, Dividend, True);
+ Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True});
+ Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
Value *SR = Builder.CreateSub(Tmp0, Tmp1);
Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
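
The IntegerDivision hunk is a mechanical instance of a change that appears elsewhere in this change set (see the CreateCall(StackSave, {}, "savedstack") hunk in InlineFunction.cpp): the fixed-arity IRBuilder helpers such as CreateCall2 give way to a single CreateCall taking an argument list. A one-line sketch using the operands from the hunk above:

    // Before: Builder.CreateCall2(CTLZ, Dividend, True);
    Value *LeadingZeros = Builder.CreateCall(CTLZ, {Dividend, True});
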
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 3f9b702..9d40b69 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -112,17 +112,17 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
if (SSAUpdate.HasValueForBlock(ExitBB))
continue;
- PHINode *PN = PHINode::Create(Inst.getType(), PredCache.GetNumPreds(ExitBB),
+ PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB),
Inst.getName() + ".lcssa", ExitBB->begin());
// Add inputs from inside the loop for this PHI.
- for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
- PN->addIncoming(&Inst, *PI);
+ for (BasicBlock *Pred : PredCache.get(ExitBB)) {
+ PN->addIncoming(&Inst, Pred);
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
- if (!L.contains(*PI))
+ if (!L.contains(Pred))
UsesToRewrite.push_back(
&PN->getOperandUse(PN->getOperandNumForIncomingValue(
PN->getNumIncomingValues() - 1)));
@@ -294,21 +294,18 @@ struct LCSSA : public FunctionPass {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
}
-
-private:
- void verifyAnalysis() const override;
};
}
char LCSSA::ID = 0;
INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
@@ -318,7 +315,7 @@ char &llvm::LCSSAID = LCSSA::ID;
/// Process all loops in the function, inner-most out.
bool LCSSA::runOnFunction(Function &F) {
bool Changed = false;
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = getAnalysisIfAvailable<ScalarEvolution>();
@@ -329,18 +326,3 @@ bool LCSSA::runOnFunction(Function &F) {
return Changed;
}
-static void verifyLoop(Loop &L, DominatorTree &DT) {
- // Recurse depth-first through inner loops.
- for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
- verifyLoop(**LI, DT);
-
- // Check the special guarantees that LCSSA makes.
- //assert(L.isLCSSAForm(DT) && "LCSSA form not preserved!");
-}
-
-void LCSSA::verifyAnalysis() const {
- // Verify each loop nest in the function, assuming LI still points at that
- // function's loop info.
- for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- verifyLoop(**I, *DT);
-}
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 2a84d7e..70c77b0 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -110,11 +111,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
- // If we are switching on a constant, we can convert the switch into a
- // single branch instruction!
+ // If we are switching on a constant, we can convert the switch to an
+ // unconditional branch.
ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
- BasicBlock *TheOnlyDest = SI->getDefaultDest();
- BasicBlock *DefaultDest = TheOnlyDest;
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ BasicBlock *TheOnlyDest = DefaultDest;
+
+ // If the default is unreachable, ignore it when searching for TheOnlyDest.
+ if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
+ SI->getNumCases() > 0) {
+ TheOnlyDest = SI->case_begin().getCaseSuccessor();
+ }
// Figure out which case it goes to.
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
@@ -410,7 +417,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
///
/// This returns true if it changed the code, note that it can delete
/// instructions in other blocks as well in this block.
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
const TargetLibraryInfo *TLI) {
bool MadeChange = false;
@@ -427,7 +434,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
Instruction *Inst = BI++;
WeakVH BIHandle(BI);
- if (recursivelySimplifyInstruction(Inst, TD, TLI)) {
+ if (recursivelySimplifyInstruction(Inst, TLI)) {
MadeChange = true;
if (BIHandle != BI)
BI = BB->begin();
@@ -457,8 +464,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
///
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the and to 0.
-void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DataLayout *TD) {
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
@@ -473,7 +479,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
Value *OldPhiIt = PhiIt;
- if (!recursivelySimplifyInstruction(PN, TD))
+ if (!recursivelySimplifyInstruction(PN))
continue;
// If recursive simplification ended up deleting the next PHI node we would
@@ -489,7 +495,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
/// between them, moving the instructions in the predecessor into DestBB and
/// deleting the predecessor block.
///
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
Value *NewVal = PN->getIncomingValue(0);
@@ -525,14 +531,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
if (PredBB == &DestBB->getParent()->getEntryBlock())
DestBB->moveAfter(PredBB);
- if (P) {
- if (DominatorTreeWrapperPass *DTWP =
- P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
- DominatorTree &DT = DTWP->getDomTree();
- BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock();
- DT.changeImmediateDominator(DestBB, PredBBIDom);
- DT.eraseNode(PredBB);
- }
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
}
// Nuke BB.
PredBB->eraseFromParent();
@@ -897,13 +899,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// their preferred alignment from the beginning.
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
- unsigned PrefAlign, const DataLayout *TD) {
+ unsigned PrefAlign,
+ const DataLayout &DL) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
// If the preferred alignment is greater than the natural stack alignment
// then don't round up. This avoids dynamic stack realignment.
- if (TD && TD->exceedsNaturalStackAlignment(PrefAlign))
+ if (DL.exceedsNaturalStackAlignment(PrefAlign))
return Align;
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
@@ -942,13 +945,13 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
/// and it is more than the alignment of the ultimate object, see if we can
/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const DataLayout *DL,
- AssumptionCache *AC,
+ const DataLayout &DL,
const Instruction *CxtI,
+ AssumptionCache *AC,
const DominatorTree *DT) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType());
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT);
@@ -975,7 +978,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
///
/// See if there is a dbg.value intrinsic for DIVar before I.
-static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+static bool LdStHasDebugValue(const DILocalVariable *DIVar, Instruction *I) {
// Since we can't guarantee that the original dbg.declare intrinsic
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
@@ -995,17 +998,13 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
/// that has an associated llvm.dbg.declare intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar)
- return false;
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
if (LdStHasDebugValue(DIVar, SI))
return true;
- Instruction *DbgVal = nullptr;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
Argument *ExtendedArg = nullptr;
@@ -1014,11 +1013,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
if (ExtendedArg)
- DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI);
+ Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), SI);
else
- DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar,
- DIExpr, SI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), SI);
return true;
}
@@ -1026,19 +1025,15 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
/// that has an associated llvm.dbg.declare intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar)
- return false;
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
if (LdStHasDebugValue(DIVar, LI))
return true;
- Instruction *DbgVal =
- Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
+ DDI->getDebugLoc(), LI);
return true;
}
@@ -1080,10 +1075,9 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that
// takes a pointer to the variable. Insert a *value*
// intrinsic that describes the alloca.
- auto DbgVal = DIB.insertDbgValueIntrinsic(
- AI, 0, DIVariable(DDI->getVariable()),
- DIExpression(DDI->getExpression()), CI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
+ DDI->getExpression(), DDI->getDebugLoc(),
+ CI);
}
DDI->eraseFromParent();
}
@@ -1104,32 +1098,31 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
}
bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder) {
+ DIBuilder &Builder, bool Deref) {
DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
if (!DDI)
return false;
- DIVariable DIVar(DDI->getVariable());
- DIExpression DIExpr(DDI->getExpression());
- assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar)
- return false;
-
- // Create a copy of the original DIDescriptor for user variable, prepending
- // "deref" operation to a list of address elements, as new llvm.dbg.declare
- // will take a value storing address of the memory for variable, not
- // alloca itself.
- SmallVector<int64_t, 4> NewDIExpr;
- NewDIExpr.push_back(dwarf::DW_OP_deref);
- if (DIExpr)
- for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i)
- NewDIExpr.push_back(DIExpr.getElement(i));
+ DebugLoc Loc = DDI->getDebugLoc();
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (Deref) {
+ // Create a copy of the original DIDescriptor for the user variable,
+ // prepending a "deref" operation to the list of address elements, as the new
+ // llvm.dbg.declare will take a value storing the address of the memory for
+ // the variable, not the alloca itself.
+ SmallVector<uint64_t, 4> NewDIExpr;
+ NewDIExpr.push_back(dwarf::DW_OP_deref);
+ if (DIExpr)
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ DIExpr = Builder.createExpression(NewDIExpr);
+ }
// Insert llvm.dbg.declare in the same basic block as the original alloca,
// and remove old llvm.dbg.declare.
BasicBlock *BB = AI->getParent();
- Builder.insertDeclare(NewAllocaAddress, DIVar,
- Builder.createExpression(NewDIExpr), BB);
+ Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
DDI->eraseFromParent();
return true;
}
@@ -1254,7 +1247,7 @@ static bool markAliveBlocks(BasicBlock *BB,
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
changeToUnreachable(II, true);
Changed = true;
- } else if (II->doesNotThrow()) {
+ } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
if (II->use_empty() && II->onlyReadsMemory()) {
// jump to the normal destination branch.
BranchInst::Create(II->getNormalDest(), II);
@@ -1350,3 +1343,23 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign
}
}
}
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+ DominatorTree &DT,
+ const BasicBlockEdge &Root) {
+ assert(From->getType() == To->getType());
+
+ unsigned Count = 0;
+ for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE; ) {
+ Use &U = *UI++;
+ if (DT.dominates(Root, U)) {
+ U.set(To);
+ DEBUG(dbgs() << "Replace dominated use of '"
+ << From->getName() << "' as "
+ << *To << " in " << *U << "\n");
+ ++Count;
+ }
+ }
+ return Count;
+}
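
replaceDominatedUsesWith, added above, rewrites only those uses of From that the given edge dominates, which is the shape GVN-style propagation wants when a branch proves a fact along one edge only. A hedged usage sketch; BB, TrueBB, X, C and DT are assumptions, not taken from this patch:

    // BB ends in "br i1 %cond, label %TrueBB, label %FalseBB" and on the
    // TrueBB edge we have proven that X is equal to the constant C.
    BasicBlockEdge Edge(BB, TrueBB);
    unsigned NumReplaced = replaceDominatedUsesWith(X, C, DT, Edge);
    (void)NumReplaced; // number of uses rewritten, useful for statistics
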
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index c832a4b..90dfaba 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -57,8 +57,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -113,6 +115,14 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
BasicBlock *Header = L->getHeader();
+ // Get analyses that we try to update.
+ auto *AA = PP->getAnalysisIfAvailable<AliasAnalysis>();
+ auto *DTWP = PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = PP->getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
// Compute the set of predecessors of the loop that are not in the loop.
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
@@ -131,15 +141,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
- if (!Header->isLandingPad()) {
- PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
- PP);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
- ".split-lp", PP, NewBBs);
- PreheaderBB = NewBBs[0];
- }
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ AA, DT, LI, PreserveLCSSA);
PreheaderBB->getTerminator()->setDebugLoc(
Header->getFirstNonPHI()->getDebugLoc());
@@ -157,7 +160,9 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
///
/// This method is used to split exit blocks that have predecessors outside of
/// the loop.
-static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
+static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
+ AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, Pass *PP) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
BasicBlock *P = *I;
@@ -172,15 +177,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewExitBB = nullptr;
- if (Exit->isLandingPad()) {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Exit, LoopBlocks,
- ".loopexit", ".nonloopexit",
- PP, NewBBs);
- NewExitBB = NewBBs[0];
- } else {
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP);
- }
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT,
+ LI, PreserveLCSSA);
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
<< NewExitBB->getName() << "\n");
@@ -211,10 +211,11 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
DominatorTree *DT,
AssumptionCache *AC) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -287,9 +288,11 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (SE)
SE->forgetLoop(L);
+ bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID);
+
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP);
+ BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
+ AA, DT, LI, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -460,7 +463,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Update Loop Information - we know that this block is now in the current
// loop and all parent loops.
- L->addBasicBlockToLoop(BEBlock, LI->getBase());
+ L->addBasicBlockToLoop(BEBlock, *LI);
// Update dominator information
DT->splitBlock(BEBlock);
@@ -476,7 +479,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, Pass *PP, const DataLayout *DL,
+ ScalarEvolution *SE, Pass *PP,
AssumptionCache *AC) {
bool Changed = false;
ReprocessLoop:
@@ -567,7 +570,7 @@ ReprocessLoop:
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
- if (rewriteLoopExitBlock(L, ExitBlock, PP)) {
+ if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) {
++NumInserted;
Changed = true;
}
@@ -608,13 +611,15 @@ ReprocessLoop:
}
}
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
// Scan over the PHI nodes in the loop header. Since they now have only two
// incoming values (the loop is canonicalized), we may have simplified the PHI
// down to 'X = phi [X, Y]', which should be replaced with 'Y'.
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -676,7 +681,8 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI, DL)) continue;
+ if (!FoldBranchToCommonDest(BI))
+ continue;
// Success. The block is now dead, so remove it from the loop,
// update the dominator tree and delete it.
@@ -714,7 +720,7 @@ ReprocessLoop:
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
AliasAnalysis *AA, ScalarEvolution *SE,
- const DataLayout *DL, AssumptionCache *AC) {
+ AssumptionCache *AC) {
bool Changed = false;
// Worklist maintains our depth-first queue of loops in this nest to process.
@@ -726,13 +732,12 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
// order. We can use this simple process because loops form a tree.
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
Loop *L2 = Worklist[Idx];
- for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I)
- Worklist.push_back(*I);
+ Worklist.append(L2->begin(), L2->end());
}
while (!Worklist.empty())
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
- SE, PP, DL, AC);
+ SE, PP, AC);
return Changed;
}
@@ -750,7 +755,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
AssumptionCache *AC;
bool runOnFunction(Function &F) override;
@@ -762,8 +766,8 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
@@ -781,7 +785,7 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
"Canonicalize natural loops", false, false)
@@ -795,16 +799,14 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
bool LoopSimplify::runOnFunction(Function &F) {
bool Changed = false;
AA = getAnalysisIfAvailable<AliasAnalysis>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = getAnalysisIfAvailable<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AC);
+ Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC);
return Changed;
}
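
The LoopSimplify hunks repeat the pattern used across this change set: instead of threading a possibly-null const DataLayout * through every helper, the layout is fetched by reference from the enclosing Module, so calls such as SimplifyInstruction no longer need a null check. A minimal sketch of the pattern, assuming a Loop *L, a PHINode *PN and the usual DT/AC analyses are in scope:

    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    if (Value *V = SimplifyInstruction(PN, DL, /*TLI=*/nullptr, DT, AC)) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
    }
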
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 5745920..1dbce47 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -26,8 +26,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
+/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
+/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
+/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count"
+/// iterations before branching into the unrolled loop. UnrollLoop will not
+/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
+/// AllowExpensiveTripCount is false.
+///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
@@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
- Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
+ bool AllowRuntime, bool AllowExpensiveTripCount,
+ unsigned TripMultiple, LoopInfo *LI, Pass *PP,
+ LPPassManager *LPM, AssumptionCache *AC) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// flag is specified.
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
- if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ if (RuntimeTripCount &&
+ !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
return false;
// Notify ScalarEvolution that the loop will be substantially changed,
@@ -311,7 +320,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Tell LI about New.
if (*BB == Header) {
assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
- L->addBasicBlockToLoop(New, LI->getBase());
+ L->addBasicBlockToLoop(New, *LI);
} else {
// Figure out which loop New is in.
const Loop *OldLoop = LI->getLoopFor(*BB);
@@ -333,7 +342,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (SE)
SE->forgetLoop(OldLoop);
}
- NewLoop->addBasicBlockToLoop(New, LI->getBase());
+ NewLoop->addBasicBlockToLoop(New, *LI);
}
if (*BB == Header)
@@ -500,6 +509,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
+ const DataLayout &DL = Header->getModule()->getDataLayout();
const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
@@ -508,7 +518,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Value *V = SimplifyInstruction(Inst))
+ else if (Value *V = SimplifyInstruction(Inst, DL))
if (LI->replacementPreservesLCSSAForm(Inst, V)) {
Inst->replaceAllUsesWith(V);
(*BB)->getInstList().erase(Inst);
@@ -531,9 +541,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
- DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC);
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC);
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
@@ -549,3 +557,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return true;
}
+
+/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
+/// node with the given name (for example, "llvm.loop.unroll.count"). If no
+/// such metadata node exists, then nullptr is returned.
+MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD)
+ continue;
+
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+
+ if (Name.equals(S->getString()))
+ return MD;
+ }
+ return nullptr;
+}
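For reference, a minimal sketch of how a caller might query this helper, assuming a Loop *L whose ID metadata carries an explicit unroll count (variable names are illustrative):

    // Read !{!"llvm.loop.unroll.count", i32 4} off the loop, if present.
    unsigned RequestedCount = 0;
    if (MDNode *LoopID = L->getLoopID())
      if (MDNode *MD = GetUnrollMetadata(LoopID, "llvm.loop.unroll.count"))
        if (MD->getNumOperands() == 2)
          if (auto *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)))
            RequestedCount = CI->getZExtValue();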
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 8a32215..d1774df 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -23,14 +23,18 @@
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
@@ -58,7 +62,8 @@ STATISTIC(NumRuntimeUnrolled,
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &VMap, Pass *P) {
+ ValueToValueMapTy &VMap, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI, Pass *P) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
@@ -122,13 +127,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
- if (!Exit->isLandingPad()) {
- SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
- } else {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
- P, NewBBs);
- }
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI,
+ P->mustPreserveAnalysisID(LCSSAID));
// Add the branch to the exit block (around the unrolled loop)
BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
InsertPt->eraseFromParent();
@@ -167,9 +167,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
NewBlocks.push_back(NewBB);
if (NewLoop)
- NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ NewLoop->addBasicBlockToLoop(NewBB, *LI);
else if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
if (Header == *BB) {
@@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
/// ...
/// End:
///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
+ bool AllowExpensiveTripCount, LoopInfo *LI,
LPPassManager *LPM) {
// for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
@@ -312,15 +313,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
+ BasicBlock *Header = L->getHeader();
+ const DataLayout &DL = Header->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "loop-unroll");
+ if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
+ return false;
+
// We only handle cases when the unroll factor is a power of 2.
// Count is the loop unroll factor, the number of extra copies added + 1.
if (!isPowerOf2_32(Count))
return false;
// This constraint lets us deal with an overflowing trip count easily; see the
- // comment on ModVal below. This check is equivalent to `Log2(Count) <
- // BEWidth`.
- if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
+ // comment on ModVal below.
+ if (Log2_32(Count) > BEWidth)
return false;
// If this loop is nested, then the loop unroller changes the code in
@@ -328,18 +334,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (Loop *ParentLoop = L->getParentLoop())
SE->forgetLoop(ParentLoop);
+ // Grab analyses that we preserve.
+ auto *DTWP = LPM->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
BasicBlock *PH = L->getLoopPreheader();
- BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
// It helps to split the original preheader twice: once for the end of the
// prolog code and once for a new loop preheader
- BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
- BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
// Compute the number of extra iterations required, which is:
// extra iterations = run-time trip count % (loop unroll factor + 1)
- SCEVExpander Expander(*SE, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
@@ -408,7 +416,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
- LPM->getAsPass());
+ /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass());
NumRuntimeUnrolled++;
return true;
}
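A worked sketch of the arithmetic this prolog sets up, assuming a power-of-two unroll factor (the numbers are illustrative):

    uint64_t Count = 4;                          // unroll factor, power of two
    uint64_t TripCount = 10;                     // run-time trip count
    uint64_t XtraIter = TripCount & (Count - 1); // 10 % 4 == 2 prolog iterations
    uint64_t Unrolled = TripCount - XtraIter;    // 8 iterations run fully unrolled
    // The Log2_32(Count) > BEWidth check above rejects factors wider than the
    // backedge-taken count type, so this mask stays meaningful even when the
    // trip-count computation wraps.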
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
new file mode 100644
index 0000000..a5890c0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -0,0 +1,499 @@
+//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common loop utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "loop-utils"
+
+bool ReductionDescriptor::areAllUsesIn(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Set) {
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
+ return true;
+}
+
+bool ReductionDescriptor::AddReductionVar(PHINode *Phi, ReductionKind Kind,
+ Loop *TheLoop, bool HasFunNoNaNAttr,
+ ReductionDescriptor &RedDes) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Reduction variables are only found in the loop header block.
+ if (Phi->getParent() != TheLoop->getHeader())
+ return false;
+
+ // Obtain the reduction start value from the value that comes from the loop
+ // preheader.
+ Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
+
+ // ExitInstruction is the single value which is used outside the loop.
+ // We only allow for a single reduction value to be used outside the loop.
+ // This includes users of the reduction variables, which form a cycle that
+ // ends in the phi node.
+ Instruction *ExitInstruction = nullptr;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
+
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
+
+ // To recognize min/max patterns formed by an icmp/select sequence, we store
+ // the number of instructions we saw from the recognized min/max pattern,
+ // to make sure we only see exactly those two instructions.
+ unsigned NumCmpSelectPatternInst = 0;
+ ReductionInstDesc ReduxDesc(false, nullptr);
+
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(Phi);
+ VisitedInsts.insert(Phi);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+ // - One use of reduction value (safe).
+ // - Multiple use of reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By one instruction outside of the loop (safe).
+ // - By further instructions outside of the loop (not safe).
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
+ return false;
+
+ bool IsAPhi = isa<PHINode>(Cur);
+
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
+
+ // Reductions of instructions such as Div and Sub are only possible if the
+ // LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds.
+ ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isReduction())
+ return false;
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
+ hasMultipleUsesOf(Cur, VisitedInsts))
+ return false;
+
+ // All inputs to a PHI node must be a reduction value.
+ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax &&
+ (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi;
+
+ // Process users of current instruction. Push non-PHI nodes after PHI nodes
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Check if we found the exit user.
+ BasicBlock *Parent = UI->getParent();
+ if (!TheLoop->contains(Parent)) {
+ // Exit if you find multiple outside users or if the header phi node is
+ // being used. In this case the user uses the value of the previous
+ // iteration, in which case we would lose "VF-1" iterations of the
+ // reduction operation if we vectorize.
+ if (ExitInstruction != nullptr || Cur == Phi)
+ return false;
+
+ // The instruction used by an outside user must be the last instruction
+ // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+ return false;
+
+ ExitInstruction = Cur;
+ continue;
+ }
+
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ ReductionInstDesc IgnoredVal(false, nullptr);
+ if (VisitedInsts.insert(UI).second) {
+ if (isa<PHINode>(UI))
+ PHIs.push_back(UI);
+ else
+ NonPHIs.push_back(UI);
+ } else if (!isa<PHINode>(UI) &&
+ ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
+ !isa<SelectInst>(UI)) ||
+ !isMinMaxSelectCmpPattern(UI, IgnoredVal).isReduction()))
+ return false;
+
+ // Remember that we completed the cycle.
+ if (UI == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
+
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
+
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
+
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
+
+ // The ExitInstruction (the instruction which is allowed to have out-of-loop
+ // users) is saved as part of the ReductionDescriptor.
+
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.getMinMaxKind());
+
+ RedDes = RD;
+
+ return true;
+}
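As an illustration, a source-level loop whose header PHI this analysis would classify as an RK_IntegerAdd reduction (names are illustrative):

    int sum_array(const int *A, int N) {
      int Sum = 0;             // incoming value from the loop preheader
      for (int i = 0; i < N; ++i)
        Sum += A[i];           // the single reduction operation in the cycle
      return Sum;              // the only out-of-loop use of the reduction value
    }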
+
+/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
+/// pattern corresponding to a min(X, Y) or max(X, Y).
+ReductionInstDesc
+ReductionDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev) {
+
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+ "Expect a select instruction");
+ Instruction *Cmp = nullptr;
+ SelectInst *Select = nullptr;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(Select, Prev.getMinMaxKind());
+ }
+
+ // Only handle single use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return ReductionInstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return ReductionInstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return ReductionInstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, ReductionInstDesc::MRK_FloatMax);
+
+ return ReductionInstDesc(false, I);
+}
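For example, a signed-min reduction that this matcher classifies as MRK_SIntMin once the ternary is lowered to an icmp feeding a single-use select (illustrative code):

    int min_array(const int *A, int N) {
      int M = A[0];
      for (int i = 1; i < N; ++i)
        M = A[i] < M ? A[i] : M;   // icmp slt + select over the reduction PHI
      return M;
    }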
+
+ReductionInstDesc ReductionDescriptor::isReductionInstr(Instruction *I,
+ ReductionKind Kind,
+ ReductionInstDesc &Prev,
+ bool HasFunNoNaNAttr) {
+ bool FP = I->getType()->isFloatingPointTy();
+ bool FastMath = FP && I->hasUnsafeAlgebra();
+ switch (I->getOpcode()) {
+ default:
+ return ReductionInstDesc(false, I);
+ case Instruction::PHI:
+ if (FP &&
+ (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(I, Prev.getMinMaxKind());
+ case Instruction::Sub:
+ case Instruction::Add:
+ return ReductionInstDesc(Kind == RK_IntegerAdd, I);
+ case Instruction::Mul:
+ return ReductionInstDesc(Kind == RK_IntegerMult, I);
+ case Instruction::And:
+ return ReductionInstDesc(Kind == RK_IntegerAnd, I);
+ case Instruction::Or:
+ return ReductionInstDesc(Kind == RK_IntegerOr, I);
+ case Instruction::Xor:
+ return ReductionInstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FMul:
+ return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ case Instruction::Select:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
+}
+
+bool ReductionDescriptor::hasMultipleUsesOf(
+ Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
+ unsigned NumUses = 0;
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
+ ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > 1)
+ return true;
+ }
+
+ return false;
+}
+bool ReductionDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ ReductionDescriptor &RedDes) {
+
+ bool HasFunNoNaNAttr = false;
+ BasicBlock *Header = TheLoop->getHeader();
+ Function &F = *Header->getParent();
+ if (F.hasFnAttribute("no-nans-fp-math"))
+ HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+
+ if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
+ RedDes)) {
+ DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ // Not a reduction of known type.
+ return false;
+}
+
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+Constant *ReductionDescriptor::getReductionIdentity(ReductionKind K, Type *Tp) {
+ switch (K) {
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
+ // Adding, Xoring, Oring zero to a number does not change it.
+ return ConstantInt::get(Tp, 0);
+ case RK_IntegerMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantInt::get(Tp, 1);
+ case RK_IntegerAnd:
+ // AND-ing a number with an all-1 value does not change it.
+ return ConstantInt::get(Tp, -1, true);
+ case RK_FloatMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantFP::get(Tp, 1.0L);
+ case RK_FloatAdd:
+ // Adding zero to a number does not change it.
+ return ConstantFP::get(Tp, 0.0L);
+ default:
+ llvm_unreachable("Unknown reduction kind");
+ }
+}
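Concretely, for an i32 reduction the identities above come out as follows (a sketch; Ctx stands for an LLVMContext already in scope):

    Type *I32 = Type::getInt32Ty(Ctx);
    Constant *AddIden = ConstantInt::get(I32, 0);        // add, or, xor
    Constant *MulIden = ConstantInt::get(I32, 1);        // mul
    Constant *AndIden = ConstantInt::get(I32, -1, true); // and: all ones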
+
+/// This function translates the reduction kind to an LLVM binary operator.
+unsigned ReductionDescriptor::getReductionBinOp(ReductionKind Kind) {
+ switch (Kind) {
+ case RK_IntegerAdd:
+ return Instruction::Add;
+ case RK_IntegerMult:
+ return Instruction::Mul;
+ case RK_IntegerOr:
+ return Instruction::Or;
+ case RK_IntegerAnd:
+ return Instruction::And;
+ case RK_IntegerXor:
+ return Instruction::Xor;
+ case RK_FloatMult:
+ return Instruction::FMul;
+ case RK_FloatAdd:
+ return Instruction::FAdd;
+ case RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case RK_FloatMinMax:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unknown reduction operation");
+ }
+}
+
+Value *
+ReductionDescriptor::createMinMaxOp(IRBuilder<> &Builder,
+ ReductionInstDesc::MinMaxReductionKind RK,
+ Value *Left, Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max reduction kind");
+ case ReductionInstDesc::MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case ReductionInstDesc::MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case ReductionInstDesc::MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case ReductionInstDesc::MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case ReductionInstDesc::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case ReductionInstDesc::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ Value *Cmp;
+ if (RK == ReductionInstDesc::MRK_FloatMin ||
+ RK == ReductionInstDesc::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
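A minimal usage sketch, assuming an IRBuilder<> positioned where the partial results of a vectorized reduction are combined (variable names are illustrative):

    Value *Reduced = ReductionDescriptor::createMinMaxOp(
        Builder, ReductionInstDesc::MRK_SIntMin, LHS, RHS);
    // emits %rdx.minmax.cmp = icmp slt LHS, RHS followed by
    //       %rdx.minmax.select = select %rdx.minmax.cmp, LHS, RHS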
+
+bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+ ConstantInt *&StepValue) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
+ // Check that the PHI is consecutive.
+ const SCEV *PhiScev = SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ return false;
+
+ ConstantInt *CV = C->getValue();
+ if (PhiTy->isIntegerTy()) {
+ StepValue = CV;
+ return true;
+ }
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ Type *PointerElementType = PhiTy->getPointerElementType();
+ // The pointer stride cannot be determined if the pointer element type is not
+ // sized.
+ if (!PointerElementType->isSized())
+ return false;
+
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ int64_t CVSize = CV->getSExtValue();
+ if (CVSize % Size)
+ return false;
+ StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+ return true;
+}
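A worked example of the final stride division, assuming a PHI of type double* whose SCEV step is 16 bytes per iteration:

    // Size   = DL.getTypeAllocSize(double) = 8
    // CVSize = 16 (byte step taken from the add-recurrence)
    // 16 % 8 == 0, so StepValue = 16 / 8 = 2 elements per iteration;
    // a byte step of 12 would fail the CVSize % Size check and be rejected.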
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 04b9130..e0e0e90 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -14,17 +14,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include <algorithm>
using namespace llvm;
@@ -32,6 +32,23 @@ using namespace llvm;
#define DEBUG_TYPE "lower-switch"
namespace {
+ struct IntRange {
+ int64_t Low, High;
+ };
+ // Return true iff R is covered by Ranges.
+ static bool IsInRanges(const IntRange &R,
+ const std::vector<IntRange> &Ranges) {
+ // Note: Ranges must be sorted, non-overlapping and non-adjacent.
+
+ // Find the first range whose High field is >= R.High,
+ // then check if the Low field is <= R.Low. If so, we
+ // have a Range that covers R.
+ auto I = std::lower_bound(
+ Ranges.begin(), Ranges.end(), R,
+ [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
+ return I != Ranges.end() && I->Low <= R.Low;
+ }
+
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
/// instructions.
class LowerSwitch : public FunctionPass {
@@ -50,13 +67,12 @@ namespace {
}
struct CaseRange {
- Constant* Low;
- Constant* High;
+ ConstantInt* Low;
+ ConstantInt* High;
BasicBlock* BB;
- CaseRange(Constant *low = nullptr, Constant *high = nullptr,
- BasicBlock *bb = nullptr) :
- Low(low), High(high), BB(bb) { }
+ CaseRange(ConstantInt *low, ConstantInt *high, BasicBlock *bb)
+ : Low(low), High(high), BB(bb) {}
};
typedef std::vector<CaseRange> CaseVector;
@@ -67,7 +83,8 @@ namespace {
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
Value *Val, BasicBlock *Predecessor,
- BasicBlock *OrigBlock, BasicBlock *Default);
+ BasicBlock *OrigBlock, BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges);
BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
BasicBlock *Default);
unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
@@ -158,11 +175,16 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
// Remove additional occurrences coming from condensed cases and keep the
// number of incoming values equal to the number of branches to SuccBB.
+ SmallVector<unsigned, 8> Indices;
for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
if (PN->getIncomingBlock(Idx) == OrigBB) {
- PN->removeIncomingValue(Idx);
+ Indices.push_back(Idx);
LocalNumMergedCases--;
}
+ // Remove incoming values in the reverse order to prevent invalidating
+ // *successive* indices.
+ for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III)
+ PN->removeIncomingValue(*III);
}
}
@@ -171,12 +193,12 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
// LowerBound and UpperBound are used to keep track of the bounds for Val
// that have already been checked by a block emitted by one of the previous
// calls to switchConvert in the call stack.
-BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
- ConstantInt *LowerBound,
- ConstantInt *UpperBound, Value *Val,
- BasicBlock *Predecessor,
- BasicBlock *OrigBlock,
- BasicBlock *Default) {
+BasicBlock *
+LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
+ ConstantInt *UpperBound, Value *Val,
+ BasicBlock *Predecessor, BasicBlock *OrigBlock,
+ BasicBlock *Default,
+ const std::vector<IntRange> &UnreachableRanges) {
unsigned Size = End - Begin;
if (Size == 1) {
@@ -203,32 +225,32 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
CaseRange &Pivot = *(Begin + Mid);
DEBUG(dbgs() << "Pivot ==> "
- << cast<ConstantInt>(Pivot.Low)->getValue()
- << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+ << Pivot.Low->getValue()
+ << " -" << Pivot.High->getValue() << "\n");
// NewLowerBound here should never be the integer minimal value.
// This is because it is computed from a case range that is never
// the smallest, so there is always a case range with a smaller value.
- ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low);
- ConstantInt *NewUpperBound;
-
- // If we don't have a Default block then it means that we can never
- // have a value outside of a case range, so set the UpperBound to the highest
- // value in the LHS part of the case ranges.
- if (Default != nullptr) {
- // Because NewLowerBound is never the smallest representable integer
- // it is safe here to subtract one.
- NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
- NewLowerBound->getValue() - 1);
- } else {
- CaseItr LastLHS = LHS.begin() + LHS.size() - 1;
- NewUpperBound = cast<ConstantInt>(LastLHS->High);
+ ConstantInt *NewLowerBound = Pivot.Low;
+
+ // Because NewLowerBound is never the smallest representable integer
+ // it is safe here to subtract one.
+ ConstantInt *NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
+ NewLowerBound->getValue() - 1);
+
+ if (!UnreachableRanges.empty()) {
+ // Check if the gap between LHS's highest and NewLowerBound is unreachable.
+ int64_t GapLow = LHS.back().High->getSExtValue() + 1;
+ int64_t GapHigh = NewLowerBound->getSExtValue() - 1;
+ IntRange Gap = { GapLow, GapHigh };
+ if (GapHigh >= GapLow && IsInRanges(Gap, UnreachableRanges))
+ NewUpperBound = LHS.back().High;
}
DEBUG(dbgs() << "LHS Bounds ==> ";
if (LowerBound) {
- dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue();
+ dbgs() << LowerBound->getSExtValue();
} else {
dbgs() << "NONE";
}
@@ -236,7 +258,7 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
dbgs() << "RHS Bounds ==> ";
dbgs() << NewLowerBound->getSExtValue() << " - ";
if (UpperBound) {
- dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n";
+ dbgs() << UpperBound->getSExtValue() << "\n";
} else {
dbgs() << "NONE\n";
});
@@ -251,10 +273,10 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
NewUpperBound, Val, NewNode, OrigBlock,
- Default);
+ Default, UnreachableRanges);
BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
UpperBound, Val, NewNode, OrigBlock,
- Default);
+ Default, UnreachableRanges);
Function::iterator FI = OrigBlock;
F->getBasicBlockList().insert(++FI, NewNode);
@@ -287,11 +309,11 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
Leaf.Low, "SwitchLeaf");
} else {
// Make range comparison
- if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+ if (Leaf.Low->isMinValue(true /*isSigned*/)) {
// Val >= Min && Val <= Hi --> Val <= Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
"SwitchLeaf");
- } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+ } else if (Leaf.Low->isZero()) {
// Val >= 0 && Val <= Hi --> Val <=u Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
"SwitchLeaf");
@@ -316,8 +338,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
PHINode* PN = cast<PHINode>(I);
// Remove all but one incoming entries from the cluster
- uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
- cast<ConstantInt>(Leaf.Low)->getSExtValue();
+ uint64_t Range = Leaf.High->getSExtValue() -
+ Leaf.Low->getSExtValue();
for (uint64_t j = 0; j < Range; ++j) {
PN->removeIncomingValue(OrigBlock);
}
@@ -345,8 +367,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
if (Cases.size()>=2)
for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
J != Cases.end();) {
- int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
- int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ int64_t nextValue = J->Low->getSExtValue();
+ int64_t currentValue = I->High->getSExtValue();
BasicBlock* nextBB = J->BB;
BasicBlock* currentBB = I->BB;
@@ -379,26 +401,102 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
- // If there is only the default destination, don't bother with the code below.
+ // If there is only the default destination, just branch.
if (!SI->getNumCases()) {
- BranchInst::Create(SI->getDefaultDest(), CurBlock);
- CurBlock->getInstList().erase(SI);
+ BranchInst::Create(Default, CurBlock);
+ SI->eraseFromParent();
return;
}
- const bool DefaultIsUnreachable =
- Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator());
+ // Prepare cases vector.
+ CaseVector Cases;
+ unsigned numCmps = Clusterify(Cases, SI);
+ DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << "\n");
+ DEBUG(dbgs() << "Cases: " << Cases << "\n");
+ (void)numCmps;
+
+ ConstantInt *LowerBound = nullptr;
+ ConstantInt *UpperBound = nullptr;
+ std::vector<IntRange> UnreachableRanges;
+
+ if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
+ // Make the bounds tightly fitted around the case value range, because we
+ // know that the value passed to the switch must be exactly one of the case
+ // values.
+ assert(!Cases.empty());
+ LowerBound = Cases.front().Low;
+ UpperBound = Cases.back().High;
+
+ DenseMap<BasicBlock *, unsigned> Popularity;
+ unsigned MaxPop = 0;
+ BasicBlock *PopSucc = nullptr;
+
+ IntRange R = { INT64_MIN, INT64_MAX };
+ UnreachableRanges.push_back(R);
+ for (const auto &I : Cases) {
+ int64_t Low = I.Low->getSExtValue();
+ int64_t High = I.High->getSExtValue();
+
+ IntRange &LastRange = UnreachableRanges.back();
+ if (LastRange.Low == Low) {
+ // There is nothing left of the previous range.
+ UnreachableRanges.pop_back();
+ } else {
+ // Terminate the previous range.
+ assert(Low > LastRange.Low);
+ LastRange.High = Low - 1;
+ }
+ if (High != INT64_MAX) {
+ IntRange R = { High + 1, INT64_MAX };
+ UnreachableRanges.push_back(R);
+ }
+
+ // Count popularity.
+ int64_t N = High - Low + 1;
+ unsigned &Pop = Popularity[I.BB];
+ if ((Pop += N) > MaxPop) {
+ MaxPop = Pop;
+ PopSucc = I.BB;
+ }
+ }
+#ifndef NDEBUG
+ /* UnreachableRanges should be sorted and the ranges non-adjacent. */
+ for (auto I = UnreachableRanges.begin(), E = UnreachableRanges.end();
+ I != E; ++I) {
+ assert(I->Low <= I->High);
+ auto Next = I + 1;
+ if (Next != E) {
+ assert(Next->Low > I->High);
+ }
+ }
+#endif
+
+ // Use the most popular block as the new default, reducing the number of
+ // cases.
+ assert(MaxPop > 0 && PopSucc);
+ Default = PopSucc;
+ for (CaseItr I = Cases.begin(); I != Cases.end();) {
+ if (I->BB == PopSucc)
+ I = Cases.erase(I);
+ else
+ ++I;
+ }
+
+ // If there are no cases left, just branch.
+ if (Cases.empty()) {
+ BranchInst::Create(Default, CurBlock);
+ SI->eraseFromParent();
+ return;
+ }
+ }
+
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
- // if the default block is set as an unreachable we avoid creating one
- // because will never be a valid target.
- BasicBlock *NewDefault = nullptr;
- if (!DefaultIsUnreachable) {
- NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default, NewDefault);
-
- BranchInst::Create(Default, NewDefault);
- }
+ BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+ BranchInst::Create(Default, NewDefault);
+
// If there is an entry in any PHI nodes for the default edge, make sure
// to update them as well.
for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
@@ -408,40 +506,18 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
}
- // Prepare cases vector.
- CaseVector Cases;
- unsigned numCmps = Clusterify(Cases, SI);
-
- DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << "\n");
- DEBUG(dbgs() << "Cases: " << Cases << "\n");
- (void)numCmps;
-
- ConstantInt *UpperBound = nullptr;
- ConstantInt *LowerBound = nullptr;
-
- // Optimize the condition where Default is an unreachable block. In this case
- // we can make the bounds tightly fitted around the case value ranges,
- // because we know that the value passed to the switch should always be
- // exactly one of the case values.
- if (DefaultIsUnreachable) {
- CaseItr LastCase = Cases.begin() + Cases.size() - 1;
- UpperBound = cast<ConstantInt>(LastCase->High);
- LowerBound = cast<ConstantInt>(Cases.begin()->Low);
- }
BasicBlock *SwitchBlock =
switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
- OrigBlock, OrigBlock, NewDefault);
+ OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
// We are now done with the switch instruction, delete it.
+ BasicBlock *OldDefault = SI->getDefaultDest();
CurBlock->getInstList().erase(SI);
- pred_iterator PI = pred_begin(Default), E = pred_end(Default);
- // If the Default block has no more predecessors just remove it
- if (PI == E) {
- DeleteDeadBlock(Default);
- }
+ // If the Default block has no more predecessors just remove it.
+ if (pred_begin(OldDefault) == pred_end(OldDefault))
+ DeleteDeadBlock(OldDefault);
}
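To illustrate the unreachable-default rewrite above, consider a switch whose default block contains only an unreachable (illustrative source):

    switch (x) {                  // default: __builtin_unreachable()
    case 1: case 2: case 3:
    case 9: case 10: return a();  // BB_A covers 5 case values
    case 7:          return b();  // BB_B covers 1 case value
    }
    // BB_A is the most popular successor, so it becomes the new default and its
    // cases are dropped; only [7,7] remains, the bounds are clamped to [1,10],
    // and {[INT64_MIN,0], [4,6], [8,8], [11,INT64_MAX]} are recorded as
    // unreachable ranges that switchConvert never has to guard against.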
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 35c701e..d69a81e 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -93,3 +94,34 @@ llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
}
return GV;
}
+
+Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
+ if (isa<Function>(FuncOrBitcast))
+ return cast<Function>(FuncOrBitcast);
+ FuncOrBitcast->dump();
+ std::string Err;
+ raw_string_ostream Stream(Err);
+ Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
+ report_fatal_error(Stream.str());
+}
+
+std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
+ Module &M, StringRef CtorName, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs) {
+ assert(!InitName.empty() && "Expected init function name");
+ assert(InitArgTypes.size() == InitArgs.size() &&
+ "Sanitizer's init function expects different number of arguments");
+ Function *Ctor = Function::Create(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalValue::InternalLinkage, CtorName, &M);
+ BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
+ IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
+ Function *InitFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ InitName, FunctionType::get(IRB.getVoidTy(), InitArgTypes, false),
+ AttributeSet()));
+ InitFunction->setLinkage(Function::ExternalLinkage);
+ IRB.CreateCall(InitFunction, InitArgs);
+ return std::make_pair(Ctor, InitFunction);
+}
+
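A minimal usage sketch for the new helpers, assuming a sanitizer pass that wants a module constructor calling a zero-argument init function (all names are illustrative):

    Function *Ctor, *InitFn;
    std::tie(Ctor, InitFn) = createSanitizerCtorAndInitFunctions(
        M, /*CtorName=*/"mysan.module_ctor", /*InitName=*/"__mysan_init",
        /*InitArgTypes=*/{}, /*InitArgs=*/{});
    appendToGlobalCtors(M, Ctor, /*Priority=*/0);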
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index dabadb7..623dbc9 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -13,16 +13,6 @@
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
-// The algorithm used here is based on:
-//
-// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
-// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
-// Programming Languages
-// POPL '95. ACM, New York, NY, 62-73.
-//
-// It has been modified to not explicitly use the DJ graph data structure and to
-// directly compute pruned SSA using per-variable liveness information.
-//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -34,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -45,9 +36,9 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
-#include <queue>
using namespace llvm;
#define DEBUG_TYPE "mem2reg"
@@ -274,9 +265,6 @@ struct PromoteMem2Reg {
/// behavior.
DenseMap<BasicBlock *, unsigned> BBNumbers;
- /// Maps DomTreeNodes to their level in the dominator tree.
- DenseMap<DomTreeNode *, unsigned> DomLevels;
-
/// Lazily compute the number of predecessors a block has.
DenseMap<const BasicBlock *, unsigned> BBNumPreds;
@@ -303,8 +291,6 @@ private:
return NP - 1;
}
- void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
- AllocaInfo &Info);
void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
@@ -531,6 +517,7 @@ void PromoteMem2Reg::run() {
AllocaInfo Info;
LargeBlockInfo LBI;
+ IDFCalculator IDF(DT);
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
@@ -578,31 +565,12 @@ void PromoteMem2Reg::run() {
continue;
}
- // If we haven't computed dominator tree levels, do so now.
- if (DomLevels.empty()) {
- SmallVector<DomTreeNode *, 32> Worklist;
-
- DomTreeNode *Root = DT.getRootNode();
- DomLevels[Root] = 0;
- Worklist.push_back(Root);
-
- while (!Worklist.empty()) {
- DomTreeNode *Node = Worklist.pop_back_val();
- unsigned ChildLevel = DomLevels[Node] + 1;
- for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
- CI != CE; ++CI) {
- DomLevels[*CI] = ChildLevel;
- Worklist.push_back(*CI);
- }
- }
- }
-
// If we haven't computed a numbering for the BB's in the function, do so
// now.
if (BBNumbers.empty()) {
unsigned ID = 0;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- BBNumbers[I] = ID++;
+ for (auto &BB : F)
+ BBNumbers[&BB] = ID++;
}
// If we have an AST to keep updated, remember some pointer value that is
@@ -621,7 +589,34 @@ void PromoteMem2Reg::run() {
// the standard SSA construction algorithm. Determine which blocks need PHI
// nodes and see if we can optimize out some work by avoiding insertion of
// dead phi nodes.
- DetermineInsertionPoint(AI, AllocaNum, Info);
+
+
+ // Unique the set of defining blocks for efficient lookup.
+ SmallPtrSet<BasicBlock *, 32> DefBlocks;
+ DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+
+ // Determine which blocks the value is live in. These are blocks which lead
+ // to uses.
+ SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
+ ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
+
+ // At this point, we're committed to promoting the alloca using IDF's, and
+ // the standard SSA construction algorithm. Determine which blocks need phi
+ // nodes and see if we can optimize out some work by avoiding insertion of
+ // dead phi nodes.
+ IDF.setLiveInBlocks(LiveInBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ SmallVector<BasicBlock *, 32> PHIBlocks;
+ IDF.calculate(PHIBlocks);
+ if (PHIBlocks.size() > 1)
+ std::sort(PHIBlocks.begin(), PHIBlocks.end(),
+ [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i)
+ QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion);
}
if (Allocas.empty())
@@ -667,6 +662,8 @@ void PromoteMem2Reg::run() {
A->eraseFromParent();
}
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
// Remove alloca's dbg.declare intrinsics from the function.
for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
@@ -691,7 +688,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
@@ -841,95 +838,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
}
}
-/// At this point, we're committed to promoting the alloca using IDF's, and the
-/// standard SSA construction algorithm. Determine which blocks need phi nodes
-/// and see if we can optimize out some work by avoiding insertion of dead phi
-/// nodes.
-void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
- AllocaInfo &Info) {
- // Unique the set of defining blocks for efficient lookup.
- SmallPtrSet<BasicBlock *, 32> DefBlocks;
- DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
-
- // Determine which blocks the value is live in. These are blocks which lead
- // to uses.
- SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
- ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
-
- // Use a priority queue keyed on dominator tree level so that inserted nodes
- // are handled from the bottom of the dominator tree upwards.
- typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
- typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
- less_second> IDFPriorityQueue;
- IDFPriorityQueue PQ;
-
- for (BasicBlock *BB : DefBlocks) {
- if (DomTreeNode *Node = DT.getNode(BB))
- PQ.push(std::make_pair(Node, DomLevels[Node]));
- }
-
- SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
- SmallPtrSet<DomTreeNode *, 32> Visited;
- SmallVector<DomTreeNode *, 32> Worklist;
- while (!PQ.empty()) {
- DomTreeNodePair RootPair = PQ.top();
- PQ.pop();
- DomTreeNode *Root = RootPair.first;
- unsigned RootLevel = RootPair.second;
-
- // Walk all dominator tree children of Root, inspecting their CFG edges with
- // targets elsewhere on the dominator tree. Only targets whose level is at
- // most Root's level are added to the iterated dominance frontier of the
- // definition set.
-
- Worklist.clear();
- Worklist.push_back(Root);
-
- while (!Worklist.empty()) {
- DomTreeNode *Node = Worklist.pop_back_val();
- BasicBlock *BB = Node->getBlock();
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
- ++SI) {
- DomTreeNode *SuccNode = DT.getNode(*SI);
-
- // Quickly skip all CFG edges that are also dominator tree edges instead
- // of catching them below.
- if (SuccNode->getIDom() == Node)
- continue;
-
- unsigned SuccLevel = DomLevels[SuccNode];
- if (SuccLevel > RootLevel)
- continue;
-
- if (!Visited.insert(SuccNode).second)
- continue;
-
- BasicBlock *SuccBB = SuccNode->getBlock();
- if (!LiveInBlocks.count(SuccBB))
- continue;
-
- DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
- if (!DefBlocks.count(SuccBB))
- PQ.push(std::make_pair(SuccNode, SuccLevel));
- }
-
- for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
- ++CI) {
- if (!Visited.count(*CI))
- Worklist.push_back(*CI);
- }
- }
- }
-
- if (DFBlocks.size() > 1)
- std::sort(DFBlocks.begin(), DFBlocks.end());
-
- unsigned CurrentVersion = 0;
- for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
- QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
-}
-
/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable
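A short sketch of what IDFCalculator computes for one alloca, assuming the defining and live-in block sets gathered above (the CFG is illustrative):

    // CFG: entry -> {left, right} -> merge -> exit.
    // The alloca is stored in 'left' and 'right' and loaded in 'merge', so
    // DefBlocks = {left, right} and LiveInBlocks = {merge}.
    IDFCalculator IDF(DT);
    IDF.setDefiningBlocks(DefBlocks);
    IDF.setLiveInBlocks(LiveInBlocks);
    SmallVector<BasicBlock *, 32> PHIBlocks;
    IDF.calculate(PHIBlocks);  // yields {merge}: the one block that needs a PHI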
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 3fcb789..88b39dd 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -150,12 +151,13 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
ProtoName, &BB->front());
// Fill in all the predecessors of the PHI.
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
- InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);
+ for (const auto &PredValue : PredValues)
+ InsertedPHI->addIncoming(PredValue.second, PredValue.first);
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
- if (Value *V = SimplifyInstruction(InsertedPHI)) {
+ if (Value *V =
+ SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
InsertedPHI->eraseFromParent();
return V;
}
@@ -245,8 +247,7 @@ public:
// but it is relatively slow. If we already have PHI nodes in this
// block, walk one of them to get the predecessor list instead.
if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
- Preds->push_back(SomePhi->getIncomingBlock(PI));
+ Preds->append(SomePhi->block_begin(), SomePhi->block_end());
} else {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Preds->push_back(*PI);
@@ -321,12 +322,12 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
//===----------------------------------------------------------------------===//
LoadAndStorePromoter::
-LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+LoadAndStorePromoter(ArrayRef<const Instruction*> Insts,
SSAUpdater &S, StringRef BaseName) : SSA(S) {
if (Insts.empty()) return;
- Value *SomeVal;
- if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ const Value *SomeVal;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
SomeVal = LI;
else
SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
@@ -344,20 +345,17 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
+
+ for (Instruction *User : Insts)
UsesByBlock[User->getParent()].push_back(User);
- }
// Okay, now we can iterate over all the blocks in the function with uses,
// processing them. Keep track of which loads are loading a live-in value.
// Walk the uses in the use-list order to be deterministic.
SmallVector<LoadInst*, 32> LiveInLoads;
DenseMap<Value*, Value*> ReplacedLoads;
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
+
+ for (Instruction *User : Insts) {
BasicBlock *BB = User->getParent();
TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB];
@@ -380,8 +378,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Otherwise, check to see if this block is all loads.
bool HasStore = false;
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
- if (isa<StoreInst>(BlockUses[i])) {
+ for (Instruction *I : BlockUses) {
+ if (isa<StoreInst>(I)) {
HasStore = true;
break;
}
@@ -391,8 +389,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// efficient way to tell which one is first in the block and don't want to
// scan large blocks, so just add all loads as live ins.
if (!HasStore) {
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
- LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ for (Instruction *I : BlockUses)
+ LiveInLoads.push_back(cast<LoadInst>(I));
BlockUses.clear();
continue;
}
@@ -403,8 +401,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// block is a load, then it uses the live in value. The last store defines
// the live out value. We handle this by doing a linear scan of the block.
Value *StoredValue = nullptr;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ for (Instruction &I : *BB) {
+ if (LoadInst *L = dyn_cast<LoadInst>(&I)) {
// If this is a load from an unrelated pointer, ignore it.
if (!isInstInList(L, Insts)) continue;
@@ -419,8 +417,8 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
}
continue;
}
-
- if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// If this is a store to an unrelated pointer, ignore it.
if (!isInstInList(SI, Insts)) continue;
updateDebugInfo(SI);
@@ -438,8 +436,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Okay, now we rewrite all loads that use live-in values in the loop,
// inserting PHI nodes as necessary.
- for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
- LoadInst *ALoad = LiveInLoads[i];
+ for (LoadInst *ALoad : LiveInLoads) {
Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
replaceLoadWithValue(ALoad, NewVal);
@@ -454,9 +451,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// Now that everything is rewritten, delete the old instructions from the
// function. They should all be dead now.
- for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
- Instruction *User = Insts[i];
-
+ for (Instruction *User : Insts) {
// If this is a load that still has uses, then the load must have been added
// as a live value in the SSAUpdater data structure for a block (e.g. because
// the loaded value was stored later). In this case, we need to recursively
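For context, a minimal SSAUpdater usage sketch of the cross-block rewriting that LoadAndStorePromoter builds on (types and block names are illustrative):

    SSAUpdater SSA;
    SSA.Initialize(Ty, "promoted");
    SSA.AddAvailableValue(PredA, ValueInA);  // value live out of one predecessor
    SSA.AddAvailableValue(PredB, ValueInB);  // value live out of the other
    Value *V = SSA.GetValueInMiddleOfBlock(MergeBB); // inserts a PHI if required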
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f6867c2..60ac271 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -53,9 +53,13 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
+// Chosen as 2 so as to be cheap, but still to have enough power to fold
+// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
+// To catch this, we need to fold a compare and a select, hence '2' being the
+// minimum reasonable default.
static cl::opt<unsigned>
-PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
- cl::desc("Control the amount of phi node folding to perform (default = 1)"));
+PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2),
+ cl::desc("Control the amount of phi node folding to perform (default = 2)"));
static cl::opt<bool>
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
@@ -106,8 +110,8 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
+ const DataLayout &DL;
unsigned BonusInstThreshold;
- const DataLayout *const DL;
AssumptionCache *AC;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -127,9 +131,9 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
- const DataLayout *DL, AssumptionCache *AC)
- : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AC(AC) {}
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
+ unsigned BonusInstThreshold, AssumptionCache *AC)
+ : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
bool run(BasicBlock *BB);
};
}
@@ -216,45 +220,15 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
}
/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
-/// given instruction, which is assumed to be safe to speculate. 1 means
-/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
-static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) {
- assert(isSafeToSpeculativelyExecute(I, DL) &&
+/// given instruction, which is assumed to be safe to speculate. TCC_Free means
+/// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively
+/// expensive.
+static unsigned ComputeSpeculationCost(const User *I,
+ const TargetTransformInfo &TTI) {
+ assert(isSafeToSpeculativelyExecute(I) &&
"Instruction is not safe to speculatively execute!");
- switch (Operator::getOpcode(I)) {
- default:
- // In doubt, be conservative.
- return UINT_MAX;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GEPOperator>(I)->hasAllConstantIndices())
- return UINT_MAX;
- return 1;
- case Instruction::ExtractValue:
- case Instruction::Load:
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::ICmp:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::BitCast:
- case Instruction::ExtractElement:
- case Instruction::InsertElement:
- return 1; // These are all cheap.
-
- case Instruction::Call:
- case Instruction::Select:
- return 2;
- }
+ return TTI.getUserCost(I);
}
-
/// DominatesMergePoint - If we have a merge point of an "if condition" as
/// accepted above, return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -275,7 +249,7 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) {
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction*> *AggressiveInsts,
unsigned &CostRemaining,
- const DataLayout *DL) {
+ const TargetTransformInfo &TTI) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -308,10 +282,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!isSafeToSpeculativelyExecute(I, DL))
+ if (!isSafeToSpeculativelyExecute(I))
return false;
- unsigned Cost = ComputeSpeculationCost(I, DL);
+ unsigned Cost = ComputeSpeculationCost(I, TTI);
if (Cost > CostRemaining)
return false;
@@ -321,7 +295,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL))
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts->insert(I);
@@ -330,15 +304,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
+static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
- if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
return CI;
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
+ IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -371,23 +345,22 @@ namespace {
/// while for a chain of '&&' it will build the set elements that make the test
/// fail.
struct ConstantComparesGatherer {
-
+ const DataLayout &DL;
Value *CompValue; /// Value found for the switch comparison
Value *Extra; /// Extra clause to be checked before the switch
SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch
unsigned UsedICmps; /// Number of comparisons matched in the and/or chain
/// Construct and compute the result for the comparison instruction Cond
- ConstantComparesGatherer(Instruction *Cond, const DataLayout *DL)
- : CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
- gather(Cond, DL);
+ ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL)
+ : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
+ gather(Cond);
}
/// Prevent copy
- ConstantComparesGatherer(const ConstantComparesGatherer &)
- LLVM_DELETED_FUNCTION;
+ ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
ConstantComparesGatherer &
- operator=(const ConstantComparesGatherer &) LLVM_DELETED_FUNCTION;
+ operator=(const ConstantComparesGatherer &) = delete;
private:
@@ -406,7 +379,7 @@ private:
/// against is placed in CompValue.
/// If CompValue is already set, the function is expected to fail if a match
/// is found but the value compared to is different.
- bool matchInstruction(Instruction *I, const DataLayout *DL, bool isEQ) {
+ bool matchInstruction(Instruction *I, bool isEQ) {
// If this is an icmp against a constant, handle this as one of the cases.
ICmpInst *ICI;
ConstantInt *C;
@@ -448,8 +421,8 @@ private:
}
// If we have "x ult 3", for example, then we can add 0,1,2 to the set.
- ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(),
- C->getValue());
+ ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
+ ICI->getPredicate(), C->getValue());
// Shift the range if the compare is fed by an add. This is the range
// compare idiom as emitted by instcombine.
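The "range compare idiom" mentioned here is what instcombine produces for a two-sided range test. A hypothetical example of the shape being matched (not part of the patch):

    bool inRange(unsigned X) {
      // Written as:  X >= 10 && X <= 12
      // Canonicalized by instcombine to an add feeding one unsigned compare:
      return X - 10u < 3u;   // "x ult 3" after the shift; adds 10, 11, 12 to the set
    }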
@@ -488,7 +461,7 @@ private:
/// the value being compared, and stick the list constants into the Vals
/// vector.
/// One "Extra" case is allowed to differ from the other.
- void gather(Value *V, const DataLayout *DL) {
+ void gather(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
bool isEQ = (I->getOpcode() == Instruction::Or);
@@ -510,7 +483,7 @@ private:
}
// Try to match the current instruction
- if (matchInstruction(I, DL, isEQ))
+ if (matchInstruction(I, isEQ))
// Match succeed, continue the loop
continue;
}
@@ -558,15 +531,16 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
CV = SI->getCondition();
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
CV = ICI->getOperand(0);
+ }
// Unwrap any lossless ptrtoint cast.
- if (DL && CV) {
+ if (CV) {
if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
Value *Ptr = PTII->getPointerOperand();
- if (PTII->getType() == DL->getIntPtrType(Ptr->getType()))
+ if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
CV = Ptr;
}
}
@@ -1007,8 +981,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
- assert(DL && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()),
+ CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
"magicptr");
}
@@ -1079,7 +1052,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
-static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) {
+static bool HoistThenElseCodeToIf(BranchInst *BI,
+ const TargetTransformInfo &TTI) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
@@ -1114,6 +1088,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) {
if (isa<TerminatorInst>(I1))
goto HoistTerminator;
+ if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
+ return Changed;
+
// For a normal instruction, we just move one to right before the branch,
// then replace all uses of the other with the first. Finally, we remove
// the now redundant second instruction.
@@ -1167,9 +1144,9 @@ HoistTerminator:
passingValueIsAlwaysUndefined(BB2V, PN))
return Changed;
- if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL))
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
return Changed;
- if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL))
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
return Changed;
}
}
@@ -1489,7 +1466,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
///
/// \returns true if the conditional block is removed.
static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
- const DataLayout *DL) {
+ const TargetTransformInfo &TTI) {
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
if (isa<FCmpInst>(BrCond))
@@ -1525,20 +1502,20 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (isa<DbgInfoIntrinsic>(I))
continue;
- // Only speculatively execution a single instruction (not counting the
+ // Only speculatively execute a single instruction (not counting the
// terminator) for now.
++SpeculationCost;
if (SpeculationCost > 1)
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !(HoistCondStores &&
- (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
- EndBB))))
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
+ I, BB, ThenBB, EndBB))))
return false;
if (!SpeculatedStoreValue &&
- ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold)
+ ComputeSpeculationCost(I, TTI) >
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
// Store the store speculation candidate.
@@ -1594,12 +1571,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (!OrigCE && !ThenCE)
continue; // Known safe and cheap.
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL)))
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0;
- if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
+ unsigned MaxCost = 2 * PHINodeFoldingThreshold *
+ TargetTransformInfo::TCC_Basic;
+ if (OrigCost + ThenCost > MaxCost)
return false;
// Account for the cost of an unfolded ConstantExpr which could end up
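For reference, the shape SpeculativelyExecuteBB targets, sketched as a hypothetical example (not part of the patch): a single cheap, safe instruction in the conditional block is hoisted above the branch and the merge PHI becomes a select.

    int speculate(bool C, int A, int B) {
      int R = 0;
      if (C)
        R = A + B;   // one safe instruction: hoisted, branch folded to a select
      return R;      // roughly: R = C ? A + B : 0
    }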
@@ -1706,7 +1685,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -1804,7 +1783,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -1835,6 +1815,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
SmallPtrSet<Instruction*, 4> AggressiveInsts;
unsigned MaxCostVal0 = PHINodeFoldingThreshold,
MaxCostVal1 = PHINodeFoldingThreshold;
+ MaxCostVal0 *= TargetTransformInfo::TCC_Basic;
+ MaxCostVal1 *= TargetTransformInfo::TCC_Basic;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
@@ -1845,9 +1827,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
}
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
- MaxCostVal0, DL) ||
+ MaxCostVal0, TTI) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
- MaxCostVal1, DL))
+ MaxCostVal1, TTI))
return false;
}
@@ -2067,8 +2049,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
/// predecessor branches to us and one of our successors, fold the block into
/// the predecessor and use logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL,
- unsigned BonusInstThreshold) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
Instruction *Cond = nullptr;
@@ -2124,7 +2105,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL,
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(I))
continue;
- if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I, DL))
+ if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I))
return false;
// I has only one use and can be executed unconditionally.
Instruction *User = dyn_cast<Instruction>(I->user_back());
@@ -2536,17 +2517,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// The weight to CommonDest should be PredCommon * SuccTotal +
// PredOther * SuccCommon.
// The weight to OtherDest should be PredOther * SuccOther.
- SmallVector<uint64_t, 2> NewWeights;
- NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) +
- PredOther * SuccCommon);
- NewWeights.push_back(PredOther * SuccOther);
+ uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
+ PredOther * SuccCommon,
+ PredOther * SuccOther};
// Halve the weights if any of them cannot fit in an uint32_t
FitWeights(NewWeights);
- SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end());
PBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BI->getContext()).
- createBranchWeights(MDWeights));
+ MDBuilder(BI->getContext())
+ .createBranchWeights(NewWeights[0], NewWeights[1]));
}
// OtherDest may have phi nodes. If so, add an entry from PBI's
@@ -2719,8 +2698,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(
- ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, const DataLayout *DL, AssumptionCache *AC) {
+ ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
+ const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
+ AssumptionCache *AC) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2753,7 +2733,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -2769,7 +2749,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -2825,8 +2805,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
- IRBuilder<> &Builder) {
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
+ const DataLayout &DL) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (!Cond) return false;
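SimplifyBranchOnICmpChain handles branches whose condition is an or/and chain of equality compares against constants. A hypothetical example of the input shape (not part of the patch; the helper names are made up):

    void special();
    void common();
    void handle(int X) {
      if (X == 2 || X == 7 || X == 19)   // or-chain of icmps against constants
        special();   // becomes: switch (X) { case 2: case 7: case 19: -> special }
      else
        common();
    }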
@@ -2901,10 +2881,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
Builder.SetInsertPoint(BI);
// Convert pointer to int before we switch.
if (CompVal->getType()->isPointerTy()) {
- assert(DL && "Cannot switch on pointer without DataLayout");
- CompVal = Builder.CreatePtrToInt(CompVal,
- DL->getIntPtrType(CompVal->getType()),
- "magicptr");
+ CompVal = Builder.CreatePtrToInt(
+ CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
}
// Create the new switch instruction now.
@@ -2949,20 +2927,9 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
- bool InvokeRequiresTableEntry = false;
- bool Changed = false;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
-
- if (II->hasFnAttr(Attribute::UWTable)) {
- // Don't remove an `invoke' instruction if the ABI requires an entry into
- // the table.
- InvokeRequiresTableEntry = true;
- continue;
- }
-
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
-
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
Call->takeName(II);
@@ -2982,14 +2949,11 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
// Finally, delete the invoke instruction!
II->eraseFromParent();
- Changed = true;
}
- if (!InvokeRequiresTableEntry)
- // The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
-
- return Changed;
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+ return true;
}
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
@@ -3121,55 +3085,6 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
--i; --e;
Changed = true;
}
- // If the default value is unreachable, figure out the most popular
- // destination and make it the default.
- if (SI->getDefaultDest() == BB) {
- std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- std::pair<unsigned, unsigned> &entry =
- Popularity[i.getCaseSuccessor()];
- if (entry.first == 0) {
- entry.first = 1;
- entry.second = i.getCaseIndex();
- } else {
- entry.first++;
- }
- }
-
- // Find the most popular block.
- unsigned MaxPop = 0;
- unsigned MaxIndex = 0;
- BasicBlock *MaxBlock = nullptr;
- for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
- I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second.first > MaxPop ||
- (I->second.first == MaxPop && MaxIndex > I->second.second)) {
- MaxPop = I->second.first;
- MaxIndex = I->second.second;
- MaxBlock = I->first;
- }
- }
- if (MaxBlock) {
- // Make this the new default, allowing us to delete any explicit
- // edges to it.
- SI->setDefaultDest(MaxBlock);
- Changed = true;
-
- // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
- // it.
- if (isa<PHINode>(MaxBlock->begin()))
- for (unsigned i = 0; i != MaxPop-1; ++i)
- MaxBlock->removePredecessor(SI->getParent());
-
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i)
- if (i.getCaseSuccessor() == MaxBlock) {
- SI->removeCase(i);
- --i; --e;
- }
- }
- }
} else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
if (II->getUnwindDest() == BB) {
// Convert the invoke to a call instruction. This would be a good
@@ -3203,70 +3118,122 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
return Changed;
}
-/// TurnSwitchRangeIntoICmp - Turns a switch with that contains only a
-/// integer range comparison into a sub, an icmp and a branch.
-static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
- assert(SI->getNumCases() > 1 && "Degenerate switch?");
+static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
+ assert(Cases.size() >= 1);
- // Make sure all cases point to the same destination and gather the values.
- SmallVector<ConstantInt *, 16> Cases;
- SwitchInst::CaseIt I = SI->case_begin();
- Cases.push_back(I.getCaseValue());
- SwitchInst::CaseIt PrevI = I++;
- for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
- if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (size_t I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
return false;
- Cases.push_back(I.getCaseValue());
}
- assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
+ return true;
+}
- // Sort the case values, then check if they form a range we can transform.
- array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
- for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
- if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
- return false;
+/// Turn a switch with two reachable destinations into an integer range
+/// comparison and branch.
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
+
+ bool HasDefault =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+
+ // Partition the cases into two sets with different destinations.
+ BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
+ BasicBlock *DestB = nullptr;
+ SmallVector <ConstantInt *, 16> CasesA;
+ SmallVector <ConstantInt *, 16> CasesB;
+
+ for (SwitchInst::CaseIt I : SI->cases()) {
+ BasicBlock *Dest = I.getCaseSuccessor();
+ if (!DestA) DestA = Dest;
+ if (Dest == DestA) {
+ CasesA.push_back(I.getCaseValue());
+ continue;
+ }
+ if (!DestB) DestB = Dest;
+ if (Dest == DestB) {
+ CasesB.push_back(I.getCaseValue());
+ continue;
+ }
+ return false; // More than two destinations.
}
- Constant *Offset = ConstantExpr::getNeg(Cases.back());
- Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases());
+ assert(DestA && DestB && "Single-destination switch should have been folded.");
+ assert(DestA != DestB);
+ assert(DestB != SI->getDefaultDest());
+ assert(!CasesB.empty() && "There must be non-default cases.");
+ assert(!CasesA.empty() || HasDefault);
+
+ // Figure out if one of the sets of cases form a contiguous range.
+ SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
+ BasicBlock *ContiguousDest = nullptr;
+ BasicBlock *OtherDest = nullptr;
+ if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
+ ContiguousCases = &CasesA;
+ ContiguousDest = DestA;
+ OtherDest = DestB;
+ } else if (CasesAreContiguous(CasesB)) {
+ ContiguousCases = &CasesB;
+ ContiguousDest = DestB;
+ OtherDest = DestA;
+ } else
+ return false;
+
+ // Start building the compare and branch.
+
+ Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
+ Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size());
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
- Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
+ Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
+
Value *Cmp;
// If NumCases overflowed, then all possible values jump to the successor.
- if (NumCases->isNullValue() && SI->getNumCases() != 0)
+ if (NumCases->isNullValue() && !ContiguousCases->empty())
Cmp = ConstantInt::getTrue(SI->getContext());
else
Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- BranchInst *NewBI = Builder.CreateCondBr(
- Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
+ BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
// Update weight for the newly-created conditional branch.
- SmallVector<uint64_t, 8> Weights;
- bool HasWeights = HasBranchWeights(SI);
- if (HasWeights) {
+ if (HasBranchWeights(SI)) {
+ SmallVector<uint64_t, 8> Weights;
GetBranchWeights(SI, Weights);
if (Weights.size() == 1 + SI->getNumCases()) {
- // Combine all weights for the cases to be the true weight of NewBI.
- // We assume that the sum of all weights for a Terminator can fit into 32
- // bits.
- uint32_t NewTrueWeight = 0;
- for (unsigned I = 1, E = Weights.size(); I != E; ++I)
- NewTrueWeight += (uint32_t)Weights[I];
+ uint64_t TrueWeight = 0;
+ uint64_t FalseWeight = 0;
+ for (size_t I = 0, E = Weights.size(); I != E; ++I) {
+ if (SI->getSuccessor(I) == ContiguousDest)
+ TrueWeight += Weights[I];
+ else
+ FalseWeight += Weights[I];
+ }
+ while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
+ TrueWeight /= 2;
+ FalseWeight /= 2;
+ }
NewBI->setMetadata(LLVMContext::MD_prof,
- MDBuilder(SI->getContext()).
- createBranchWeights(NewTrueWeight,
- (uint32_t)Weights[0]));
+ MDBuilder(SI->getContext()).createBranchWeights(
+ (uint32_t)TrueWeight, (uint32_t)FalseWeight));
}
}
- // Prune obsolete incoming values off the successor's PHI nodes.
- for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
- isa<PHINode>(BBI); ++BBI) {
- for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
+ // Prune obsolete incoming values off the successors' PHI nodes.
+ for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = ContiguousCases->size();
+ if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
+ if (OtherDest == SI->getDefaultDest()) ++PreviousEdges;
+ for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
+
+ // Drop the switch.
SI->eraseFromParent();
return true;
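The rewritten TurnSwitchRangeIntoICmp above now lets either of the two destinations own the contiguous range. A hypothetical example of the transform (not part of the patch):

    const char *kind(int C) {
      switch (C) {               // cases 3, 4 and 5 share one destination
      case 3: case 4: case 5:
        return "mid";
      default:
        return "other";
      }
      // Roughly what the transform emits instead of the switch:
      //   return (unsigned)(C - 3) < 3u ? "mid" : "other";
    }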
@@ -3274,8 +3241,8 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
-static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL,
- AssumptionCache *AC) {
+static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
+ const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
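EliminateDeadSwitchCases uses the known bits of the condition to drop cases that can never be taken. A hypothetical example (not part of the patch):

    int pick(unsigned X) {
      switch (X & 3u) {          // only the low two bits can be set
      case 0: return 10;
      case 1: return 11;
      case 4: return 99;         // provably dead: (X & 3) is never 4
      default: return 0;
      }
    }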
@@ -3426,9 +3393,8 @@ static Constant *LookupConstant(Value *V,
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
-ConstantFold(Instruction *I,
- const SmallDenseMap<Value *, Constant *> &ConstantPool,
- const DataLayout *DL) {
+ConstantFold(Instruction *I, const DataLayout &DL,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool) {
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
if (!A)
@@ -3448,9 +3414,10 @@ ConstantFold(Instruction *I,
return nullptr;
}
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
COps[1], DL);
+ }
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
}
@@ -3460,12 +3427,10 @@ ConstantFold(Instruction *I,
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
-GetCaseResults(SwitchInst *SI,
- ConstantInt *CaseVal,
- BasicBlock *CaseDest,
+GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
BasicBlock **CommonDest,
- SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res,
- const DataLayout *DL) {
+ SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
+ const DataLayout &DL) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
@@ -3484,7 +3449,7 @@ GetCaseResults(SwitchInst *SI,
} else if (isa<DbgInfoIntrinsic>(I)) {
// Skip debug intrinsic.
continue;
- } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) {
+ } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
// If the instruction has uses outside this block or a phi node slot for
@@ -3555,11 +3520,11 @@ static void MapCaseToResult(ConstantInt *CaseVal,
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
-static bool InitializeUniqueCases(
- SwitchInst *SI, const DataLayout *DL, PHINode *&PHI,
- BasicBlock *&CommonDest,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *&DefaultResult) {
+static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
+ BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult,
+ const DataLayout &DL) {
for (auto &I : SI->cases()) {
ConstantInt *CaseVal = I.getCaseValue();
@@ -3666,15 +3631,15 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
/// phi nodes in a common successor block with only two different
/// constant values, replace the switch with select.
static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout *DL, AssumptionCache *AC) {
+ AssumptionCache *AC, const DataLayout &DL) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
BasicBlock *CommonDest = nullptr;
Constant *DefaultResult;
SwitchCaseResultVectorTy UniqueResults;
// Collect all the cases that will deliver the same value from the switch.
- if (!InitializeUniqueCases(SI, DL, PHI, CommonDest, UniqueResults,
- DefaultResult))
+ if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
+ DL))
return false;
// Selects choose between maximum two values.
if (UniqueResults.size() != 2)
@@ -3701,12 +3666,10 @@ namespace {
/// SwitchLookupTable - Create a lookup table to use as a switch replacement
/// with the contents of Values, using DefaultValue to fill any holes in the
/// table.
- SwitchLookupTable(Module &M,
- uint64_t TableSize,
- ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
- Constant *DefaultValue,
- const DataLayout *DL);
+ SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL);
/// BuildLookup - Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table.
@@ -3714,8 +3677,7 @@ namespace {
/// WouldFitInRegister - Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
- static bool WouldFitInRegister(const DataLayout *DL,
- uint64_t TableSize,
+ static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
const Type *ElementType);
private:
@@ -3757,12 +3719,10 @@ namespace {
};
}
-SwitchLookupTable::SwitchLookupTable(Module &M,
- uint64_t TableSize,
- ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
- Constant *DefaultValue,
- const DataLayout *DL)
+SwitchLookupTable::SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL)
: SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
assert(Values.size() && "Can't build lookup table without values!");
@@ -3924,19 +3884,17 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
"switch.tableidx.zext");
Value *GEPIndices[] = { Builder.getInt32(0), Index };
- Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
- "switch.gep");
+ Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+ GEPIndices, "switch.gep");
return Builder.CreateLoad(GEP, "switch.load");
}
}
llvm_unreachable("Unknown lookup table kind!");
}
-bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL,
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
uint64_t TableSize,
const Type *ElementType) {
- if (!DL)
- return false;
const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
if (!IT)
return false;
@@ -3946,17 +3904,16 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL,
// Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
if (TableSize >= UINT_MAX/IT->getBitWidth())
return false;
- return DL->fitsInLegalInteger(TableSize * IT->getBitWidth());
+ return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
/// for this switch, based on the number of cases, size of the table and the
/// types of the results.
-static bool ShouldBuildLookupTable(SwitchInst *SI,
- uint64_t TableSize,
- const TargetTransformInfo &TTI,
- const DataLayout *DL,
- const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+static bool
+ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
+ const TargetTransformInfo &TTI, const DataLayout &DL,
+ const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
return false; // TableSize overflowed, or mul below might overflow.
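For context on the heuristics being re-threaded here, the classic input for SwitchToLookupTable is a dense switch that only produces constants. A hypothetical example (not part of the patch):

    int daysInMonth(unsigned Month) {
      switch (Month) {
      case 0: return 31; case 1: return 28; case 2: return 31;
      case 3: return 30; case 4: return 31; case 5: return 30;
      default: return 0;
      }
      // Roughly what the transform builds:
      //   static const int Table[6] = {31, 28, 31, 30, 31, 30};
      //   return Month < 6 ? Table[Month] : 0;
    }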
@@ -4079,10 +4036,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
/// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables.
-static bool SwitchToLookupTable(SwitchInst *SI,
- IRBuilder<> &Builder,
- const TargetTransformInfo &TTI,
- const DataLayout* DL) {
+static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Only build lookup table when we have a target that supports it.
@@ -4153,14 +4109,14 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// or a bitmask that fits in a register.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
- &CommonDest, DefaultResultsList, DL);
+ &CommonDest, DefaultResultsList, DL);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
return false;
- if (!(DL && DL->fitsInLegalInteger(TableSize)))
+ if (!DL.fitsInLegalInteger(TableSize))
return false;
}
@@ -4193,19 +4149,18 @@ static bool SwitchToLookupTable(SwitchInst *SI,
"It is impossible for a switch to have more entries than the max "
"representable value of its input integer type's size.");
- // If we have a fully covered lookup table, unconditionally branch to the
- // lookup table BB. Otherwise, check if the condition value is within the case
- // range. If it is so, branch to the new BB. Otherwise branch to SI's default
- // destination.
+ // If the default destination is unreachable, or if the lookup table covers
+ // all values of the conditional variable, branch directly to the lookup table
+ // BB. Otherwise, check that the condition is within the case range.
+ const bool DefaultIsReachable =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
BranchInst *RangeCheckBranch = nullptr;
- const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize;
- if (GeneratingCoveredLookupTable) {
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
- // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later,
- // do not delete PHINodes here.
- SI->getDefaultDest()->removePredecessor(SI->getParent(),
- true/*DontDeleteUselessPHIs*/);
+    // Note: We call removePredecessor later since we need to be able to get the
+ // PHI value for the default case in case we're using a bit mask.
} else {
Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
MinCaseVal->getType(), TableSize));
@@ -4257,6 +4212,13 @@ static bool SwitchToLookupTable(SwitchInst *SI,
AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
}
+ if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
+ // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later,
+ // do not delete PHINodes here.
+ SI->getDefaultDest()->removePredecessor(SI->getParent(),
+ /*DontDeleteUselessPHIs=*/true);
+ }
+
bool ReturnedEarly = false;
for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
PHINode *PHI = PHIs[I];
@@ -4317,12 +4279,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -4332,25 +4294,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// Remove unreachable cases.
- if (EliminateDeadSwitchCases(SI, DL, AC))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (EliminateDeadSwitchCases(SI, AC, DL))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToSelect(SI, Builder, DL, AC))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SwitchToSelect(SI, Builder, AC, DL))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToLookupTable(SI, Builder, TTI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SwitchToLookupTable(SI, Builder, DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4387,11 +4349,87 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
return Changed;
}
+/// Given a block with only a single landing pad and an unconditional branch
+/// try to find another basic block which this one can be merged with. This
+/// handles cases where we have multiple invokes with unique landing pads, but
+/// a shared handler.
+///
+/// We specifically choose to not worry about merging non-empty blocks
+/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
+/// practice, the optimizer produces empty landing pad blocks quite frequently
+/// when dealing with exception dense code. (see: instcombine, gvn, if-else
+/// sinking in this file)
+///
+/// This is primarily a code size optimization. We need to avoid performing
+/// any transform which might inhibit optimization (such as our ability to
+/// specialize a particular handler via tail commoning). We do this by not
+/// merging any blocks which require us to introduce a phi. Since the same
+/// values are flowing through both blocks, we don't lose any ability to
+/// specialize. If anything, we make such specialization more likely.
+///
+/// TODO - This transformation could remove entries from a phi in the target
+/// block when the inputs in the phi are the same for the two blocks being
+/// merged. In some cases, this could result in removal of the PHI entirely.
+static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
+ BasicBlock *BB) {
+ auto Succ = BB->getUniqueSuccessor();
+ assert(Succ);
+ // If there's a phi in the successor block, we'd likely have to introduce
+ // a phi into the merged landing pad block.
+ if (isa<PHINode>(*Succ->begin()))
+ return false;
+
+ for (BasicBlock *OtherPred : predecessors(Succ)) {
+ if (BB == OtherPred)
+ continue;
+ BasicBlock::iterator I = OtherPred->begin();
+ LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
+ if (!LPad2 || !LPad2->isIdenticalTo(LPad))
+ continue;
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
+ BranchInst *BI2 = dyn_cast<BranchInst>(I);
+ if (!BI2 || !BI2->isIdenticalTo(BI))
+ continue;
+
+    // We've found an identical block. Update our predecessors to take that
+ // path instead and make ourselves dead.
+ SmallSet<BasicBlock *, 16> Preds;
+ Preds.insert(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : Preds) {
+ InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
+ assert(II->getNormalDest() != BB &&
+ II->getUnwindDest() == BB && "unexpected successor");
+ II->setUnwindDest(OtherPred);
+ }
+
+ // The debug info in OtherPred doesn't cover the merged control flow that
+ // used to go through BB. We need to delete it or update it.
+ for (auto I = OtherPred->begin(), E = OtherPred->end();
+ I != E;) {
+ Instruction &Inst = *I; I++;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ Inst.eraseFromParent();
+ }
+
+ SmallSet<BasicBlock *, 16> Succs;
+ Succs.insert(succ_begin(BB), succ_end(BB));
+ for (BasicBlock *Succ : Succs) {
+ Succ->removePredecessor(BB);
+ }
+
+ IRBuilder<> Builder(BI);
+ Builder.CreateUnreachable();
+ BI->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
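A hypothetical C++ source of the exception-dense shape TryToMergeLandingPad is after (not part of the patch): each call that may throw gets its own invoke and landing pad, but the landing pads end up as identical empty blocks branching to one shared cleanup, so all but one can be dropped.

    struct Guard { ~Guard(); };   // non-trivial destructor forces cleanup code
    void mayThrow(int);
    void work() {
      Guard G;
      mayThrow(1);   // invoke #1, unwinds to a landing pad that runs ~Guard
      mayThrow(2);   // invoke #2, with an identical landing pad
      mayThrow(3);   // invoke #3, likewise
    }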
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
@@ -4411,17 +4449,26 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI,
- BonusInstThreshold, DL, AC))
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI,
+ BonusInstThreshold, AC))
return true;
}
+ // See if we can merge an empty landing pad block with another which is
+ // equivalent.
+ if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
+ if (I->isTerminator() &&
+ TryToMergeLandingPad(LPad, BI, BB))
+ return true;
+ }
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (FoldBranchToCommonDest(BI, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4436,7 +4483,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -4446,26 +4493,26 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())){
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
}
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
- if (SimplifyBranchOnICmpChain(BI, DL, Builder))
+ if (SimplifyBranchOnICmpChain(BI, Builder, DL))
return true;
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (FoldBranchToCommonDest(BI, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -4473,16 +4520,16 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistThenElseCodeToIf(BI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (HoistThenElseCodeToIf(BI, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -4490,8 +4537,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// If this is a branch on a phi node in the current block, thread control
@@ -4499,14 +4546,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4618,7 +4665,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// eliminate it, do so now.
if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, DL);
+ Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
Builder.SetInsertPoint(BB->getTerminator());
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -4650,7 +4697,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, const DataLayout *DL,
- AssumptionCache *AC) {
- return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AC).run(BB);
+ unsigned BonusInstThreshold, AssumptionCache *AC) {
+ return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
+ BonusInstThreshold, AC).run(BB);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index d54c09a..3757a80 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -48,22 +47,15 @@ namespace {
Loop *L;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL; // May be NULL
SmallVectorImpl<WeakVH> &DeadInsts;
bool Changed;
public:
- SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) :
- L(Loop),
- LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
- SE(SE),
- DeadInsts(Dead),
- Changed(false) {
- DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI,
+ SmallVectorImpl<WeakVH> &Dead)
+ : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -277,95 +269,57 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
Value *IVOperand) {
- // Currently we only handle instructions of the form "add <indvar> <value>"
- unsigned Op = BO->getOpcode();
- if (Op != Instruction::Add)
+ // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
+ if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
return false;
- // If BO is already both nuw and nsw then there is nothing left to do
- if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
+ const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
+ SCEV::NoWrapFlags);
+
+ switch (BO->getOpcode()) {
+ default:
return false;
- IntegerType *IT = cast<IntegerType>(IVOperand->getType());
- Value *OtherOperand = nullptr;
- if (BO->getOperand(0) == IVOperand) {
- OtherOperand = BO->getOperand(1);
- } else {
- assert(BO->getOperand(1) == IVOperand && "only other use!");
- OtherOperand = BO->getOperand(0);
+ case Instruction::Add:
+ GetExprForBO = &ScalarEvolution::getAddExpr;
+ break;
+
+ case Instruction::Sub:
+ GetExprForBO = &ScalarEvolution::getMinusSCEV;
+ break;
+
+ case Instruction::Mul:
+ GetExprForBO = &ScalarEvolution::getMulExpr;
+ break;
}
- bool Changed = false;
- const SCEV *OtherOpSCEV = SE->getSCEV(OtherOperand);
- if (OtherOpSCEV == SE->getCouldNotCompute())
- return false;
+ unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2);
+ const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
+ const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
- const SCEV *IVOpSCEV = SE->getSCEV(IVOperand);
- const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0);
+ bool Changed = false;
- if (!BO->hasNoSignedWrap()) {
- // Upgrade the add to an "add nsw" if we can prove that it will never
- // sign-overflow or sign-underflow.
-
- const SCEV *SignedMax =
- SE->getConstant(APInt::getSignedMaxValue(IT->getBitWidth()));
- const SCEV *SignedMin =
- SE->getConstant(APInt::getSignedMinValue(IT->getBitWidth()));
-
- // The addition "IVOperand + OtherOp" does not sign-overflow if the result
- // is sign-representable in 2's complement in the given bit-width.
- //
- // If OtherOp is SLT 0, then for an IVOperand in [SignedMin - OtherOp,
- // SignedMax], "IVOperand + OtherOp" is in [SignedMin, SignedMax + OtherOp].
- // Everything in [SignedMin, SignedMax + OtherOp] is representable since
- // SignedMax + OtherOp is at least -1.
- //
- // If OtherOp is SGE 0, then for an IVOperand in [SignedMin, SignedMax -
- // OtherOp], "IVOperand + OtherOp" is in [SignedMin + OtherOp, SignedMax].
- // Everything in [SignedMin + OtherOp, SignedMax] is representable since
- // SignedMin + OtherOp is at most -1.
- //
- // It follows that for all values of IVOperand in [SignedMin - smin(0,
- // OtherOp), SignedMax - smax(0, OtherOp)] the result of the add is
- // representable (i.e. there is no sign-overflow).
-
- const SCEV *UpperDelta = SE->getSMaxExpr(ZeroSCEV, OtherOpSCEV);
- const SCEV *UpperLimit = SE->getMinusSCEV(SignedMax, UpperDelta);
-
- bool NeverSignedOverflows =
- SE->isKnownPredicate(ICmpInst::ICMP_SLE, IVOpSCEV, UpperLimit);
-
- if (NeverSignedOverflows) {
- const SCEV *LowerDelta = SE->getSMinExpr(ZeroSCEV, OtherOpSCEV);
- const SCEV *LowerLimit = SE->getMinusSCEV(SignedMin, LowerDelta);
-
- bool NeverSignedUnderflows =
- SE->isKnownPredicate(ICmpInst::ICMP_SGE, IVOpSCEV, LowerLimit);
- if (NeverSignedUnderflows) {
- BO->setHasNoSignedWrap(true);
- Changed = true;
- }
+ if (!BO->hasNoUnsignedWrap()) {
+ const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
+ const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
+ SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
+ SCEV::FlagAnyWrap);
+ if (ExtendAfterOp == OpAfterExtend) {
+ BO->setHasNoUnsignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
}
}
- if (!BO->hasNoUnsignedWrap()) {
- // Upgrade the add computing "IVOperand + OtherOp" to an "add nuw" if we can
- // prove that it will never unsigned-overflow (i.e. the result will always
- // be representable in the given bit-width).
- //
- // "IVOperand + OtherOp" is unsigned-representable in 2's complement iff it
- // does not produce a carry. "IVOperand + OtherOp" produces no carry iff
- // IVOperand ULE (UnsignedMax - OtherOp).
-
- const SCEV *UnsignedMax =
- SE->getConstant(APInt::getMaxValue(IT->getBitWidth()));
- const SCEV *UpperLimit = SE->getMinusSCEV(UnsignedMax, OtherOpSCEV);
-
- bool NeverUnsignedOverflows =
- SE->isKnownPredicate(ICmpInst::ICMP_ULE, IVOpSCEV, UpperLimit);
-
- if (NeverUnsignedOverflows) {
- BO->setHasNoUnsignedWrap(true);
+ if (!BO->hasNoSignedWrap()) {
+ const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
+ const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
+ SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
+ SCEV::FlagAnyWrap);
+ if (ExtendAfterOp == OpAfterExtend) {
+ BO->setHasNoSignedWrap();
+ SE->forgetValue(BO);
Changed = true;
}
}
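The new strengthening logic proves nuw/nsw symbolically through SCEV: the operation cannot wrap iff performing it in a type twice as wide and extending the narrow result agree. A minimal sketch of that underlying fact in plain integer arithmetic (not the LLVM API, and checking one concrete value rather than proving it for all operands):

    #include <cstdint>
    bool addHasNUW(uint8_t A, uint8_t B) {
      uint16_t OpAfterExtend = uint16_t(A) + uint16_t(B);   // zext operands, then add
      uint16_t ExtendAfterOp = uint16_t(uint8_t(A + B));    // add in 8 bits, then zext
      return OpAfterExtend == ExtendAfterOp;                // equal => no unsigned wrap
    }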
@@ -562,8 +516,8 @@ void IVVisitor::anchor() { }
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
{
- LoopInfo *LI = &LPM->getAnalysis<LoopInfo>();
- SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LPM, Dead);
+ LoopInfo *LI = &LPM->getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead);
SIV.simplifyUsers(CurrIV, V);
return SIV.hasChanged();
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
index cc97098..c499c87 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -25,7 +25,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -43,7 +43,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
/// runOnFunction - Remove instructions that simplify.
@@ -51,9 +51,9 @@ namespace {
const DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
@@ -106,7 +106,7 @@ char InstSimplifier::ID = 0;
INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5a0d52e..6bbf828 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -30,7 +30,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
@@ -120,12 +120,12 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
/// string/memory copying library function \p Func.
/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
/// Their fortified (_chk) counterparts are also accepted.
-static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func,
- const DataLayout *DL) {
+static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
FunctionType *FT = F->getFunctionType();
LLVMContext &Context = F->getContext();
Type *PCharTy = Type::getInt8PtrTy(Context);
- Type *SizeTTy = DL ? DL->getIntPtrType(Context) : nullptr;
+ Type *SizeTTy = DL.getIntPtrType(Context);
unsigned NumParams = FT->getNumParams();
// All string libfuncs return the same type as the first parameter.
@@ -208,10 +208,6 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return Dst;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return emitStrLenMemCpy(Src, Dst, Len, B);
}
@@ -226,13 +222,13 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
// the string .. we're concatenating).
- Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+ Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(
- CpyDst, Src,
- ConstantInt::get(DL->getIntPtrType(Src->getContext()), Len + 1), 1);
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1),
+ 1);
return Dst;
}
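Throughout this file the IRBuilder GEP calls now name the source element type explicitly (i8 here, since these are byte offsets into C strings) instead of letting the builder infer it from the pointer operand. The shape of the change, taken from the call above:

    // Before: element type inferred from Dst's pointee type:
    //   Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
    // After: element type passed explicitly (i8 for byte addressing):
    Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");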
@@ -269,10 +265,6 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
if (SrcLen == 0 || Len == 0)
return Dst;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// We don't optimize this case
if (Len < SrcLen)
return nullptr;
@@ -297,25 +289,21 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!CharC) {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
uint64_t Len = GetStringLength(SrcStr);
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
- return EmitMemChr(
- SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), B, DL, TLI);
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
+ B, DL, TLI);
}
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
- if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+ if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
return nullptr;
}
@@ -328,7 +316,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
}
Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
@@ -350,8 +338,8 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
- if (DL && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, DL, TLI);
+ if (CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TLI);
return nullptr;
}
@@ -363,7 +351,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
return Constant::getNullValue(CI->getType());
// strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
}
Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
@@ -398,12 +386,8 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(DL->getIntPtrType(CI->getContext()),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
std::min(Len1, Len2)),
B, DL, TLI);
}
@@ -435,7 +419,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
- if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
StringRef Str1, Str2;
@@ -462,17 +446,13 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
if (Len == 0)
@@ -481,7 +461,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), 1);
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1);
return Dst;
}
@@ -490,17 +470,13 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// Verify the "stpcpy" function prototype.
FunctionType *FT = Callee->getFunctionType();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy, DL))
- return nullptr;
-
- // These optimizations require DataLayout.
- if (!DL)
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
// See if we can get the length of the input string.
@@ -509,9 +485,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
return nullptr;
Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
+ Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
Value *DstEnd =
- B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1));
+ B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -523,7 +499,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy))
return nullptr;
Value *Dst = CI->getArgOperand(0);
@@ -551,17 +527,13 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return Dst; // strncpy(x, y, 0) -> x
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// Let strncpy handle the zero padding
if (Len > SrcLen + 1)
return nullptr;
Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- B.CreateMemCpy(Dst, Src, ConstantInt::get(DL->getIntPtrType(PT), Len), 1);
+ B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
return Dst;
}
@@ -625,12 +597,12 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
- return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
}
// strpbrk(s, "a") -> strchr(s, 'a')
- if (DL && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
+ if (HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
return nullptr;
}
@@ -706,7 +678,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
}
// strcspn(s, "") -> strlen(s)
- if (DL && HasS2 && S2.empty())
+ if (HasS2 && S2.empty())
return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
return nullptr;
@@ -725,7 +697,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
if (!StrLen)
return nullptr;
@@ -767,12 +739,98 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
+ Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
return nullptr;
}
+Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy(32) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return nullptr;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+
+ // memchr(x, y, 0) -> null
+ if (LenC && LenC->isNullValue())
+ return Constant::getNullValue(CI->getType());
+
+ // From now on we need at least constant length and string.
+ StringRef Str;
+ if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
+ return nullptr;
+
+ // Truncate the string to LenC. If Str is smaller than LenC we will still only
+ // scan the string, as reading past the end of it is undefined and we can just
+ // return null if we don't find the char.
+ Str = Str.substr(0, LenC->getZExtValue());
+
+  // If the char is variable but the input str and length are not, we can turn
+ // this memchr call into a simple bit field test. Of course this only works
+ // when the return value is only checked against null.
+ //
+ // It would be really nice to reuse switch lowering here but we can't change
+ // the CFG at this point.
+ //
+ // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0
+ // after bounds check.
+ if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
+ unsigned char Max =
+ *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
+ reinterpret_cast<const unsigned char *>(Str.end()));
+
+ // Make sure the bit field we're about to create fits in a register on the
+ // target.
+ // FIXME: On a 64 bit architecture this prevents us from using the
+ // interesting range of alpha ascii chars. We could do better by emitting
+ // two bitfields or shifting the range by 64 if no lower chars are used.
+ if (!DL.fitsInLegalInteger(Max + 1))
+ return nullptr;
+
+ // For the bit field use a power-of-2 type with at least 8 bits to avoid
+ // creating unnecessary illegal types.
+ unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
+
+ // Now build the bit field.
+ APInt Bitfield(Width, 0);
+ for (char C : Str)
+ Bitfield.setBit((unsigned char)C);
+ Value *BitfieldC = B.getInt(Bitfield);
+
+ // First check that the bit field access is within bounds.
+ Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
+ Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
+ "memchr.bounds");
+
+ // Create code that checks if the given bit is set in the field.
+ Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
+ Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
+
+ // Finally merge both checks and cast to pointer type. The inttoptr
+ // implicitly zexts the i1 to intptr type.
+ return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType());
+ }
+
+ // Check if all arguments are constants. If so, we can constant fold.
+ if (!CharC)
+ return nullptr;
+
+ // Compute the offset.
+ size_t I = Str.find(CharC->getSExtValue() & 0xFF);
+ if (I == StringRef::npos) // Didn't find the char. memchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // memchr(s+n,c,l) -> gep(s+n+i,c)
+ return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
+}
+
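When the haystack is a constant string, the needle is variable, and the call's result is only compared against null, the new optimizeMemChr above replaces the call with a bounds check plus a bit test. A rough scalar equivalent of what gets emitted, written as plain C++ for illustration only:

    // memchr("\r\n", c, 2) != nullptr  lowers to roughly
    //   (c < W) && ((1 << c) & Bitfield),
    // with one bit set per byte of the constant string and W the chosen
    // power-of-two bit-field width (16 here, since '\r' == 13 is the max byte).
    static bool memchrCRLFFound(unsigned char C) {
      const unsigned Width = 16;
      const unsigned Bitfield = (1u << '\r') | (1u << '\n');
      return C < Width && ((1u << C) & Bitfield) != 0;
    }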
Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
@@ -827,11 +885,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy))
return nullptr;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -842,11 +897,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove))
return nullptr;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -857,11 +909,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset))
return nullptr;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -924,7 +973,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
Module *M = CI->getParent()->getParent()->getParent();
- Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID();
+ Intrinsic::ID IID = Callee->getIntrinsicID();
Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
V = B.CreateCall(F, V);
} else {
@@ -1101,7 +1150,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Value *Callee =
M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
Op->getType(), B.getInt32Ty(), nullptr);
- CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ CallInst *CI = B.CreateCall(Callee, {One, LdExpArg});
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1387,7 +1436,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
Type *ArgType = Op->getType();
Value *F =
Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+ Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1521,7 +1570,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI);
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TLI);
if (CI->use_empty() || !Res)
return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1534,7 +1583,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, DL, TLI);
+ Value *NewCI = EmitPutS(GV, B, TLI);
return (CI->use_empty() || !NewCI)
? NewCI
: ConstantInt::get(CI->getType(), FormatStr.size() + 1);
@@ -1544,7 +1593,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TLI);
if (CI->use_empty() || !Res)
return Res;
@@ -1554,7 +1603,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
+ return EmitPutS(CI->getArgOperand(1), B, TLI);
}
return nullptr;
}
@@ -1600,16 +1649,11 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
if (FormatStr[i] == '%')
return nullptr; // we found a format specifier, bail out.
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- B.CreateMemCpy(
- CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()),
- FormatStr.size() + 1),
- 1); // Copy the null byte.
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1),
+ 1); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1627,17 +1671,13 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
return ConstantInt::get(CI->getType(), 1);
}
if (FormatStr[1] == 's') {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
@@ -1702,13 +1742,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return nullptr; // We found a format specifier.
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return EmitFWrite(
CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), FormatStr.size()),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
CI->getArgOperand(0), B, DL, TLI);
}
@@ -1723,14 +1759,14 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
- return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
return nullptr;
}
@@ -1790,7 +1826,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
// This optimisation is only valid if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
@@ -1802,10 +1838,6 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// Require two pointers. Also, we can't optimize if return value is used.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
@@ -1820,7 +1852,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
// Known to have no uses (see above).
return EmitFWrite(
CI->getArgOperand(0),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len - 1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
CI->getArgOperand(1), B, DL, TLI);
}
@@ -1839,7 +1871,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI);
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TLI);
if (CI->use_empty() || !Res)
return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1906,6 +1938,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrCSpn(CI, Builder);
case LibFunc::strstr:
return optimizeStrStr(CI, Builder);
+ case LibFunc::memchr:
+ return optimizeMemChr(CI, Builder);
case LibFunc::memcmp:
return optimizeMemCmp(CI, Builder);
case LibFunc::memcpy:
@@ -2088,15 +2122,19 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
}
-LibCallSimplifier::LibCallSimplifier(const DataLayout *DL,
- const TargetLibraryInfo *TLI) :
- FortifiedSimplifier(DL, TLI),
- DL(DL),
- TLI(TLI),
- UnsafeFPShrink(false) {
+LibCallSimplifier::LibCallSimplifier(
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ function_ref<void(Instruction *, Value *)> Replacer)
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false),
+ Replacer(Replacer) {}
+
+void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
+ // Indirect through the replacer used in this instance.
+ Replacer(I, With);
}
-void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
+/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I,
+ Value *With) {
I->replaceAllUsesWith(With);
I->eraseFromParent();
}
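LibCallSimplifier now routes replacements through a caller-supplied callback instead of a virtual replaceAllUsesWith, so users such as InstCombine can decide how dead calls are disposed of. A hypothetical caller that defers erasure might look like this (sketch only; the names DeadCalls and Replacer are illustrative):

    SmallVector<Instruction *, 16> DeadCalls;
    auto Replacer = [&](Instruction *I, Value *With) {
      I->replaceAllUsesWith(With);
      DeadCalls.push_back(I);   // erase later, outside the IR walk
    };
    LibCallSimplifier Simplifier(DL, TLI, Replacer);

Keeping the lambda in a named local matters because function_ref does not own its callee.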
@@ -2183,7 +2221,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2197,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2211,7 +2249,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<>
Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2227,8 +2265,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
LibFunc::Func Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
+ const DataLayout &DL = CI->getModule()->getDataLayout();
- if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, Func))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
@@ -2237,7 +2276,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// __stpcpy_chk(x,x,...) -> x+strlen(x)
if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
// If a) we don't have any length information, or b) we know this will
@@ -2245,29 +2284,25 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
- if (isFortifiedCallFoldable(CI, 2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
- return Ret;
- } else if (!OnlyLowerUnknownSize) {
- // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0)
- return nullptr;
+ if (isFortifiedCallFoldable(CI, 2, 1, true))
+ return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
- // This optimization requires DataLayout.
- if (!DL)
- return nullptr;
+ if (OnlyLowerUnknownSize)
+ return nullptr;
- Type *SizeTTy = DL->getIntPtrType(CI->getContext());
- Value *LenV = ConstantInt::get(SizeTTy, Len);
- Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
- // If the function was an __stpcpy_chk, and we were able to fold it into
- // a __memcpy_chk, we still need to return the correct end pointer.
- if (Ret && Func == LibFunc::stpcpy_chk)
- return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1));
- return Ret;
- }
- return nullptr;
+  // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+
+ Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+ Value *LenV = ConstantInt::get(SizeTTy, Len);
+ Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+ // If the function was an __stpcpy_chk, and we were able to fold it into
+ // a __memcpy_chk, we still need to return the correct end pointer.
+ if (Ret && Func == LibFunc::stpcpy_chk)
+ return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
+ return Ret;
}
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
@@ -2276,20 +2311,29 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
- if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, Func))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Ret =
- EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7));
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
return Ret;
}
return nullptr;
}
Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->isNoBuiltin())
- return nullptr;
+ // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
+ // Some clang users checked for _chk libcall availability using:
+ // __has_builtin(__builtin___memcpy_chk)
+ // When compiling with -fno-builtin, this is always true.
+ // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
+ // end up with fortified libcalls, which isn't acceptable in a freestanding
+ // environment which only provides their non-fortified counterparts.
+ //
+ // Until we change clang and/or teach external users to check for availability
+ // differently, disregard the "nobuiltin" attribute and TLI::has.
+ //
+ // PR23093.
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
@@ -2298,7 +2342,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
// First, check that this is a known library function.
- if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func))
+ if (!TLI->getLibFunc(FuncName, Func))
return nullptr;
// We never change the calling convention.
@@ -2324,8 +2368,6 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
}
-FortifiedLibCallSimplifier::
-FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI,
- bool OnlyLowerUnknownSize)
- : DL(DL), TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {
-}
+FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
+ const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
+ : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index d36283e..a2a54da 100644
--- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -60,7 +60,8 @@
#define DEBUG_TYPE "symbol-rewriter"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -72,15 +73,15 @@
#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
+using namespace SymbolRewriter;
static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
cl::desc("Symbol Rewrite Map"),
cl::value_desc("filename"));
-namespace llvm {
-namespace SymbolRewriter {
-void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
- const std::string &Target) {
+static void rewriteComdat(Module &M, GlobalObject *GO,
+ const std::string &Source,
+ const std::string &Target) {
if (Comdat *CD = GO->getComdat()) {
auto &Comdats = M.getComdatSymbolTable();
@@ -92,6 +93,7 @@ void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
}
}
+namespace {
template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const>
class ExplicitRewriteDescriptor : public RewriteDescriptor {
@@ -226,6 +228,7 @@ typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
&llvm::Module::getNamedAlias,
&llvm::Module::aliases>
PatternRewriteNamedAliasDescriptor;
+} // namespace
bool RewriteMapParser::parse(const std::string &MapFile,
RewriteDescriptorList *DL) {
@@ -489,8 +492,6 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
return true;
}
-}
-}
namespace {
class RewriteSymbols : public ModulePass {
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 65f2ae2..cac80ac 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
@@ -155,13 +156,12 @@ static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) {
}
static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer);
-static Metadata *mapMetadataOp(Metadata *Op,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
@@ -182,157 +182,85 @@ static Metadata *mapMetadataOp(Metadata *Op,
return nullptr;
}
-static Metadata *cloneMDTuple(const MDTuple *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer,
- bool IsDistinct) {
- // Distinct MDTuples have their own code path.
- assert(!IsDistinct && "Unexpected distinct tuple");
- (void)IsDistinct;
-
- SmallVector<Metadata *, 4> Elts;
- Elts.reserve(Node->getNumOperands());
- for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
- Elts.push_back(mapMetadataOp(Node->getOperand(I), Cycles, VM, Flags,
- TypeMapper, Materializer));
-
- return MDTuple::get(Node->getContext(), Elts);
-}
-
-static Metadata *cloneMDLocation(const MDLocation *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer,
- bool IsDistinct) {
- return (IsDistinct ? MDLocation::getDistinct : MDLocation::get)(
- Node->getContext(), Node->getLine(), Node->getColumn(),
- mapMetadataOp(Node->getScope(), Cycles, VM, Flags, TypeMapper,
- Materializer),
- mapMetadataOp(Node->getInlinedAt(), Cycles, VM, Flags, TypeMapper,
- Materializer));
-}
-
-static Metadata *cloneMDNode(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer, bool IsDistinct) {
- switch (Node->getMetadataID()) {
- default:
- llvm_unreachable("Invalid UniquableMDNode subclass");
-#define HANDLE_UNIQUABLE_LEAF(CLASS) \
- case Metadata::CLASS##Kind: \
- return clone##CLASS(cast<CLASS>(Node), Cycles, VM, Flags, TypeMapper, \
- Materializer, IsDistinct);
-#include "llvm/IR/Metadata.def"
+/// \brief Remap nodes.
+///
+/// Insert \c NewNode in the value map, and then remap \c OldNode's operands.
+/// Assumes that \c NewNode is already a clone of \c OldNode.
+///
+/// \pre \c NewNode is a clone of \c OldNode.
+static bool remap(const MDNode *OldNode, MDNode *NewNode,
+ SmallVectorImpl<MDNode *> &Cycles, ValueToValueMapTy &VM,
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
+ ValueMaterializer *Materializer) {
+ assert(OldNode->getNumOperands() == NewNode->getNumOperands() &&
+ "Expected nodes to match");
+ assert(OldNode->isResolved() && "Expected resolved node");
+ assert(!NewNode->isUniqued() && "Expected non-uniqued node");
+
+ // Map the node upfront so it's available for cyclic references.
+ mapToMetadata(VM, OldNode, NewNode);
+ bool AnyChanged = false;
+ for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) {
+ Metadata *Old = OldNode->getOperand(I);
+ assert(NewNode->getOperand(I) == Old &&
+ "Expected old operands to already be in place");
+
+ Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags,
+ TypeMapper, Materializer);
+ if (Old != New) {
+ AnyChanged = true;
+ NewNode->replaceOperandWith(I, New);
+ }
}
-}
-static void
-trackCyclesUnderDistinct(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles) {
- // Track any cycles beneath this node.
- for (Metadata *Op : Node->operands())
- if (auto *N = dyn_cast_or_null<UniquableMDNode>(Op))
- if (!N->isResolved())
- Cycles.push_back(N);
+ return AnyChanged;
}
/// \brief Map a distinct MDNode.
///
/// Distinct nodes are not uniqued, so they must always be recreated.
-static Metadata *mapDistinctNode(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+static Metadata *mapDistinctNode(const MDNode *Node,
+ SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
assert(Node->isDistinct() && "Expected distinct node");
- // Optimization for MDTuples.
- if (isa<MDTuple>(Node)) {
- // Create the node first so it's available for cyclical references.
- SmallVector<Metadata *, 4> EmptyOps(Node->getNumOperands());
- MDTuple *NewMD = MDTuple::getDistinct(Node->getContext(), EmptyOps);
- mapToMetadata(VM, Node, NewMD);
-
- // Fix the operands.
- for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
- NewMD->replaceOperandWith(I,
- mapMetadataOp(Node->getOperand(I), Cycles, VM,
- Flags, TypeMapper, Materializer));
-
- trackCyclesUnderDistinct(NewMD, Cycles);
- return NewMD;
- }
+ MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone());
+ remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer);
- // In general we need a dummy node, since whether the operands are null can
- // affect the size of the node.
- std::unique_ptr<MDNodeFwdDecl> Dummy(
- MDNode::getTemporary(Node->getContext(), None));
- mapToMetadata(VM, Node, Dummy.get());
- auto *NewMD = cast<UniquableMDNode>(cloneMDNode(Node, Cycles, VM, Flags,
- TypeMapper, Materializer,
- /* IsDistinct */ true));
- Dummy->replaceAllUsesWith(NewMD);
- trackCyclesUnderDistinct(NewMD, Cycles);
- return mapToMetadata(VM, Node, NewMD);
-}
+ // Track any cycles beneath this node.
+ for (Metadata *Op : NewMD->operands())
+ if (auto *Node = dyn_cast_or_null<MDNode>(Op))
+ if (!Node->isResolved())
+ Cycles.push_back(Node);
-/// \brief Check whether a uniqued node needs to be remapped.
-///
-/// Check whether a uniqued node needs to be remapped (due to any operands
-/// changing).
-static bool shouldRemapUniquedNode(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
- ValueToValueMapTy &VM, RemapFlags Flags,
- ValueMapTypeRemapper *TypeMapper,
- ValueMaterializer *Materializer) {
- // Check all operands to see if any need to be remapped.
- for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
- Metadata *Op = Node->getOperand(I);
- if (Op != mapMetadataOp(Op, Cycles, VM, Flags, TypeMapper, Materializer))
- return true;
- }
- return false;
+ return NewMD;
}
/// \brief Map a uniqued MDNode.
///
/// Uniqued nodes may not need to be recreated (they may map to themselves).
-static Metadata *mapUniquedNode(const UniquableMDNode *Node,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+static Metadata *mapUniquedNode(const MDNode *Node,
+ SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
- assert(!Node->isDistinct() && "Expected uniqued node");
-
- // Create a dummy node in case we have a metadata cycle.
- MDNodeFwdDecl *Dummy = MDNode::getTemporary(Node->getContext(), None);
- mapToMetadata(VM, Node, Dummy);
-
- // Check all operands to see if any need to be remapped.
- if (!shouldRemapUniquedNode(Node, Cycles, VM, Flags, TypeMapper,
- Materializer)) {
- // Use an identity mapping.
- mapToSelf(VM, Node);
- MDNode::deleteTemporary(Dummy);
- return const_cast<Metadata *>(static_cast<const Metadata *>(Node));
- }
+ assert(Node->isUniqued() && "Expected uniqued node");
- // At least one operand needs remapping.
- Metadata *NewMD =
- cloneMDNode(Node, Cycles, VM, Flags, TypeMapper, Materializer,
- /* IsDistinct */ false);
- Dummy->replaceAllUsesWith(NewMD);
- MDNode::deleteTemporary(Dummy);
- return mapToMetadata(VM, Node, NewMD);
+ // Create a temporary node upfront in case we have a metadata cycle.
+ auto ClonedMD = Node->clone();
+ if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer))
+ // No operands changed, so use the identity mapping.
+ return mapToSelf(VM, Node);
+
+ // At least one operand has changed, so uniquify the cloned node.
+ return mapToMetadata(VM, Node,
+ MDNode::replaceWithUniqued(std::move(ClonedMD)));
}
static Metadata *MapMetadataImpl(const Metadata *MD,
- SmallVectorImpl<UniquableMDNode *> &Cycles,
+ SmallVectorImpl<MDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
@@ -364,14 +292,18 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
return nullptr;
}
- const UniquableMDNode *Node = cast<UniquableMDNode>(MD);
- assert(Node->isResolved() && "Unexpected unresolved node");
+ // Note: this cast precedes the Flags check so we always get its associated
+ // assertion.
+ const MDNode *Node = cast<MDNode>(MD);
// If this is a module-level metadata and we know that nothing at the
// module level is changing, then use an identity mapping.
if (Flags & RF_NoModuleLevelChanges)
return mapToSelf(VM, MD);
+ // Require resolved nodes whenever metadata might be remapped.
+ assert(Node->isResolved() && "Unexpected unresolved node");
+
if (Node->isDistinct())
return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
@@ -381,17 +313,19 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
- SmallVector<UniquableMDNode *, 8> Cycles;
+ SmallVector<MDNode *, 8> Cycles;
Metadata *NewMD =
MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer);
// Resolve cycles underneath MD.
if (NewMD && NewMD != MD) {
- if (auto *N = dyn_cast<UniquableMDNode>(NewMD))
- N->resolveCycles();
+ if (auto *N = dyn_cast<MDNode>(NewMD))
+ if (!N->isResolved())
+ N->resolveCycles();
- for (UniquableMDNode *N : Cycles)
- N->resolveCycles();
+ for (MDNode *N : Cycles)
+ if (!N->isResolved())
+ N->resolveCycles();
} else {
// Shouldn't get unresolved cycles if nothing was remapped.
assert(Cycles.empty() && "Expected no unresolved cycles");
@@ -450,7 +384,24 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
I->setMetadata(MI->first, New);
}
+ if (!TypeMapper)
+ return;
+
// If the instruction's type is being remapped, do so now.
- if (TypeMapper)
- I->mutateType(TypeMapper->remapType(I->getType()));
+ if (auto CS = CallSite(I)) {
+ SmallVector<Type *, 3> Tys;
+ FunctionType *FTy = CS.getFunctionType();
+ Tys.reserve(FTy->getNumParams());
+ for (Type *Ty : FTy->params())
+ Tys.push_back(TypeMapper->remapType(Ty));
+ CS.mutateFunctionType(FunctionType::get(
+ TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
+ return;
+ }
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ AI->setAllocatedType(TypeMapper->remapType(AI->getAllocatedType()));
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ GEP->setSourceElementType(
+ TypeMapper->remapType(GEP->getSourceElementType()));
+ I->mutateType(TypeMapper->remapType(I->getType()));
}
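RemapInstruction now also updates the types stored directly on call sites (their function type), allocas (the allocated type), and GEPs (the source element type), in addition to mutating the instruction's own type. For reference, a type remapper is simply a ValueMapTypeRemapper subclass overriding remapType; a minimal hypothetical example:

    struct WidenI32ToI64 final : ValueMapTypeRemapper {
      Type *remapType(Type *SrcTy) override {
        if (SrcTy->isIntegerTy(32))
          return Type::getInt64Ty(SrcTy->getContext());
        return SrcTy;   // leave all other types untouched
      }
    };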
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index a0ccf9d..215d6f9 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -201,14 +202,14 @@ namespace {
initializeBBVectorizePass(*PassRegistry::getPassRegistry());
}
- BBVectorize(Pass *P, const VectorizeConfig &C)
+ BBVectorize(Pass *P, Function &F, const VectorizeConfig &C)
: BasicBlockPass(ID), Config(C) {
AA = &P->getAnalysis<AliasAnalysis>();
DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &P->getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis<TargetTransformInfo>();
+ TTI = IgnoreTargetInfo
+ ? nullptr
+ : &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
}
typedef std::pair<Value *, Value *> ValuePair;
@@ -220,7 +221,6 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
ScalarEvolution *SE;
- const DataLayout *DL;
const TargetTransformInfo *TTI;
// FIXME: const correct?
@@ -440,9 +440,10 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TTI = IgnoreTargetInfo ? nullptr : &getAnalysis<TargetTransformInfo>();
+ TTI = IgnoreTargetInfo
+ ? nullptr
+ : &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *BB.getParent());
return vectorizeBB(BB);
}
@@ -452,7 +453,7 @@ namespace {
AU.addRequired<AliasAnalysis>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
@@ -637,19 +638,19 @@ namespace {
dyn_cast<SCEVConstant>(OffsetSCEV)) {
ConstantInt *IntOff = ConstOffSCEV->getValue();
int64_t Offset = IntOff->getSExtValue();
-
+ const DataLayout &DL = I->getModule()->getDataLayout();
Type *VTy = IPtr->getType()->getPointerElementType();
- int64_t VTyTSS = (int64_t) DL->getTypeStoreSize(VTy);
+ int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy);
Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
- int64_t VTy2TSS = (int64_t) DL->getTypeStoreSize(VTy2);
+ int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
- return (abs64(Offset) % VTy2TSS) == 0;
+ return (std::abs(Offset) % VTy2TSS) == 0;
}
OffsetInElmts = Offset/VTyTSS;
- return (abs64(Offset) % VTyTSS) == 0;
+ return (std::abs(Offset) % VTyTSS) == 0;
}
return false;
@@ -661,7 +662,7 @@ namespace {
Function *F = I->getCalledFunction();
if (!F) return false;
- Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
+ Intrinsic::ID IID = F->getIntrinsicID();
if (!IID) return false;
switch(IID) {
@@ -841,7 +842,7 @@ namespace {
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void) SimplifyInstructionsInBlock(&BB, DL, AA->getTargetLibraryInfo());
+ (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo());
return true;
}
@@ -895,10 +896,6 @@ namespace {
return false;
}
- // We can't vectorize memory operations without target data
- if (!DL && IsSimpleLoadStore)
- return false;
-
Type *T1, *T2;
getInstructionTypes(I, T1, T2);
@@ -933,9 +930,8 @@ namespace {
if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
return false;
- if ((!Config.VectorizePointers || !DL) &&
- (T1->getScalarType()->isPointerTy() ||
- T2->getScalarType()->isPointerTy()))
+ if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() ||
+ T2->getScalarType()->isPointerTy()))
return false;
if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
@@ -980,8 +976,8 @@ namespace {
unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts = 0;
if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- IAddressSpace, JAddressSpace,
- OffsetInElmts) && abs64(OffsetInElmts) == 1) {
+ IAddressSpace, JAddressSpace, OffsetInElmts) &&
+ std::abs(OffsetInElmts) == 1) {
FixedOrder = (int) OffsetInElmts;
unsigned BottomAlignment = IAlignment;
if (OffsetInElmts < 0) BottomAlignment = JAlignment;
@@ -996,8 +992,8 @@ namespace {
// An aligned load or store is possible only if the instruction
// with the lower offset has an alignment suitable for the
// vector type.
-
- unsigned VecAlignment = DL->getPrefTypeAlignment(VType);
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ unsigned VecAlignment = DL.getPrefTypeAlignment(VType);
if (BottomAlignment < VecAlignment)
return false;
}
@@ -1102,7 +1098,7 @@ namespace {
CallInst *CI = dyn_cast<CallInst>(I);
Function *FI;
if (CI && (FI = CI->getCalledFunction())) {
- Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
+ Intrinsic::ID IID = FI->getIntrinsicID();
if (IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
IID == Intrinsic::cttz) {
Value *A1I = CI->getArgOperand(1),
@@ -1277,7 +1273,7 @@ namespace {
CostSavings, FixedOrder)) continue;
// J is a candidate for merging with I.
- if (!PairableInsts.size() ||
+ if (PairableInsts.empty() ||
PairableInsts[PairableInsts.size()-1] != I) {
PairableInsts.push_back(I);
}
@@ -2774,7 +2770,7 @@ namespace {
continue;
} else if (isa<CallInst>(I)) {
Function *F = cast<CallInst>(I)->getCalledFunction();
- Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
+ Intrinsic::ID IID = F->getIntrinsicID();
if (o == NumOperands-1) {
BasicBlock &BB = *I->getParent();
@@ -3107,7 +3103,17 @@ namespace {
else if (H->hasName())
K->takeName(H);
- if (!isa<StoreInst>(K))
+ if (auto CS = CallSite(K)) {
+ SmallVector<Type *, 3> Tys;
+ FunctionType *Old = CS.getFunctionType();
+ unsigned NumOld = Old->getNumParams();
+ assert(NumOld <= ReplacedOperands.size());
+ for (unsigned i = 0; i != NumOld; ++i)
+ Tys.push_back(ReplacedOperands[i]->getType());
+ CS.mutateFunctionType(
+ FunctionType::get(getVecTypeForPair(L->getType(), H->getType()),
+ Tys, Old->isVarArg()));
+ } else if (!isa<StoreInst>(K))
K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
unsigned KnownIDs[] = {
@@ -3192,7 +3198,7 @@ char BBVectorize::ID = 0;
static const char bb_vectorize_name[] = "Basic-Block Vectorization";
INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
@@ -3203,7 +3209,7 @@ BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
bool
llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
- BBVectorize BBVectorizer(P, C);
+ BBVectorize BBVectorizer(P, *BB.getParent(), C);
return BBVectorizer.vectorizeBB(BB);
}
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 47b92a3..011fd0f 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -58,6 +58,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
@@ -92,6 +93,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <map>
#include <tuple>
@@ -105,15 +107,6 @@ using namespace llvm::PatternMatch;
STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
-static cl::opt<unsigned>
-VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
- cl::desc("Sets the SIMD width. Zero is autoselect."));
-
-static cl::opt<unsigned>
-VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
- cl::desc("Sets the vectorization interleave count. "
- "Zero is autoselect."));
-
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
@@ -144,13 +137,6 @@ static cl::opt<bool> EnableMemAccessVersioning(
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
-/// When performing memory disambiguation checks at runtime do not make more
-/// than this number of comparisons.
-static const unsigned RuntimeMemoryCheckThreshold = 8;
-
-/// Maximum simd width.
-static const unsigned MaxVectorWidth = 64;
-
static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's number of scalar registers."));
@@ -218,29 +204,30 @@ class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class LoopVectorizeHints;
-/// Optimization analysis message produced during vectorization. Messages inform
-/// the user why vectorization did not occur.
-class Report {
- std::string Message;
- raw_string_ostream Out;
- Instruction *Instr;
-
+/// \brief This modifies LoopAccessReport to initialize message with
+/// loop-vectorizer-specific part.
+class VectorizationReport : public LoopAccessReport {
public:
- Report(Instruction *I = nullptr) : Out(Message), Instr(I) {
- Out << "loop not vectorized: ";
- }
-
- template <typename A> Report &operator<<(const A &Value) {
- Out << Value;
- return *this;
- }
-
- Instruction *getInstr() { return Instr; }
-
- std::string &str() { return Out.str(); }
- operator Twine() { return Out.str(); }
+ VectorizationReport(Instruction *I = nullptr)
+ : LoopAccessReport("loop not vectorized: ", I) {}
+
+ /// \brief This allows promotion of the loop-access analysis report into the
+ /// loop-vectorizer report. It modifies the message to add the
+ /// loop-vectorizer-specific part of the message.
+ explicit VectorizationReport(const LoopAccessReport &R)
+ : LoopAccessReport(Twine("loop not vectorized: ") + R.str(),
+ R.getInstr()) {}
};
+/// A helper function for converting Scalar types to vector types.
+/// If the incoming type is void, we return void. If the VF is 1, we return
+/// the scalar type.
+static Type* ToVectorTy(Type *Scalar, unsigned VF) {
+ if (Scalar->isVoidTy() || VF == 1)
+ return Scalar;
+ return VectorType::get(Scalar, VF);
+}
+
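The new ToVectorTy helper centralizes the scalar-to-vector type mapping used throughout the cost model below. Example uses, assuming an LLVMContext C:

    Type *V4F = ToVectorTy(Type::getFloatTy(C), 4); // <4 x float>
    Type *F1  = ToVectorTy(Type::getFloatTy(C), 1); // float: VF == 1 stays scalar
    Type *Vd  = ToVectorTy(Type::getVoidTy(C), 4);  // void is never widened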
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -258,13 +245,13 @@ public:
class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- const TargetLibraryInfo *TLI, unsigned VecWidth,
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, unsigned VecWidth,
unsigned UnrollFactor)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
+ : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
- Legal(nullptr) {}
+ Legal(nullptr), AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *L) {
@@ -278,6 +265,11 @@ public:
updateAnalysis();
}
+ // Return true if any runtime check is added.
+ bool IsSafetyChecksAdded() {
+ return AddedSafetyChecks;
+ }
+
virtual ~InnerLoopVectorizer() {}
protected:
@@ -288,19 +280,12 @@ protected:
/// originated from one scalar instruction.
typedef SmallVector<Value*, 2> VectorParts;
- // When we if-convert we need create edge masks. We have to cache values so
- // that we don't end up with exponential recursion/IR.
+ // When we if-convert we need to create edge masks. We have to cache values
+ // so that we don't end up with exponential recursion/IR.
typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
VectorParts> EdgeMaskCache;
- /// \brief Add code that checks at runtime if the accessed arrays overlap.
- ///
- /// Returns a pair of instructions where the first element is the first
- /// instruction generated in possibly a sequence of instructions and the
- /// second value is the final comparator value or NULL if no check is needed.
- std::pair<Instruction *, Instruction *> addRuntimeCheck(Instruction *Loc);
-
- /// \brief Add checks for strides that where assumed to be 1.
+ /// \brief Add checks for strides that were assumed to be 1.
///
/// Returns the last check instruction and the first check instruction in the
/// pair as (first, last).
@@ -355,10 +340,9 @@ protected:
/// element.
virtual Value *getBroadcastInstrs(Value *V);
- /// This function adds 0, 1, 2 ... to each vector element, starting at zero.
- /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
- /// The sequence starts at StartIndex.
- virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
+ /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
+  /// to each vector element of Val. The sequence starts at StartIdx.
+ virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
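  // Illustrative example (not part of this patch): with VF == 4,
  // Val == <0, 0, 0, 0>, StartIdx == 0 and Step == 2 the result is
  // <0, 2, 4, 6>; the old getConsecutiveVector behaviour corresponds to
  // Step == 1 (or Step == -1 for the negated form).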
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
@@ -420,10 +404,10 @@ protected:
DominatorTree *DT;
/// Alias Analysis.
AliasAnalysis *AA;
- /// Data Layout.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
+ /// Target Transform Info.
+ const TargetTransformInfo *TTI;
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
@@ -465,21 +449,24 @@ protected:
EdgeMaskCache MaskCache;
LoopVectorizationLegality *Legal;
+
+ // Record whether runtime check is added.
+ bool AddedSafetyChecks;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
- InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, unsigned UnrollFactor)
+ : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
private:
void scalarizeInstruction(Instruction *Instr,
bool IfPredicateStore = false) override;
void vectorizeMemoryInstruction(Instruction *Instr) override;
Value *getBroadcastInstrs(Value *V) override;
- Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate) override;
+ Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
Value *reverseVector(Value *Vec) override;
};
@@ -517,9 +504,8 @@ static std::string getDebugLocString(const Loop *L) {
std::string Result;
if (L) {
raw_string_ostream OS(Result);
- const DebugLoc LoopDbgLoc = L->getStartLoc();
- if (!LoopDbgLoc.isUnknown())
- LoopDbgLoc.print(L->getHeader()->getContext(), OS);
+ if (const DebugLoc LoopDbgLoc = L->getStartLoc())
+ LoopDbgLoc.print(OS);
else
// Just print the module name.
OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
@@ -574,135 +560,84 @@ static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *F
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- unsigned NumLoads;
- unsigned NumStores;
- unsigned NumPredStores;
-
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
- DominatorTree *DT, TargetLibraryInfo *TLI,
- AliasAnalysis *AA, Function *F,
- const TargetTransformInfo *TTI)
- : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), AA(AA), TheFunction(F), TTI(TTI), Induction(nullptr),
- WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
- }
-
- /// This enum represents the kinds of reductions that we support.
- enum ReductionKind {
- RK_NoReduction, ///< Not a reduction.
- RK_IntegerAdd, ///< Sum of integers.
- RK_IntegerMult, ///< Product of integers.
- RK_IntegerOr, ///< Bitwise or logical OR of numbers.
- RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
- RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
- RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
- RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult, ///< Product of floats.
- RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
- };
+ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ Function *F, const TargetTransformInfo *TTI,
+ LoopAccessAnalysis *LAA)
+ : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), Induction(nullptr),
+ WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of inductions that we support.
enum InductionKind {
- IK_NoInduction, ///< Not an induction variable.
- IK_IntInduction, ///< Integer induction variable. Step = 1.
- IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1.
- IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem).
- IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
- };
-
- // This enum represents the kind of minmax reduction.
- enum MinMaxReductionKind {
- MRK_Invalid,
- MRK_UIntMin,
- MRK_UIntMax,
- MRK_SIntMin,
- MRK_SIntMax,
- MRK_FloatMin,
- MRK_FloatMax
- };
-
- /// This struct holds information about reduction variables.
- struct ReductionDescriptor {
- ReductionDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr),
- Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
-
- ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
- MinMaxReductionKind MK)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
-
- // The starting value of the reduction.
- // It does not have to be zero!
- TrackingVH<Value> StartValue;
- // The instruction who's value is used outside the loop.
- Instruction *LoopExitInstr;
- // The kind of the reduction.
- ReductionKind Kind;
- // If this a min/max reduction the kind of reduction.
- MinMaxReductionKind MinMaxKind;
+ IK_NoInduction, ///< Not an induction variable.
+ IK_IntInduction, ///< Integer induction variable. Step = C.
+ IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
};
- /// This POD struct holds information about a potential reduction operation.
- struct ReductionInstDesc {
- ReductionInstDesc(bool IsRedux, Instruction *I) :
- IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
-
- ReductionInstDesc(Instruction *I, MinMaxReductionKind K) :
- IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
-
- // Is this instruction a reduction candidate.
- bool IsReduction;
- // The last instruction in a min/max pattern (select of the select(icmp())
- // pattern), or the current reduction instruction otherwise.
- Instruction *PatternLastInst;
- // If this is a min/max pattern the comparison predicate.
- MinMaxReductionKind MinMaxKind;
- };
-
- /// This struct holds information about the memory runtime legality
- /// check that a group of pointers do not overlap.
- struct RuntimePointerCheck {
- RuntimePointerCheck() : Need(false) {}
-
- /// Reset the state of the pointer runtime information.
- void reset() {
- Need = false;
- Pointers.clear();
- Starts.clear();
- Ends.clear();
- IsWritePtr.clear();
- DependencySetId.clear();
- AliasSetId.clear();
+ /// A struct for saving information about induction variables.
+ struct InductionInfo {
+ InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
+ : StartValue(Start), IK(K), StepValue(Step) {
+ assert(IK != IK_NoInduction && "Not an induction");
+ assert(StartValue && "StartValue is null");
+ assert(StepValue && !StepValue->isZero() && "StepValue is zero");
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
+ "StartValue is not a pointer for pointer induction");
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
+ "StartValue is not an integer for integer induction");
+ assert(StepValue->getType()->isIntegerTy() &&
+ "StepValue is not an integer");
+ }
+ InductionInfo()
+ : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
+
+ /// Get the consecutive direction. Returns:
+ /// 0 - unknown or non-consecutive.
+ /// 1 - consecutive and increasing.
+ /// -1 - consecutive and decreasing.
+ int getConsecutiveDirection() const {
+ if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
+ return StepValue->getSExtValue();
+ return 0;
}
- /// Insert a pointer and calculate the start and end SCEVs.
- void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId, unsigned ASId, ValueToValueMap &Strides);
-
- /// This flag indicates if we need to add the runtime check.
- bool Need;
- /// Holds the pointers that we need to check.
- SmallVector<TrackingVH<Value>, 2> Pointers;
- /// Holds the pointer value at the beginning of the loop.
- SmallVector<const SCEV*, 2> Starts;
- /// Holds the pointer value at the end of the loop.
- SmallVector<const SCEV*, 2> Ends;
- /// Holds the information if this pointer is used for writing to memory.
- SmallVector<bool, 2> IsWritePtr;
- /// Holds the id of the set of pointers that could be dependent because of a
- /// shared underlying object.
- SmallVector<unsigned, 2> DependencySetId;
- /// Holds the id of the disjoint alias set to which this pointer belongs.
- SmallVector<unsigned, 2> AliasSetId;
- };
+ /// Compute the transformed value of Index at offset StartValue using step
+ /// StepValue.
+ /// For integer induction, returns StartValue + Index * StepValue.
+ /// For pointer induction, returns StartValue[Index * StepValue].
+ /// FIXME: The newly created binary instructions should contain nsw/nuw
+ /// flags, which can be found from the original scalar operations.
+ Value *transform(IRBuilder<> &B, Value *Index) const {
+ switch (IK) {
+ case IK_IntInduction:
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (StepValue->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ if (!StepValue->isOne())
+ Index = B.CreateMul(Index, StepValue);
+ return B.CreateAdd(StartValue, Index);
+
+ case IK_PtrInduction:
+ if (StepValue->isMinusOne())
+ Index = B.CreateNeg(Index);
+ else if (!StepValue->isOne())
+ Index = B.CreateMul(Index, StepValue);
+ return B.CreateGEP(nullptr, StartValue, Index);
+
+ case IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+ }
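    // Illustrative examples (not part of this patch): for an integer
    // induction with StartValue == 100 and StepValue == -1, transform emits
    // "100 - Index"; for a pointer induction over i32* with StepValue == 2 it
    // emits a GEP of StartValue by (2 * Index).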
- /// A struct for saving information about induction variables.
- struct InductionInfo {
- InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
- InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {}
/// Start value.
TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
+ /// Step value.
+ ConstantInt *StepValue;
};
/// ReductionList contains the reduction descriptors for all
@@ -754,13 +689,15 @@ public:
bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
/// Returns the information that we collected about runtime memory check.
- RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+ const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const {
+ return LAI->getRuntimePointerCheck();
+ }
- /// This function returns the identity element (or neutral element) for
- /// the operation K.
- static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
+ const LoopAccessInfo *getLAI() const {
+ return LAI;
+ }
- unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+ unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
bool hasStride(Value *V) { return StrideSet.count(V); }
bool mustCheckStrides() { return !StrideSet.empty(); }
@@ -784,6 +721,15 @@ public:
bool isMaskRequired(const Instruction* I) {
return (MaskedOp.count(I) != 0);
}
+ unsigned getNumStores() const {
+ return LAI->getNumStores();
+ }
+ unsigned getNumLoads() const {
+ return LAI->getNumLoads();
+ }
+ unsigned getNumPredStores() const {
+ return NumPredStores;
+ }
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -808,23 +754,9 @@ private:
/// and we know that we can read from them without segfault.
bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
- /// Returns True, if 'Phi' is the kind of reduction variable for type
- /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
- bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
- /// Returns a struct describing if the instruction 'I' can be a reduction
- /// variable of type 'Kind'. If the reduction is a min/max pattern of
- /// select(icmp()) this function advances the instruction pointer 'I' from the
- /// compare instruction to the select instruction and stores this pointer in
- /// 'PatternLastInst' member of the returned struct.
- ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
- ReductionInstDesc &Desc);
- /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
- /// pattern corresponding to a min(X, Y) or max(X, Y).
- static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
- ReductionInstDesc &Prev);
- /// Returns the induction kind of Phi. This function may return NoInduction
- /// if the PHI is not an induction variable.
- InductionKind isInductionVariable(PHINode *Phi);
+ /// Returns the induction kind of Phi and record the step. This function may
+ /// return NoInduction if the PHI is not an induction variable.
+ InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
/// \brief Collect memory access with loop invariant strides.
///
@@ -833,31 +765,32 @@ private:
void collectStridedAccess(Value *LoadOrStoreInst);
/// Report an analysis message to assist the user in diagnosing loops that are
- /// not vectorized.
- void emitAnalysis(Report &Message) {
- DebugLoc DL = TheLoop->getStartLoc();
- if (Instruction *I = Message.getInstr())
- DL = I->getDebugLoc();
- emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
- *TheFunction, DL, Message.str());
+ /// not vectorized. These are handled as LoopAccessReport rather than
+ /// VectorizationReport because the << operator of VectorizationReport returns
+ /// LoopAccessReport.
+ void emitAnalysis(const LoopAccessReport &Message) {
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
}
+ unsigned NumPredStores;
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
- /// DataLayout analysis.
- const DataLayout *DL;
- /// Dominators.
- DominatorTree *DT;
/// Target Library Info.
TargetLibraryInfo *TLI;
- /// Alias analysis.
- AliasAnalysis *AA;
/// Parent function
Function *TheFunction;
/// Target Transform Info
const TargetTransformInfo *TTI;
+ /// Dominator Tree.
+ DominatorTree *DT;
+ // LoopAccess analysis.
+ LoopAccessAnalysis *LAA;
+ // And the loop-accesses info corresponding to this loop. This pointer is
+ // null until canVectorizeMemory sets it up.
+ const LoopAccessInfo *LAI;
// --- vectorization state --- //
@@ -879,17 +812,13 @@ private:
/// This set holds the variables which are known to be uniform after
/// vectorization.
SmallPtrSet<Instruction*, 4> Uniforms;
- /// We need to check that all of the pointers in this list are disjoint
- /// at runtime.
- RuntimePointerCheck PtrRtCheck;
+
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
- unsigned MaxSafeDepDistBytes;
-
ValueToValueMap Strides;
SmallPtrSet<Value *, 8> StrideSet;
-
+
/// While vectorizing these instructions we have to generate a
/// call to the appropriate masked intrinsic
SmallPtrSet<const Instruction*, 8> MaskedOp;
@@ -907,10 +836,9 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
- AssumptionCache *AC, const Function *F,
- const LoopVectorizeHints *Hints)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI),
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const Function *F, const LoopVectorizeHints *Hints)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
TheFunction(F), Hints(Hints) {
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
}
@@ -963,23 +891,16 @@ private:
/// width. Vector width of one means scalar.
unsigned getInstructionCost(Instruction *I, unsigned VF);
- /// A helper function for converting Scalar types to vector types.
- /// If the incoming type is void, we return void. If the VF is 1, we return
- /// the scalar type.
- static Type* ToVectorTy(Type *Scalar, unsigned VF);
-
  /// Returns whether the instruction is a load or store and will be emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
/// Report an analysis message to assist the user in diagnosing loops that are
- /// not vectorized.
- void emitAnalysis(Report &Message) {
- DebugLoc DL = TheLoop->getStartLoc();
- if (Instruction *I = Message.getInstr())
- DL = I->getDebugLoc();
- emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
- *TheFunction, DL, Message.str());
+ /// not vectorized. These are handled as LoopAccessReport rather than
+ /// VectorizationReport because the << operator of VectorizationReport returns
+ /// LoopAccessReport.
+ void emitAnalysis(const LoopAccessReport &Message) {
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
}
/// Values used only by @llvm.assume calls.
@@ -995,8 +916,6 @@ private:
LoopVectorizationLegality *Legal;
/// Vector target information.
const TargetTransformInfo &TTI;
- /// Target data layout information.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
const Function *TheFunction;
@@ -1032,7 +951,7 @@ class LoopVectorizeHints {
bool validate(unsigned Val) {
switch (Kind) {
case HK_WIDTH:
- return isPowerOf2_32(Val) && Val <= MaxVectorWidth;
+ return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
case HK_UNROLL:
return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
case HK_FORCE:
@@ -1060,7 +979,8 @@ public:
};
LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
- : Width("vectorize.width", VectorizationFactor, HK_WIDTH),
+ : Width("vectorize.width", VectorizerParams::VectorizationFactor,
+ HK_WIDTH),
Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
TheLoop(L) {
@@ -1068,8 +988,8 @@ public:
getHintsFromMetadata();
// force-vector-interleave overrides DisableInterleaving.
- if (VectorizationInterleave.getNumOccurrences() > 0)
- Interleave.Value = VectorizationInterleave;
+ if (VectorizerParams::isInterleaveForced())
+ Interleave.Value = VectorizerParams::VectorizationInterleave;
DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
@@ -1084,7 +1004,7 @@ public:
/// Dumps all the hint information.
std::string emitRemark() const {
- Report R;
+ VectorizationReport R;
if (Force.Value == LoopVectorizeHints::FK_Disabled)
R << "vectorization is explicitly disabled";
else {
@@ -1260,7 +1180,6 @@ struct LoopVectorize : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
@@ -1268,6 +1187,7 @@ struct LoopVectorize : public FunctionPass {
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
AssumptionCache *AC;
+ LoopAccessAnalysis *LAA;
bool DisableUnrolling;
bool AlwaysVectorize;
@@ -1275,15 +1195,15 @@ struct LoopVectorize : public FunctionPass {
bool runOnFunction(Function &F) override {
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- LI = &getAnalysis<LoopInfo>();
- TTI = &getAnalysis<TargetTransformInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BFI = &getAnalysis<BlockFrequencyInfo>();
- TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ LAA = &getAnalysis<LoopAccessAnalysis>();
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
@@ -1295,12 +1215,6 @@ struct LoopVectorize : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- if (!DL) {
- DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName()
- << ": Missing data layout\n");
- return false;
- }
-
// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
// and can invalidate iterators across the loops.
@@ -1320,6 +1234,40 @@ struct LoopVectorize : public FunctionPass {
return Changed;
}
+ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
+ SmallVector<Metadata *, 4> MDs;
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDs.push_back(nullptr);
+ bool IsUnrollMetadata = false;
+ MDNode *LoopID = L->getLoopID();
+ if (LoopID) {
+ // First find existing loop unrolling disable metadata.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata =
+ S && S->getString().startswith("llvm.loop.unroll.disable");
+ }
+ MDs.push_back(LoopID->getOperand(i));
+ }
+ }
+
+ if (!IsUnrollMetadata) {
+ // Add runtime unroll disable metadata.
+ LLVMContext &Context = L->getHeader()->getContext();
+ SmallVector<Metadata *, 1> DisableOperands;
+ DisableOperands.push_back(
+ MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ MDs.push_back(DisableNode);
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L->setLoopID(NewLoopID);
+ }
+ }
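  // Illustrative sketch (not part of this patch): on a loop with no prior
  // unroll metadata, the latch branch ends up tagged roughly as
  //   br i1 %cond, label %header, label %exit, !llvm.loop !0
  //   !0 = distinct !{!0, !1}
  //   !1 = !{!"llvm.loop.unroll.runtime.disable"}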
+
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
@@ -1394,7 +1342,7 @@ struct LoopVectorize : public FunctionPass {
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI);
+ LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
@@ -1402,8 +1350,7 @@ struct LoopVectorize : public FunctionPass {
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AC, F,
- &Hints);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints);
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -1467,14 +1414,20 @@ struct LoopVectorize : public FunctionPass {
// We decided not to vectorize, but we may want to unroll.
- InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
+ InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, UF);
Unroller.vectorize(&LVL);
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, UF);
LB.vectorize(&LVL);
++LoopsVectorized;
+      // Add metadata to disable runtime unrolling of the scalar loop when
+      // there is no runtime check for strides and memory. In that situation,
+      // the scalar loop is rarely executed and is not worth unrolling.
+ if (!LB.IsSafetyChecksAdded())
+ AddRuntimeUnrollDisableMetaData(L);
+
// Report the vectorization decision.
emitOptimizationRemark(
F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
@@ -1495,11 +1448,12 @@ struct LoopVectorize : public FunctionPass {
AU.addRequiredID(LCSSAID);
AU.addRequired<BlockFrequencyInfo>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AliasAnalysis>();
- AU.addPreserved<LoopInfo>();
+ AU.addRequired<LoopAccessAnalysis>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<AliasAnalysis>();
}
@@ -1513,65 +1467,6 @@ struct LoopVectorize : public FunctionPass {
// LoopVectorizationCostModel.
//===----------------------------------------------------------------------===//
-static Value *stripIntegerCast(Value *V) {
- if (CastInst *CI = dyn_cast<CastInst>(V))
- if (CI->getOperand(0)->getType()->isIntegerTy())
- return CI->getOperand(0);
- return V;
-}
-
-///\brief Replaces the symbolic stride in a pointer SCEV expression by one.
-///
-/// If \p OrigPtr is not null, use it to look up the stride value instead of
-/// \p Ptr.
-static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
- ValueToValueMap &PtrToStride,
- Value *Ptr, Value *OrigPtr = nullptr) {
-
- const SCEV *OrigSCEV = SE->getSCEV(Ptr);
-
- // If there is an entry in the map return the SCEV of the pointer with the
- // symbolic stride replaced by one.
- ValueToValueMap::iterator SI = PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
- if (SI != PtrToStride.end()) {
- Value *StrideVal = SI->second;
-
- // Strip casts.
- StrideVal = stripIntegerCast(StrideVal);
-
- // Replace symbolic stride by one.
- Value *One = ConstantInt::get(StrideVal->getType(), 1);
- ValueToValueMap RewriteMap;
- RewriteMap[StrideVal] = One;
-
- const SCEV *ByOne =
- SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
- DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
- << "\n");
- return ByOne;
- }
-
- // Otherwise, just return the SCEV of the original pointer.
- return SE->getSCEV(Ptr);
-}
-
-void LoopVectorizationLegality::RuntimePointerCheck::insert(
- ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, ValueToValueMap &Strides) {
- // Get the stride replaced scev.
- const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
- assert(AR && "Invalid addrec expression");
- const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
- const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- Pointers.push_back(Ptr);
- Starts.push_back(AR->getStart());
- Ends.push_back(ScEnd);
- IsWritePtr.push_back(WritePtr);
- DependencySetId.push_back(DepSetId);
- AliasSetId.push_back(ASId);
-}
-
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
// We need to place the broadcast of invariant variables outside the loop.
Instruction *Instr = dyn_cast<Instruction>(V);
@@ -1591,11 +1486,13 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
return Shuf;
}
-Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
- bool Negate) {
+Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
+ Value *Step) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() &&
"Elem must be an integer");
+ assert(Step->getType() == Val->getType()->getScalarType() &&
+ "Step has wrong type");
// Create the types.
Type *ITy = Val->getType()->getScalarType();
VectorType *Ty = cast<VectorType>(Val->getType());
@@ -1603,24 +1500,27 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
SmallVector<Constant*, 8> Indices;
// Create a vector of consecutive numbers from zero to VF.
- for (int i = 0; i < VLen; ++i) {
- int64_t Idx = Negate ? (-i) : i;
- Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
- }
+ for (int i = 0; i < VLen; ++i)
+ Indices.push_back(ConstantInt::get(ITy, StartIdx + i));
// Add the consecutive indices to the vector value.
Constant *Cv = ConstantVector::get(Indices);
assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
- return Builder.CreateAdd(Val, Cv, "induction");
+ Step = Builder.CreateVectorSplat(VLen, Step);
+ assert(Step->getType() == Val->getType() && "Invalid step vec");
+ // FIXME: The newly created binary instructions should contain nsw/nuw flags,
+ // which can be found from the original scalar operations.
+ Step = Builder.CreateMul(Cv, Step);
+ return Builder.CreateAdd(Val, Step, "induction");
}
/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
-static unsigned getGEPInductionOperand(const DataLayout *DL,
- const GetElementPtrInst *Gep) {
+static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
unsigned LastOperand = Gep->getNumOperands() - 1;
- unsigned GEPAllocSize = DL->getTypeAllocSize(
+ unsigned GEPAllocSize = DL.getTypeAllocSize(
cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
// Walk backwards and try to peel off zeros.
@@ -1631,7 +1531,7 @@ static unsigned getGEPInductionOperand(const DataLayout *DL,
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
break;
--LastOperand;
}
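  // Illustrative example (not part of this patch): for
  //   getelementptr [100 x i32], [100 x i32]* %A, i64 %i, i64 0
  // the trailing zero index is peeled off because it does not change the
  // final address, and the operand checked for consecutiveness is %i.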
@@ -1649,10 +1549,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
if (Phi && Inductions.count(Phi)) {
InductionInfo II = Inductions[Phi];
- if (IK_PtrInduction == II.IK)
- return 1;
- else if (IK_ReversePtrInduction == II.IK)
- return -1;
+ return II.getConsecutiveDirection();
}
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
@@ -1677,13 +1574,10 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return 0;
InductionInfo II = Inductions[Phi];
- if (IK_PtrInduction == II.IK)
- return 1;
- else if (IK_ReversePtrInduction == II.IK)
- return -1;
+ return II.getConsecutiveDirection();
}
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -1730,7 +1624,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
}
bool LoopVectorizationLegality::isUniform(Value *V) {
- return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+ return LAI->isUniform(V);
}
InnerLoopVectorizer::VectorParts&
@@ -1776,11 +1670,12 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
+ const DataLayout &DL = Instr->getModule()->getDataLayout();
if (!Alignment)
- Alignment = DL->getABITypeAlignment(ScalarDataTy);
+ Alignment = DL.getABITypeAlignment(ScalarDataTy);
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
- unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
- unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+ unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ScalarDataTy);
+ unsigned VectorElementSize = DL.getTypeStoreSize(DataTy) / VF;
if (SI && Legal->blockNeedsPredication(SI->getParent()) &&
!Legal->isMaskRequired(SI))
@@ -1821,7 +1716,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -1864,7 +1759,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
- Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+ Value *PartPtr =
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If we store to reverse consecutive memory locations then we need
@@ -1872,8 +1768,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
StoredVal[Part] = reverseVector(StoredVal[Part]);
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
@@ -1896,13 +1792,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
setDebugLocFromInst(Builder, LI);
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
- Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+ Value *PartPtr =
+ Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
if (Reverse) {
// If the address is consecutive but reversed, then the
// wide load needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
@@ -1992,7 +1889,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1));
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, LI->getBase());
+ VectorLp->addBasicBlockToLoop(CondBlock, *LI);
// Update Builder with newly created basic block.
Builder.SetInsertPoint(InsertPt);
}
@@ -2021,7 +1918,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
if (IfPredicateStore) {
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, LI->getBase());
+ VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
@@ -2078,102 +1975,6 @@ InnerLoopVectorizer::addStrideCheck(Instruction *Loc) {
return std::make_pair(FirstInst, TheCheck);
}
-std::pair<Instruction *, Instruction *>
-InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
- LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
- Legal->getRuntimePointerCheck();
-
- Instruction *tnullptr = nullptr;
- if (!PtrRtCheck->Need)
- return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
-
- unsigned NumPointers = PtrRtCheck->Pointers.size();
- SmallVector<TrackingVH<Value> , 2> Starts;
- SmallVector<TrackingVH<Value> , 2> Ends;
-
- LLVMContext &Ctx = Loc->getContext();
- SCEVExpander Exp(*SE, "induction");
- Instruction *FirstInst = nullptr;
-
- for (unsigned i = 0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck->Pointers[i];
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- if (SE->isLoopInvariant(Sc, OrigLoop)) {
- DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
- Starts.push_back(Ptr);
- Ends.push_back(Ptr);
- } else {
- DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-
- Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
- Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
- Starts.push_back(Start);
- Ends.push_back(End);
- }
- }
-
- IRBuilder<> ChkBuilder(Loc);
- // Our instructions might fold to a constant.
- Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i+1; j < NumPointers; ++j) {
- // No need to check if two readonly pointers intersect.
- if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
- continue;
-
- // Only need to check pointers between two different dependency sets.
- if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
- continue;
- // Only need to check pointers in the same alias set.
- if (PtrRtCheck->AliasSetId[i] != PtrRtCheck->AliasSetId[j])
- continue;
-
- unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
- unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
-
- assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
- (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
- "Trying to bounds check pointers with different address spaces");
-
- Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
- Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
-
- Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
- FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
- Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
- FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
- Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- if (MemoryRuntimeCheck) {
- IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
- "conflict.rdx");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- }
- MemoryRuntimeCheck = IsConflict;
- }
- }
-
- // We have to do this trickery because the IRBuilder might fold the check to a
- // constant expression in which case there is no Instruction anchored in a
- // the block.
- Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
- ConstantInt::getTrue(Ctx));
- ChkBuilder.Insert(Check, "memcheck.conflict");
- FirstInst = getFirstInst(FirstInst, Check, Loc);
- return std::make_pair(FirstInst, Check);
-}
-
void InnerLoopVectorizer::createEmptyLoop() {
/*
In this function we generate a new loop. The new loop will contain
@@ -2238,9 +2039,11 @@ void InnerLoopVectorizer::createEmptyLoop() {
ExitCount = SE->getAddExpr(BackedgeTakeCount,
SE->getConstant(BackedgeTakeCount->getType(), 1));
+ const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout();
+
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*SE, "induction");
+ SCEVExpander Exp(*SE, DL, "induction");
// We need to test whether the backedge-taken count is uint##_max. Adding one
// to it will cause overflow and an incorrect loop trip count in the vector
@@ -2299,13 +2102,13 @@ void InnerLoopVectorizer::createEmptyLoop() {
// before calling any utilities such as SCEV that require valid LoopInfo.
if (ParentLoop) {
ParentLoop->addChildLoop(Lp);
- ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
- ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
- ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(ScalarPH, *LI);
+ ParentLoop->addBasicBlockToLoop(VectorPH, *LI);
+ ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI);
} else {
LI->addTopLevelLoop(Lp);
}
- Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+ Lp->addBasicBlockToLoop(VecBody, *LI);
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
@@ -2360,7 +2163,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
BasicBlock *CheckBlock =
LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
Instruction *OldTerm = LastBypassBlock->getTerminator();
BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm);
@@ -2376,11 +2179,12 @@ void InnerLoopVectorizer::createEmptyLoop() {
std::tie(FirstCheckInst, StrideCheck) =
addStrideCheck(LastBypassBlock->getTerminator());
if (StrideCheck) {
+ AddedSafetyChecks = true;
// Create a new block containing the stride check.
BasicBlock *CheckBlock =
LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
@@ -2398,13 +2202,14 @@ void InnerLoopVectorizer::createEmptyLoop() {
// faster.
Instruction *MemRuntimeCheck;
std::tie(FirstCheckInst, MemRuntimeCheck) =
- addRuntimeCheck(LastBypassBlock->getTerminator());
+ Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator());
if (MemRuntimeCheck) {
+ AddedSafetyChecks = true;
// Create a new block containing the memory check.
BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(MemRuntimeCheck, "vector.memcheck");
+ LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
@@ -2495,33 +2300,13 @@ void InnerLoopVectorizer::createEmptyLoop() {
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.StartValue->getType(),
"cast.crd");
- EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end");
- break;
- }
- case LoopVectorizationLegality::IK_ReverseIntInduction: {
- // Convert the CountRoundDown variable to the PHI size.
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
- II.StartValue->getType(),
- "cast.crd");
- // Handle reverse integer induction counter.
- EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
+ EndValue = II.transform(BypassBuilder, CRD);
+ EndValue->setName("ind.end");
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
- // For pointer induction variables, calculate the offset using
- // the end index.
- EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
- "ptr.ind.end");
- break;
- }
- case LoopVectorizationLegality::IK_ReversePtrInduction: {
- // The value at the end of the loop for the reverse pointer is calculated
- // by creating a GEP with a negative index starting from the start value.
- Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
- Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
- "rev.ind.end");
- EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
- "rev.ptr.ind.end");
+ EndValue = II.transform(BypassBuilder, CountRoundDown);
+ EndValue->setName("ptr.ind.end");
break;
}
}// end of case
@@ -2604,99 +2389,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
Hints.setAlreadyVectorized();
}
-/// This function returns the identity element (or neutral element) for
-/// the operation K.
-Constant*
-LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
- switch (K) {
- case RK_IntegerXor:
- case RK_IntegerAdd:
- case RK_IntegerOr:
- // Adding, Xoring, Oring zero to a number does not change it.
- return ConstantInt::get(Tp, 0);
- case RK_IntegerMult:
- // Multiplying a number by 1 does not change it.
- return ConstantInt::get(Tp, 1);
- case RK_IntegerAnd:
- // AND-ing a number with an all-1 value does not change it.
- return ConstantInt::get(Tp, -1, true);
- case RK_FloatMult:
- // Multiplying a number by 1 does not change it.
- return ConstantFP::get(Tp, 1.0L);
- case RK_FloatAdd:
- // Adding zero to a number does not change it.
- return ConstantFP::get(Tp, 0.0L);
- default:
- llvm_unreachable("Unknown reduction kind");
- }
-}
-
-/// This function translates the reduction kind to an LLVM binary operator.
-static unsigned
-getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
- switch (Kind) {
- case LoopVectorizationLegality::RK_IntegerAdd:
- return Instruction::Add;
- case LoopVectorizationLegality::RK_IntegerMult:
- return Instruction::Mul;
- case LoopVectorizationLegality::RK_IntegerOr:
- return Instruction::Or;
- case LoopVectorizationLegality::RK_IntegerAnd:
- return Instruction::And;
- case LoopVectorizationLegality::RK_IntegerXor:
- return Instruction::Xor;
- case LoopVectorizationLegality::RK_FloatMult:
- return Instruction::FMul;
- case LoopVectorizationLegality::RK_FloatAdd:
- return Instruction::FAdd;
- case LoopVectorizationLegality::RK_IntegerMinMax:
- return Instruction::ICmp;
- case LoopVectorizationLegality::RK_FloatMinMax:
- return Instruction::FCmp;
- default:
- llvm_unreachable("Unknown reduction operation");
- }
-}
-
-Value *createMinMaxOp(IRBuilder<> &Builder,
- LoopVectorizationLegality::MinMaxReductionKind RK,
- Value *Left,
- Value *Right) {
- CmpInst::Predicate P = CmpInst::ICMP_NE;
- switch (RK) {
- default:
- llvm_unreachable("Unknown min/max reduction kind");
- case LoopVectorizationLegality::MRK_UIntMin:
- P = CmpInst::ICMP_ULT;
- break;
- case LoopVectorizationLegality::MRK_UIntMax:
- P = CmpInst::ICMP_UGT;
- break;
- case LoopVectorizationLegality::MRK_SIntMin:
- P = CmpInst::ICMP_SLT;
- break;
- case LoopVectorizationLegality::MRK_SIntMax:
- P = CmpInst::ICMP_SGT;
- break;
- case LoopVectorizationLegality::MRK_FloatMin:
- P = CmpInst::FCMP_OLT;
- break;
- case LoopVectorizationLegality::MRK_FloatMax:
- P = CmpInst::FCMP_OGT;
- break;
- }
-
- Value *Cmp;
- if (RK == LoopVectorizationLegality::MRK_FloatMin ||
- RK == LoopVectorizationLegality::MRK_FloatMax)
- Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
- else
- Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
-
- Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
- return Select;
-}
-
namespace {
struct CSEDenseMapInfo {
static bool canHandle(Instruction *I) {
@@ -2772,6 +2464,95 @@ static Value *addFastMathFlag(Value *V) {
return V;
}
+/// Estimate the overhead of scalarizing a value. Insert and Extract are set if
+/// the result needs to be inserted and/or extracted from vectors.
+static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
+ const TargetTransformInfo &TTI) {
+ if (Ty->isVoidTy())
+ return 0;
+
+ assert(Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
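// Illustrative example (not part of this patch): for Ty == <4 x float> with
// both Insert and Extract set, the estimate is the sum of the target's
// insertelement and extractelement costs for lanes 0 through 3.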
+
+// Estimate cost of a call instruction CI if it were vectorized with factor VF.
+// Return the cost of the instruction, including scalarization overhead if it's
+// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
+// i.e. either vector version isn't available, or is too expensive.
+static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
+ const TargetTransformInfo &TTI,
+ const TargetLibraryInfo *TLI,
+ bool &NeedToScalarize) {
+ Function *F = CI->getCalledFunction();
+ StringRef FnName = CI->getCalledFunction()->getName();
+ Type *ScalarRetTy = CI->getType();
+ SmallVector<Type *, 4> Tys, ScalarTys;
+ for (auto &ArgOp : CI->arg_operands())
+ ScalarTys.push_back(ArgOp->getType());
+
+ // Estimate cost of scalarized vector call. The source operands are assumed
+ // to be vectors, so we need to extract individual elements from there,
+ // execute VF scalar calls, and then gather the result into the vector return
+ // value.
+ unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys);
+ if (VF == 1)
+ return ScalarCallCost;
+
+ // Compute corresponding vector type for return value and arguments.
+ Type *RetTy = ToVectorTy(ScalarRetTy, VF);
+ for (unsigned i = 0, ie = ScalarTys.size(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(ScalarTys[i], VF));
+
+ // Compute costs of unpacking argument values for the scalar calls and
+ // packing the return values to a vector.
+ unsigned ScalarizationCost =
+ getScalarizationOverhead(RetTy, true, false, TTI);
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i)
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true, TTI);
+
+ unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
+
+ // If we can't emit a vector call for this function, then the currently found
+ // cost is the cost we need to return.
+ NeedToScalarize = true;
+ if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin())
+ return Cost;
+
+ // If the corresponding vector cost is cheaper, return its cost.
+ unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys);
+ if (VectorCallCost < Cost) {
+ NeedToScalarize = false;
+ return VectorCallCost;
+ }
+ return Cost;
+}
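// Illustrative sketch (not part of this patch): for a call to a scalar
// function "foo(float)" at VF == 4, the scalarized estimate is
//   4 * cost(foo(float)) + packing/unpacking overhead for <4 x float>
// and that cost is returned unless TLI knows a 4-wide vector variant whose
// call cost is lower, in which case NeedToScalarize is set to false.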
+
+// Estimate cost of an intrinsic call instruction CI if it were vectorized with
+// factor VF. Return the cost of the instruction, including scalarization
+// overhead if it's needed.
+static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
+ const TargetTransformInfo &TTI,
+ const TargetLibraryInfo *TLI) {
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Expected intrinsic call!");
+
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+
+ return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+}
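// Illustrative example (not part of this patch): for a call to
// @llvm.fmuladd.f32 at VF == 4, the widened signature queried from TTI is
// <4 x float> (<4 x float>, <4 x float>, <4 x float>).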
+
void InnerLoopVectorizer::vectorizeLoop() {
//===------------------------------------------------===//
//
@@ -2819,10 +2600,14 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Find the reduction variable descriptor.
assert(Legal->getReductionVars()->count(RdxPhi) &&
"Unable to find the reduction variable");
- LoopVectorizationLegality::ReductionDescriptor RdxDesc =
- (*Legal->getReductionVars())[RdxPhi];
+ ReductionDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi];
- setDebugLocFromInst(Builder, RdxDesc.StartValue);
+ ReductionDescriptor::ReductionKind RK = RdxDesc.getReductionKind();
+ TrackingVH<Value> ReductionStartValue = RdxDesc.getReductionStartValue();
+ Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
+ ReductionInstDesc::MinMaxReductionKind MinMaxKind =
+ RdxDesc.getMinMaxReductionKind();
+ setDebugLocFromInst(Builder, ReductionStartValue);
// We need to generate a reduction vector from the incoming scalar.
// To do so, we need to generate the 'identity' vector and override
@@ -2831,40 +2616,38 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
// This is the vector-clone of the value that leaves the loop.
- VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
+ VectorParts &VectorExit = getVectorValue(LoopExitInst);
Type *VecTy = VectorExit[0]->getType();
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
Value *Identity;
Value *VectorStart;
- if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
- RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
+ if (RK == ReductionDescriptor::RK_IntegerMinMax ||
+ RK == ReductionDescriptor::RK_FloatMinMax) {
      // MinMax reductions have the start value as their identity.
if (VF == 1) {
- VectorStart = Identity = RdxDesc.StartValue;
+ VectorStart = Identity = ReductionStartValue;
} else {
- VectorStart = Identity = Builder.CreateVectorSplat(VF,
- RdxDesc.StartValue,
- "minmax.ident");
+ VectorStart = Identity =
+ Builder.CreateVectorSplat(VF, ReductionStartValue, "minmax.ident");
}
} else {
// Handle other reduction kinds:
Constant *Iden =
- LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
- VecTy->getScalarType());
+ ReductionDescriptor::getReductionIdentity(RK, VecTy->getScalarType());
if (VF == 1) {
Identity = Iden;
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
- VectorStart = RdxDesc.StartValue;
+ VectorStart = ReductionStartValue;
} else {
Identity = ConstantVector::getSplat(VF, Iden);
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
- VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ VectorStart =
+ Builder.CreateInsertElement(Identity, ReductionStartValue, Zero);
}
}
@@ -2893,11 +2676,11 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
VectorParts RdxParts;
- setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
+ setDebugLocFromInst(Builder, LoopExitInst);
for (unsigned part = 0; part < UF; ++part) {
// This PHINode contains the vectorized reduction variable, or
// the initial value vector, if we bypass the vector loop.
- VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
+ VectorParts &RdxExitVal = getVectorValue(LoopExitInst);
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
Value *StartVal = (part == 0) ? VectorStart : Identity;
for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
@@ -2909,7 +2692,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
- unsigned Op = getReductionBinOp(RdxDesc.Kind);
+ unsigned Op = ReductionDescriptor::getReductionBinOp(RK);
setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned part = 1; part < UF; ++part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
@@ -2918,8 +2701,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
ReducedPartRdx, "bin.rdx"));
else
- ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
- ReducedPartRdx, RdxParts[part]);
+ ReducedPartRdx = ReductionDescriptor::createMinMaxOp(
+ Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);
}
if (VF > 1) {
@@ -2950,7 +2733,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
TmpVec = addFastMathFlag(Builder.CreateBinOp(
(Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"));
else
- TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
+ TmpVec = ReductionDescriptor::createMinMaxOp(Builder, MinMaxKind,
+ TmpVec, Shuf);
}
// The result is in the first element of the vector.
@@ -2962,7 +2746,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// block and the middle block.
PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx",
LoopScalarPreHeader->getTerminator());
- BCBlockPhi->addIncoming(RdxDesc.StartValue, LoopBypassBlocks[0]);
+ BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[0]);
BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
// Now, we need to fix the users of the reduction variable
@@ -2980,7 +2764,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// We found our reduction value exit-PHI. Update it with the
// incoming bypass edge.
- if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
+ if (LCSSAPhi->getIncomingValue(0) == LoopExitInst) {
// Add an edge coming from the bypass.
LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
@@ -2995,7 +2779,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
- (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
+ (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}// end of for each redux variable.
fixLCSSAPHIs();
@@ -3136,6 +2920,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
LoopVectorizationLegality::InductionInfo II =
Legal->getInductionVars()->lookup(P);
+ // FIXME: The newly created binary instructions should contain nsw/nuw flags,
+ // which can be found from the original scalar operations.
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
@@ -3153,80 +2939,42 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
"normalized.idx");
NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
- Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
- "offset.idx");
+ Broadcasted = II.transform(Builder, NormalizedIdx);
+ Broadcasted->setName("offset.idx");
}
Broadcasted = getBroadcastInstrs(Broadcasted);
// After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
+ Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue);
return;
}
- case LoopVectorizationLegality::IK_ReverseIntInduction:
case LoopVectorizationLegality::IK_PtrInduction:
- case LoopVectorizationLegality::IK_ReversePtrInduction:
- // Handle reverse integer and pointer inductions.
- Value *StartIdx = ExtendedIdx;
- // This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
- "normalized.idx");
-
- // Handle the reverse integer induction variable case.
- if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
- IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
- Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
- "resize.norm.idx");
- Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
- "reverse.idx");
-
- // This is a new value so do not hoist it out.
- Value *Broadcasted = getBroadcastInstrs(ReverseInd);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding ... -3, -2, -1, 0.
- for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
- true);
- return;
- }
-
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
-
- // Is this a reverse induction ptr or a consecutive induction ptr.
- bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
- II.IK);
-
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx =
+ Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) {
- int EltIndex = (part) * (Reverse ? -1 : 1);
+ int EltIndex = part;
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
- Value *GlobalIdx;
- if (Reverse)
- GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
- else
- GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
-
- Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
- "next.gep");
+ Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Value *SclrGep = II.transform(Builder, GlobalIdx);
+ SclrGep->setName("next.gep");
Entry[part] = SclrGep;
continue;
}
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
- int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+ int EltIndex = i + part * VF;
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
- Value *GlobalIdx;
- if (!Reverse)
- GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
- else
- GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
-
- Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
- "next.gep");
+ Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
+ Value *SclrGep = II.transform(Builder, GlobalIdx);
+ SclrGep->setName("next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
Builder.getInt32(i),
"insert.gep");
@@ -3246,7 +2994,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Nothing to do for PHIs and BR, since we already took care of the
// loop control flow instructions.
continue;
- case Instruction::PHI:{
+ case Instruction::PHI: {
// Vectorize PHINodes.
widenPHIInstruction(it, Entry, UF, VF, PV);
continue;
@@ -3367,8 +3115,12 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+ LoopVectorizationLegality::InductionInfo II =
+ Legal->getInductionVars()->lookup(OldInduction);
+ Constant *Step =
+ ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue());
for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
+ Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
propagateMetadata(Entry, it);
break;
}
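The widened induction no longer assumes a unit step here: each lane gets an offset of 0, Step, 2*Step, and so on via getStepVector. A hand-written sketch of what that step vector means for a strided induction, assuming VF = 4 (illustrative only, not the pass's code):

#include <cstddef>

// Scalar loop with a non-unit induction step of 3.
void scalar(int *a, std::size_t n) {
  for (std::size_t i = 0; i < n; i += 3)
    a[i] = static_cast<int>(i);
}

// Hand-widened equivalent for VF = 4: each lane's index is the broadcast
// induction value plus lane * Step, i.e. the step vector {0, 3, 6, 9}.
void widened(int *a, std::size_t n) {
  const std::size_t Step = 3, VF = 4;
  std::size_t i = 0;
  for (; i + (VF - 1) * Step < n; i += VF * Step)
    for (std::size_t lane = 0; lane < VF; ++lane)
      a[i + lane * Step] = static_cast<int>(i + lane * Step);
  for (; i < n; i += Step)  // scalar remainder
    a[i] = static_cast<int>(i);
}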
@@ -3391,37 +3143,71 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
+
+ StringRef FnName = CI->getCalledFunction()->getName();
+ Function *F = CI->getCalledFunction();
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
- assert(ID && "Not an intrinsic call!");
- switch (ID) {
- case Intrinsic::assume:
- case Intrinsic::lifetime_end:
- case Intrinsic::lifetime_start:
+ if (ID &&
+ (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
+ ID == Intrinsic::lifetime_start)) {
scalarizeInstruction(it);
break;
- default:
- bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value *, 4> Args;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if (HasScalarOpd && i == 1) {
- Args.push_back(CI->getArgOperand(i));
- continue;
- }
- VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
- Args.push_back(Arg[Part]);
- }
- Type *Tys[] = {CI->getType()};
- if (VF > 1)
- Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ }
+ // The flag selects whether the vectorized version of the instruction uses an
+ // intrinsic or an ordinary call, i.e. whether the intrinsic call is at least
+ // as cheap as the vector library call.
+ bool NeedToScalarize;
+ unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ bool UseVectorIntrinsic =
+ ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ if (!UseVectorIntrinsic && NeedToScalarize) {
+ scalarizeInstruction(it);
+ break;
+ }
- Function *F = Intrinsic::getDeclaration(M, ID, Tys);
- Entry[Part] = Builder.CreateCall(F, Args);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ Value *Arg = CI->getArgOperand(i);
+ // Some intrinsics have a scalar argument - don't replace it with a
+ // vector.
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
+ VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
+ Arg = VectorArg[Part];
+ }
+ Args.push_back(Arg);
}
- propagateMetadata(Entry, it);
- break;
+ Function *VectorF;
+ if (UseVectorIntrinsic) {
+ // Use vector version of the intrinsic.
+ Type *TysForDecl[] = {CI->getType()};
+ if (VF > 1)
+ TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
+ } else {
+ // Use vector version of the library call.
+ StringRef VFnName = TLI->getVectorizedFunction(FnName, VF);
+ assert(!VFnName.empty() && "Vector function name is empty.");
+ VectorF = M->getFunction(VFnName);
+ if (!VectorF) {
+ // Generate a declaration
+ FunctionType *FTy = FunctionType::get(RetTy, Tys, false);
+ VectorF =
+ Function::Create(FTy, Function::ExternalLinkage, VFnName, M);
+ VectorF->copyAttributesFrom(F);
+ }
+ }
+ assert(VectorF && "Can't create vector function.");
+ Entry[Part] = Builder.CreateCall(VectorF, Args);
}
+
+ propagateMetadata(Entry, it);
break;
}
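The new logic above picks, per call site, between a vector intrinsic and a vector library routine supplied through TargetLibraryInfo. Conceptually, one library call per element becomes one vector-library call per VF elements. A hedged source-level sketch of that effect; the 4-wide routine and its name are purely illustrative stand-ins for whatever mapping the target library provides:

#include <cmath>
#include <cstddef>

// Hypothetical 4-wide vector routine standing in for a vector math library
// entry (the name is illustrative, not a real library symbol).
static void vsinf4(float *out, const float *in) {
  for (int lane = 0; lane < 4; ++lane)
    out[lane] = std::sin(in[lane]);
}

void scalar(float *out, const float *in, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    out[i] = std::sin(in[i]);          // one library call per element
}

void vectorized(float *out, const float *in, std::size_t n) {
  std::size_t i = 0;
  for (; i + 4 <= n; i += 4)
    vsinf4(out + i, in + i);           // one vector call per 4 elements
  for (; i < n; ++i)
    out[i] = std::sin(in[i]);          // scalar remainder
}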
@@ -3484,7 +3270,7 @@ static bool canIfConvertPHINodes(BasicBlock *BB) {
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion) {
- emitAnalysis(Report() << "if-conversion is disabled");
+ emitAnalysis(VectorizationReport() << "if-conversion is disabled");
return false;
}
@@ -3517,7 +3303,7 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator())) {
- emitAnalysis(Report(BB->getTerminator())
+ emitAnalysis(VectorizationReport(BB->getTerminator())
<< "loop contains a switch statement");
return false;
}
@@ -3525,12 +3311,12 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
if (!blockCanBePredicated(BB, SafePointes)) {
- emitAnalysis(Report(BB->getTerminator())
+ emitAnalysis(VectorizationReport(BB->getTerminator())
<< "control flow cannot be substituted for a select");
return false;
}
} else if (BB != Header && !canIfConvertPHINodes(BB)) {
- emitAnalysis(Report(BB->getTerminator())
+ emitAnalysis(VectorizationReport(BB->getTerminator())
<< "control flow cannot be substituted for a select");
return false;
}
@@ -3545,27 +3331,30 @@ bool LoopVectorizationLegality::canVectorize() {
// be canonicalized.
if (!TheLoop->getLoopPreheader()) {
emitAnalysis(
- Report() << "loop control flow is not understood by vectorizer");
+ VectorizationReport() <<
+ "loop control flow is not understood by vectorizer");
return false;
}
// We can only vectorize innermost loops.
- if (TheLoop->getSubLoopsVector().size()) {
- emitAnalysis(Report() << "loop is not the innermost loop");
+ if (!TheLoop->getSubLoopsVector().empty()) {
+ emitAnalysis(VectorizationReport() << "loop is not the innermost loop");
return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
emitAnalysis(
- Report() << "loop control flow is not understood by vectorizer");
+ VectorizationReport() <<
+ "loop control flow is not understood by vectorizer");
return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
emitAnalysis(
- Report() << "loop control flow is not understood by vectorizer");
+ VectorizationReport() <<
+ "loop control flow is not understood by vectorizer");
return false;
}
@@ -3574,7 +3363,8 @@ bool LoopVectorizationLegality::canVectorize() {
// instructions in the loop are executed the same number of times.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
emitAnalysis(
- Report() << "loop control flow is not understood by vectorizer");
+ VectorizationReport() <<
+ "loop control flow is not understood by vectorizer");
return false;
}
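The checks in this region require a bottom-tested innermost loop with a preheader, a single backedge, and a single exiting block that is also the latch. A mid-loop break violates that shape and trips the reports emitted here. Minimal illustration (not from the patch):

#include <cstddef>

// Vectorizable shape: one exit, taken at the latch, trip count computable.
int sum_all(const int *a, std::size_t n) {
  int s = 0;
  for (std::size_t i = 0; i < n; ++i)
    s += a[i];
  return s;
}

// Rejected shape: the early break adds a second exit, so the exiting block is
// no longer the latch and the backedge-taken count is unknown up front.
int sum_until_negative(const int *a, std::size_t n) {
  int s = 0;
  for (std::size_t i = 0; i < n; ++i) {
    if (a[i] < 0)
      break;
    s += a[i];
  }
  return s;
}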
@@ -3592,7 +3382,8 @@ bool LoopVectorizationLegality::canVectorize() {
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
- emitAnalysis(Report() << "could not determine number of loop iterations");
+ emitAnalysis(VectorizationReport() <<
+ "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -3613,7 +3404,8 @@ bool LoopVectorizationLegality::canVectorize() {
collectLoopUniforms();
DEBUG(dbgs() << "LV: We can vectorize this loop" <<
- (PtrRtCheck.Need ? " (with a runtime bound check)" : "")
+ (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
+ "")
<<"!\n");
// Okay! We can vectorize. At this point we don't have any other mem analysis
@@ -3667,10 +3459,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Look for the attribute signaling the absence of NaNs.
Function &F = *Header->getParent();
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (F.hasFnAttribute("no-nans-fp-math"))
- HasFunNoNaNAttr = F.getAttributes().getAttribute(
- AttributeSet::FunctionIndex,
- "no-nans-fp-math").getValueAsString() == "true";
+ HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -3686,7 +3478,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PhiTy->isIntegerTy() &&
!PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
- emitAnalysis(Report(it)
+ emitAnalysis(VectorizationReport(it)
<< "loop control flow is not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
@@ -3700,14 +3492,15 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// identified reduction value with an outside user.
if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
continue;
- emitAnalysis(Report(it) << "value could not be identified as "
- "an induction or reduction variable");
+ emitAnalysis(VectorizationReport(it) <<
+ "value could not be identified as "
+ "an induction or reduction variable");
return false;
}
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
- emitAnalysis(Report(it)
+ emitAnalysis(VectorizationReport(it)
<< "control flow not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
@@ -3715,18 +3508,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+ ConstantInt *StepValue = nullptr;
// Check if this is an induction variable.
- InductionKind IK = isInductionVariable(Phi);
+ InductionKind IK = isInductionVariable(Phi, StepValue);
if (IK_NoInduction != IK) {
// Get the widest type.
if (!WidestIndTy)
- WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+ WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
else
- WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+ WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
// Int inductions are special because we only allow one IV.
- if (IK == IK_IntInduction) {
+ if (IK == IK_IntInduction && StepValue->isOne()) {
// Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other
// than it is expedient).
@@ -3735,69 +3529,44 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
DEBUG(dbgs() << "LV: Found an induction variable.\n");
- Inductions[Phi] = InductionInfo(StartValue, IK);
+ Inductions[Phi] = InductionInfo(StartValue, IK, StepValue);
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
- emitAnalysis(Report(it) << "use of induction value outside of the "
- "loop is not handled by vectorizer");
+ emitAnalysis(VectorizationReport(it) <<
+ "use of induction value outside of the "
+ "loop is not handled by vectorizer");
return false;
}
continue;
}
- if (AddReductionVar(Phi, RK_IntegerAdd)) {
- DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerMult)) {
- DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerOr)) {
- DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerAnd)) {
- DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerXor)) {
- DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_IntegerMinMax)) {
- DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_FloatMult)) {
- DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_FloatAdd)) {
- DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, RK_FloatMinMax)) {
- DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<
- "\n");
+ if (ReductionDescriptor::isReductionPHI(Phi, TheLoop,
+ Reductions[Phi])) {
+ AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
continue;
}
- emitAnalysis(Report(it) << "value that could not be identified as "
- "reduction is used outside the loop");
+ emitAnalysis(VectorizationReport(it) <<
+ "value that could not be identified as "
+ "reduction is used outside the loop");
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
}// end of PHI handling
- // We still don't handle functions. However, we can ignore dbg intrinsic
- // calls and we do handle certain intrinsic and libm functions.
+ // We handle calls that:
+ // * Are debug info intrinsics.
+ // * Have a mapping to an IR intrinsic.
+ // * Have a vector version available.
CallInst *CI = dyn_cast<CallInst>(it);
- if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
- emitAnalysis(Report(it) << "call instruction cannot be vectorized");
- DEBUG(dbgs() << "LV: Found a call site.\n");
+ if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) &&
+ !(CI->getCalledFunction() && TLI &&
+ TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
+ emitAnalysis(VectorizationReport(it) <<
+ "call instruction cannot be vectorized");
+ DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
return false;
}
@@ -3806,7 +3575,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI &&
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
- emitAnalysis(Report(it)
+ emitAnalysis(VectorizationReport(it)
<< "intrinsic instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
return false;
@@ -3817,7 +3586,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
!it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
- emitAnalysis(Report(it)
+ emitAnalysis(VectorizationReport(it)
<< "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
@@ -3827,7 +3596,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T)) {
- emitAnalysis(Report(ST) << "store instruction cannot be vectorized");
+ emitAnalysis(VectorizationReport(ST) <<
+ "store instruction cannot be vectorized");
return false;
}
if (EnableMemAccessVersioning)
@@ -3841,7 +3611,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
- emitAnalysis(Report(it) << "value cannot be used outside the loop");
+ emitAnalysis(VectorizationReport(it) <<
+ "value cannot be used outside the loop");
return false;
}
@@ -3852,7 +3623,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
if (Inductions.empty()) {
- emitAnalysis(Report()
+ emitAnalysis(VectorizationReport()
<< "loop induction variable could not be identified");
return false;
}
@@ -3863,13 +3634,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
///\brief Remove GEPs whose indices but the last one are loop invariant and
/// return the induction operand of the gep pointer.
-static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE,
- const DataLayout *DL, Loop *Lp) {
+static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
- unsigned InductionOperand = getGEPInductionOperand(DL, GEP);
+ unsigned InductionOperand = getGEPInductionOperand(GEP);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -3898,8 +3668,7 @@ static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
///\brief Get the stride of a pointer access in a loop.
/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a
/// pointer to the Value, or null otherwise.
-static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
- const DataLayout *DL, Loop *Lp) {
+static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
return nullptr;
@@ -3912,7 +3681,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// The size of the pointer access.
int64_t PtrAccessSize = 1;
- Ptr = stripGetElementPtr(Ptr, SE, DL, Lp);
+ Ptr = stripGetElementPtr(Ptr, SE, Lp);
const SCEV *V = SE->getSCEV(Ptr);
if (Ptr != OrigPtr)
@@ -3931,7 +3700,8 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// Strip off the size of access multiplication if we are still analyzing the
// pointer.
if (OrigPtr == Ptr) {
- DL->getTypeAllocSize(PtrTy->getElementType());
+ const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+ DL.getTypeAllocSize(PtrTy->getElementType());
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
if (M->getOperand(0)->getSCEVType() != scConstant)
return nullptr;
@@ -3983,7 +3753,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) {
else
return;
- Value *Stride = getStrideFromPointer(Ptr, SE, DL, TheLoop);
+ Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop);
if (!Stride)
return;
@@ -4012,7 +3782,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
if (I->getType()->isPointerTy() && isConsecutivePtr(I))
Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
- while (Worklist.size()) {
+ while (!Worklist.empty()) {
Instruction *I = dyn_cast<Instruction>(Worklist.back());
Worklist.pop_back();
@@ -4030,1305 +3800,46 @@ void LoopVectorizationLegality::collectLoopUniforms() {
}
}
-namespace {
-/// \brief Analyses memory accesses in a loop.
-///
-/// Checks whether run time pointer checks are needed and builds sets for data
-/// dependence checking.
-class AccessAnalysis {
-public:
- /// \brief Read or write access location.
- typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
-
- /// \brief Set of potential dependent memory accesses.
- typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
-
- AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) :
- DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
-
- /// \brief Register a load and whether it is only read from.
- void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
- Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, false));
- if (IsReadOnly)
- ReadOnlyPtr.insert(Ptr);
- }
-
- /// \brief Register a store.
- void addStore(AliasAnalysis::Location &Loc) {
- Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, true));
- }
-
- /// \brief Check whether we can check the pointers at runtime for
- /// non-intersection.
- bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
- unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop, ValueToValueMap &Strides,
- bool ShouldCheckStride = false);
-
- /// \brief Goes over all memory accesses, checks whether a RT check is needed
- /// and builds sets of dependent accesses.
- void buildDependenceSets() {
- processMemAccesses();
- }
-
- bool isRTCheckNeeded() { return IsRTCheckNeeded; }
-
- bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
- void resetDepChecks() { CheckDeps.clear(); }
-
- MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
-
-private:
- typedef SetVector<MemAccessInfo> PtrAccessSet;
-
- /// \brief Go over all memory access and check whether runtime pointer checks
- /// are needed /// and build sets of dependency check candidates.
- void processMemAccesses();
-
- /// Set of all accesses.
- PtrAccessSet Accesses;
-
- /// Set of accesses that need a further dependence check.
- MemAccessInfoSet CheckDeps;
-
- /// Set of pointers that are read only.
- SmallPtrSet<Value*, 16> ReadOnlyPtr;
-
- const DataLayout *DL;
-
- /// An alias set tracker to partition the access set by underlying object and
- //intrinsic property (such as TBAA metadata).
- AliasSetTracker AST;
-
- /// Sets of potentially dependent accesses - members of one set share an
- /// underlying pointer. The set "CheckDeps" identfies which sets really need a
- /// dependence check.
- DepCandidates &DepCands;
-
- bool IsRTCheckNeeded;
-};
-
-} // end anonymous namespace
-
-/// \brief Check whether a pointer can participate in a runtime bounds check.
-static bool hasComputableBounds(ScalarEvolution *SE, ValueToValueMap &Strides,
- Value *Ptr) {
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
- if (!AR)
- return false;
-
- return AR->isAffine();
-}
-
-/// \brief Check the stride of the pointer and ensure that it does not wrap in
-/// the address space.
-static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
- const Loop *Lp, ValueToValueMap &StridesMap);
-
-bool AccessAnalysis::canCheckPtrAtRT(
- LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
- unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop,
- ValueToValueMap &StridesMap, bool ShouldCheckStride) {
- // Find pointers with computable bounds. We are going to use this information
- // to place a runtime bound check.
- bool CanDoRT = true;
-
- bool IsDepCheckNeeded = isDependencyCheckNeeded();
- NumComparisons = 0;
-
- // We assign a consecutive id to access from different alias sets.
- // Accesses between different groups doesn't need to be checked.
- unsigned ASId = 1;
- for (auto &AS : AST) {
- unsigned NumReadPtrChecks = 0;
- unsigned NumWritePtrChecks = 0;
-
- // We assign consecutive id to access from different dependence sets.
- // Accesses within the same set don't need a runtime check.
- unsigned RunningDepId = 1;
- DenseMap<Value *, unsigned> DepSetId;
-
- for (auto A : AS) {
- Value *Ptr = A.getValue();
- bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
- MemAccessInfo Access(Ptr, IsWrite);
-
- if (IsWrite)
- ++NumWritePtrChecks;
- else
- ++NumReadPtrChecks;
-
- if (hasComputableBounds(SE, StridesMap, Ptr) &&
- // When we run after a failing dependency check we have to make sure we
- // don't have wrapping pointers.
- (!ShouldCheckStride ||
- isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
- // The id of the dependence set.
- unsigned DepId;
-
- if (IsDepCheckNeeded) {
- Value *Leader = DepCands.getLeaderValue(Access).getPointer();
- unsigned &LeaderId = DepSetId[Leader];
- if (!LeaderId)
- LeaderId = RunningDepId++;
- DepId = LeaderId;
- } else
- // Each access has its own dependence set.
- DepId = RunningDepId++;
-
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
-
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
- } else {
- CanDoRT = false;
- }
- }
-
- if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
- NumComparisons += 0; // Only one dependence set.
- else {
- NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
- NumWritePtrChecks - 1));
- }
-
- ++ASId;
- }
-
- // If the pointers that we would use for the bounds comparison have different
- // address spaces, assume the values aren't directly comparable, so we can't
- // use them for the runtime check. We also have to assume they could
- // overlap. In the future there should be metadata for whether address spaces
- // are disjoint.
- unsigned NumPointers = RtCheck.Pointers.size();
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i + 1; j < NumPointers; ++j) {
- // Only need to check pointers between two different dependency sets.
- if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
- continue;
- // Only need to check pointers in the same alias set.
- if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
- continue;
-
- Value *PtrI = RtCheck.Pointers[i];
- Value *PtrJ = RtCheck.Pointers[j];
-
- unsigned ASi = PtrI->getType()->getPointerAddressSpace();
- unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
- if (ASi != ASj) {
- DEBUG(dbgs() << "LV: Runtime check would require comparison between"
- " different address spaces\n");
- return false;
- }
- }
- }
-
- return CanDoRT;
-}
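This runtime-check machinery is deleted from the vectorizer in this patch; the new code consults LAI->getRuntimePointerCheck() instead (see the canVectorize hunk above). The effect of such a check at run time is an overlap guard in front of the vector loop. A hand-written analogue of that guard, for two pointers of n ints (illustrative sketch, not generated code):

#include <cstddef>
#include <cstdint>

// If the byte ranges touched by the two pointers cannot overlap, the first
// branch may treat the accesses as independent (and thus vectorize); otherwise
// fall back to the conservative scalar loop.
void add_arrays(int *dst, const int *src, std::size_t n) {
  std::uintptr_t d = reinterpret_cast<std::uintptr_t>(dst);
  std::uintptr_t s = reinterpret_cast<std::uintptr_t>(src);
  std::uintptr_t bytes = n * sizeof(int);
  bool no_overlap = d + bytes <= s || s + bytes <= d;
  if (no_overlap) {
    for (std::size_t i = 0; i < n; ++i)   // safe to vectorize
      dst[i] += src[i];
  } else {
    for (std::size_t i = 0; i < n; ++i)   // conservative scalar path
      dst[i] += src[i];
  }
}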
-
-void AccessAnalysis::processMemAccesses() {
- // We process the set twice: first we process read-write pointers, last we
- // process read-only pointers. This allows us to skip dependence tests for
- // read-only pointers.
-
- DEBUG(dbgs() << "LV: Processing memory accesses...\n");
- DEBUG(dbgs() << " AST: "; AST.dump());
- DEBUG(dbgs() << "LV: Accesses:\n");
- DEBUG({
- for (auto A : Accesses)
- dbgs() << "\t" << *A.getPointer() << " (" <<
- (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
- "read-only" : "read")) << ")\n";
- });
-
- // The AliasSetTracker has nicely partitioned our pointers by metadata
- // compatibility and potential for underlying-object overlap. As a result, we
- // only need to check for potential pointer dependencies within each alias
- // set.
- for (auto &AS : AST) {
- // Note that both the alias-set tracker and the alias sets themselves used
- // linked lists internally and so the iteration order here is deterministic
- // (matching the original instruction order within each set).
-
- bool SetHasWrite = false;
-
- // Map of pointers to last access encountered.
- typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
- UnderlyingObjToAccessMap ObjToLastAccess;
-
- // Set of access to check after all writes have been processed.
- PtrAccessSet DeferredAccesses;
-
- // Iterate over each alias set twice, once to process read/write pointers,
- // and then to process read-only pointers.
- for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
- bool UseDeferred = SetIteration > 0;
- PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
-
- for (auto AV : AS) {
- Value *Ptr = AV.getValue();
-
- // For a single memory access in AliasSetTracker, Accesses may contain
- // both read and write, and they both need to be handled for CheckDeps.
- for (auto AC : S) {
- if (AC.getPointer() != Ptr)
- continue;
-
- bool IsWrite = AC.getInt();
-
- // If we're using the deferred access set, then it contains only
- // reads.
- bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
- if (UseDeferred && !IsReadOnlyPtr)
- continue;
- // Otherwise, the pointer must be in the PtrAccessSet, either as a
- // read or a write.
- assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
- S.count(MemAccessInfo(Ptr, false))) &&
- "Alias-set pointer not in the access set?");
-
- MemAccessInfo Access(Ptr, IsWrite);
- DepCands.insert(Access);
-
- // Memorize read-only pointers for later processing and skip them in
- // the first round (they need to be checked after we have seen all
- // write pointers). Note: we also mark pointer that are not
- // consecutive as "read-only" pointers (so that we check
- // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
- if (!UseDeferred && IsReadOnlyPtr) {
- DeferredAccesses.insert(Access);
- continue;
- }
-
- // If this is a write - check other reads and writes for conflicts. If
- // this is a read only check other writes for conflicts (but only if
- // there is no other write to the ptr - this is an optimization to
- // catch "a[i] = a[i] + " without having to do a dependence check).
- if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
- CheckDeps.insert(Access);
- IsRTCheckNeeded = true;
- }
-
- if (IsWrite)
- SetHasWrite = true;
-
- // Create sets of pointers connected by a shared alias set and
- // underlying object.
- typedef SmallVector<Value *, 16> ValueVector;
- ValueVector TempObjects;
- GetUnderlyingObjects(Ptr, TempObjects, DL);
- for (Value *UnderlyingObj : TempObjects) {
- UnderlyingObjToAccessMap::iterator Prev =
- ObjToLastAccess.find(UnderlyingObj);
- if (Prev != ObjToLastAccess.end())
- DepCands.unionSets(Access, Prev->second);
-
- ObjToLastAccess[UnderlyingObj] = Access;
- }
- }
- }
- }
- }
-}
-
-namespace {
-/// \brief Checks memory dependences among accesses to the same underlying
-/// object to determine whether there vectorization is legal or not (and at
-/// which vectorization factor).
-///
-/// This class works under the assumption that we already checked that memory
-/// locations with different underlying pointers are "must-not alias".
-/// We use the ScalarEvolution framework to symbolically evalutate access
-/// functions pairs. Since we currently don't restructure the loop we can rely
-/// on the program order of memory accesses to determine their safety.
-/// At the moment we will only deem accesses as safe for:
-/// * A negative constant distance assuming program order.
-///
-/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
-/// a[i] = tmp; y = a[i];
-///
-/// The latter case is safe because later checks guarantuee that there can't
-/// be a cycle through a phi node (that is, we check that "x" and "y" is not
-/// the same variable: a header phi can only be an induction or a reduction, a
-/// reduction can't have a memory sink, an induction can't have a memory
-/// source). This is important and must not be violated (or we have to
-/// resort to checking for cycles through memory).
-///
-/// * A positive constant distance assuming program order that is bigger
-/// than the biggest memory access.
-///
-/// tmp = a[i] OR b[i] = x
-/// a[i+2] = tmp y = b[i+2];
-///
-/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
-///
-/// * Zero distances and all accesses have the same size.
-///
-class MemoryDepChecker {
-public:
- typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
-
- MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L)
- : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
- ShouldRetryWithRuntimeCheck(false) {}
-
- /// \brief Register the location (instructions are given increasing numbers)
- /// of a write access.
- void addAccess(StoreInst *SI) {
- Value *Ptr = SI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
- InstMap.push_back(SI);
- ++AccessIdx;
- }
-
- /// \brief Register the location (instructions are given increasing numbers)
- /// of a write access.
- void addAccess(LoadInst *LI) {
- Value *Ptr = LI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
- InstMap.push_back(LI);
- ++AccessIdx;
- }
-
- /// \brief Check whether the dependencies between the accesses are safe.
- ///
- /// Only checks sets with elements in \p CheckDeps.
- bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps, ValueToValueMap &Strides);
-
- /// \brief The maximum number of bytes of a vector register we can vectorize
- /// the accesses safely with.
- unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
-
- /// \brief In same cases when the dependency check fails we can still
- /// vectorize the loop with a dynamic array access check.
- bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
-
-private:
- ScalarEvolution *SE;
- const DataLayout *DL;
- const Loop *InnermostLoop;
-
- /// \brief Maps access locations (ptr, read/write) to program order.
- DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
-
- /// \brief Memory access instructions in program order.
- SmallVector<Instruction *, 16> InstMap;
-
- /// \brief The program order index to be used for the next instruction.
- unsigned AccessIdx;
-
- // We can access this many bytes in parallel safely.
- unsigned MaxSafeDepDistBytes;
-
- /// \brief If we see a non-constant dependence distance we can still try to
- /// vectorize this loop with runtime checks.
- bool ShouldRetryWithRuntimeCheck;
-
- /// \brief Check whether there is a plausible dependence between the two
- /// accesses.
- ///
- /// Access \p A must happen before \p B in program order. The two indices
- /// identify the index into the program order map.
- ///
- /// This function checks whether there is a plausible dependence (or the
- /// absence of such can't be proved) between the two accesses. If there is a
- /// plausible dependence but the dependence distance is bigger than one
- /// element access it records this distance in \p MaxSafeDepDistBytes (if this
- /// distance is smaller than any other distance encountered so far).
- /// Otherwise, this function returns true signaling a possible dependence.
- bool isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx,
- ValueToValueMap &Strides);
-
- /// \brief Check whether the data dependence could prevent store-load
- /// forwarding.
- bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
-};
-
-} // end anonymous namespace
-
-static bool isInBoundsGep(Value *Ptr) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
- return GEP->isInBounds();
- return false;
-}
-
-/// \brief Check whether the access through \p Ptr has a constant stride.
-static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
- const Loop *Lp, ValueToValueMap &StridesMap) {
- const Type *Ty = Ptr->getType();
- assert(Ty->isPointerTy() && "Unexpected non-ptr");
-
- // Make sure that the pointer does not point to aggregate types.
- const PointerType *PtrTy = cast<PointerType>(Ty);
- if (PtrTy->getElementType()->isAggregateType()) {
- DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr <<
- "\n");
- return 0;
- }
-
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
-
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
- if (!AR) {
- DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
- << *Ptr << " SCEV: " << *PtrScev << "\n");
- return 0;
- }
-
- // The accesss function must stride over the innermost loop.
- if (Lp != AR->getLoop()) {
- DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " <<
- *Ptr << " SCEV: " << *PtrScev << "\n");
- }
-
- // The address calculation must not wrap. Otherwise, a dependence could be
- // inverted.
- // An inbounds getelementptr that is a AddRec with a unit stride
- // cannot wrap per definition. The unit stride requirement is checked later.
- // An getelementptr without an inbounds attribute and unit stride would have
- // to access the pointer value "0" which is undefined behavior in address
- // space 0, therefore we can also vectorize this case.
- bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
- bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
- if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
- DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "
- << *Ptr << " SCEV: " << *PtrScev << "\n");
- return 0;
- }
-
- // Check the step is constant.
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // Calculate the pointer stride and check if it is consecutive.
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
- if (!C) {
- DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<
- " SCEV: " << *PtrScev << "\n");
- return 0;
- }
-
- int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
- const APInt &APStepVal = C->getValue()->getValue();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return 0;
-
- int64_t StepVal = APStepVal.getSExtValue();
-
- // Strided access.
- int64_t Stride = StepVal / Size;
- int64_t Rem = StepVal % Size;
- if (Rem)
- return 0;
-
- // If the SCEV could wrap but we have an inbounds gep with a unit stride we
- // know we can't "wrap around the address space". In case of address space
- // zero we know that this won't happen without triggering undefined behavior.
- if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
- Stride != 1 && Stride != -1)
- return 0;
-
- return Stride;
-}
-
-bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
- unsigned TypeByteSize) {
- // If loads occur at a distance that is not a multiple of a feasible vector
- // factor store-load forwarding does not take place.
- // Positive dependences might cause troubles because vectorizing them might
- // prevent store-load forwarding making vectorized code run a lot slower.
- // a[i] = a[i-3] ^ a[i-8];
- // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
- // hence on your typical architecture store-load forwarding does not take
- // place. Vectorizing in such cases does not make sense.
- // Store-load forwarding distance.
- const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
- // Maximum vector factor.
- unsigned MaxVFWithoutSLForwardIssues = MaxVectorWidth*TypeByteSize;
- if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
- MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
-
- for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
- vf *= 2) {
- if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
- MaxVFWithoutSLForwardIssues = (vf >>=1);
- break;
- }
- }
-
- if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
- DEBUG(dbgs() << "LV: Distance " << Distance <<
- " that could cause a store-load forwarding conflict\n");
- return true;
- }
-
- if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
- MaxVFWithoutSLForwardIssues != MaxVectorWidth*TypeByteSize)
- MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
- return false;
-}
-
-bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx,
- ValueToValueMap &Strides) {
- assert (AIdx < BIdx && "Must pass arguments in program order");
-
- Value *APtr = A.getPointer();
- Value *BPtr = B.getPointer();
- bool AIsWrite = A.getInt();
- bool BIsWrite = B.getInt();
-
- // Two reads are independent.
- if (!AIsWrite && !BIsWrite)
- return false;
-
- // We cannot check pointers in different address spaces.
- if (APtr->getType()->getPointerAddressSpace() !=
- BPtr->getType()->getPointerAddressSpace())
- return true;
-
- const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
- const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
-
- int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides);
- int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides);
-
- const SCEV *Src = AScev;
- const SCEV *Sink = BScev;
-
- // If the induction step is negative we have to invert source and sink of the
- // dependence.
- if (StrideAPtr < 0) {
- //Src = BScev;
- //Sink = AScev;
- std::swap(APtr, BPtr);
- std::swap(Src, Sink);
- std::swap(AIsWrite, BIsWrite);
- std::swap(AIdx, BIdx);
- std::swap(StrideAPtr, StrideBPtr);
- }
-
- const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
-
- DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink
- << "(Induction step: " << StrideAPtr << ")\n");
- DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "
- << *InstMap[BIdx] << ": " << *Dist << "\n");
-
- // Need consecutive accesses. We don't want to vectorize
- // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
- // the address space.
- if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
- DEBUG(dbgs() << "Non-consecutive pointer access\n");
- return true;
- }
-
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
- if (!C) {
- DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n");
- ShouldRetryWithRuntimeCheck = true;
- return true;
- }
-
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
- unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
-
- // Negative distances are not plausible dependencies.
- const APInt &Val = C->getValue()->getValue();
- if (Val.isNegative()) {
- bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
- if (IsTrueDataDependence &&
- (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
- ATy != BTy))
- return true;
-
- DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");
- return false;
- }
-
- // Write to the same location with the same size.
- // Could be improved to assert type sizes are the same (i32 == float, etc).
- if (Val == 0) {
- if (ATy == BTy)
- return false;
- DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
- return true;
- }
-
- assert(Val.isStrictlyPositive() && "Expect a positive value");
-
- // Positive distance bigger than max vectorization factor.
- if (ATy != BTy) {
- DEBUG(dbgs() <<
- "LV: ReadWrite-Write positive dependency with different types\n");
- return false;
- }
-
- unsigned Distance = (unsigned) Val.getZExtValue();
-
- // Bail out early if passed-in parameters make vectorization not feasible.
- unsigned ForcedFactor = VectorizationFactor ? VectorizationFactor : 1;
- unsigned ForcedUnroll = VectorizationInterleave ? VectorizationInterleave : 1;
-
- // The distance must be bigger than the size needed for a vectorized version
- // of the operation and the size of the vectorized operation must not be
- // bigger than the currrent maximum size.
- if (Distance < 2*TypeByteSize ||
- 2*TypeByteSize > MaxSafeDepDistBytes ||
- Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
- DEBUG(dbgs() << "LV: Failure because of Positive distance "
- << Val.getSExtValue() << '\n');
- return true;
- }
-
- MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
- Distance : MaxSafeDepDistBytes;
-
- bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
- if (IsTrueDataDependence &&
- couldPreventStoreLoadForward(Distance, TypeByteSize))
- return true;
-
- DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
- " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
-
- return false;
-}
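The distance logic removed above caps the vectorization factor by the smallest positive dependence distance. An illustrative example of why, assuming 4-byte ints (not part of the patch):

#include <cstddef>

// a[i + 2] depends on a[i]: the dependence distance is 2 elements (8 bytes).
// With VF <= 2 no lane of a vector iteration reads a location stored by that
// same vector iteration, so results match the scalar loop; VF = 4 would load
// a[i + 2] before the store from two iterations earlier had happened.
void shift_by_two(int *a, std::size_t n) {
  for (std::size_t i = 0; i + 2 < n; ++i)
    a[i + 2] = a[i] + 1;
}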
-
-bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps,
- ValueToValueMap &Strides) {
-
- MaxSafeDepDistBytes = -1U;
- while (!CheckDeps.empty()) {
- MemAccessInfo CurAccess = *CheckDeps.begin();
-
- // Get the relevant memory access set.
- EquivalenceClasses<MemAccessInfo>::iterator I =
- AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
-
- // Check accesses within this set.
- EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
- AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
-
- // Check every access pair.
- while (AI != AE) {
- CheckDeps.erase(*AI);
- EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
- while (OI != AE) {
- // Check every accessing instruction pair in program order.
- for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
- I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
- for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
- I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
- if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
- return false;
- if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
- return false;
- }
- ++OI;
- }
- AI++;
- }
- }
- return true;
-}
-
bool LoopVectorizationLegality::canVectorizeMemory() {
-
- typedef SmallVector<Value*, 16> ValueVector;
- typedef SmallPtrSet<Value*, 16> ValueSet;
-
- // Holds the Load and Store *instructions*.
- ValueVector Loads;
- ValueVector Stores;
-
- // Holds all the different accesses in the loop.
- unsigned NumReads = 0;
- unsigned NumReadWrites = 0;
-
- PtrRtCheck.Pointers.clear();
- PtrRtCheck.Need = false;
-
- const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
- MemoryDepChecker DepChecker(SE, DL, TheLoop);
-
- // For each block.
- for (Loop::block_iterator bb = TheLoop->block_begin(),
- be = TheLoop->block_end(); bb != be; ++bb) {
-
- // Scan the BB and collect legal loads and stores.
- for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
- ++it) {
-
- // If this is a load, save it. If this instruction can read from memory
- // but is not a load, then we quit. Notice that we don't handle function
- // calls that read or write.
- if (it->mayReadFromMemory()) {
- // Many math library functions read the rounding mode. We will only
- // vectorize a loop if it contains known function calls that don't set
- // the flag. Therefore, it is safe to ignore this read from memory.
- CallInst *Call = dyn_cast<CallInst>(it);
- if (Call && getIntrinsicIDForCall(Call, TLI))
- continue;
-
- LoadInst *Ld = dyn_cast<LoadInst>(it);
- if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
- emitAnalysis(Report(Ld)
- << "read with atomic ordering or volatile read");
- DEBUG(dbgs() << "LV: Found a non-simple load.\n");
- return false;
- }
- NumLoads++;
- Loads.push_back(Ld);
- DepChecker.addAccess(Ld);
- continue;
- }
-
- // Save 'store' instructions. Abort if other instructions write to memory.
- if (it->mayWriteToMemory()) {
- StoreInst *St = dyn_cast<StoreInst>(it);
- if (!St) {
- emitAnalysis(Report(it) << "instruction cannot be vectorized");
- return false;
- }
- if (!St->isSimple() && !IsAnnotatedParallel) {
- emitAnalysis(Report(St)
- << "write with atomic ordering or volatile write");
- DEBUG(dbgs() << "LV: Found a non-simple store.\n");
- return false;
- }
- NumStores++;
- Stores.push_back(St);
- DepChecker.addAccess(St);
- }
- } // Next instr.
- } // Next block.
-
- // Now we have two lists that hold the loads and the stores.
- // Next, we find the pointers that they use.
-
- // Check if we see any stores. If there are no stores, then we don't
- // care if the pointers are *restrict*.
- if (!Stores.size()) {
- DEBUG(dbgs() << "LV: Found a read-only loop!\n");
- return true;
- }
-
- AccessAnalysis::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(DL, AA, DependentAccesses);
-
- // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
- // multiple times on the same object. If the ptr is accessed twice, once
- // for read and once for write, it will only appear once (on the write
- // list). This is okay, since we are going to check for conflicts between
- // writes and between reads and writes, but not between reads and reads.
- ValueSet Seen;
-
- ValueVector::iterator I, IE;
- for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
- StoreInst *ST = cast<StoreInst>(*I);
- Value* Ptr = ST->getPointerOperand();
-
- if (isUniform(Ptr)) {
- emitAnalysis(
- Report(ST)
- << "write to a loop invariant address could not be vectorized");
- DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
- return false;
- }
-
- // If we did *not* see this pointer before, insert it to the read-write
- // list. At this phase it is only a 'write' list.
- if (Seen.insert(Ptr).second) {
- ++NumReadWrites;
-
- AliasAnalysis::Location Loc = AA->getLocation(ST);
- // The TBAA metadata could have a control dependency on the predication
- // condition, so we cannot rely on it when determining whether or not we
- // need runtime pointer checks.
- if (blockNeedsPredication(ST->getParent()))
- Loc.AATags.TBAA = nullptr;
-
- Accesses.addStore(Loc);
- }
- }
-
- if (IsAnnotatedParallel) {
- DEBUG(dbgs()
- << "LV: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
- return true;
- }
-
- for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
- LoadInst *LD = cast<LoadInst>(*I);
- Value* Ptr = LD->getPointerOperand();
- // If we did *not* see this pointer before, insert it to the
- // read list. If we *did* see it before, then it is already in
- // the read-write list. This allows us to vectorize expressions
- // such as A[i] += x; Because the address of A[i] is a read-write
- // pointer. This only works if the index of A[i] is consecutive.
- // If the address of i is unknown (for example A[B[i]]) then we may
- // read a few words, modify, and write a few words, and some of the
- // words may be written to the same address.
- bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second ||
- !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
- ++NumReads;
- IsReadOnlyPtr = true;
- }
-
- AliasAnalysis::Location Loc = AA->getLocation(LD);
- // The TBAA metadata could have a control dependency on the predication
- // condition, so we cannot rely on it when determining whether or not we
- // need runtime pointer checks.
- if (blockNeedsPredication(LD->getParent()))
- Loc.AATags.TBAA = nullptr;
-
- Accesses.addLoad(Loc, IsReadOnlyPtr);
- }
-
- // If we write (or read-write) to a single destination and there are no
- // other reads in this loop then is it safe to vectorize.
- if (NumReadWrites == 1 && NumReads == 0) {
- DEBUG(dbgs() << "LV: Found a write-only loop!\n");
- return true;
- }
-
- // Build dependence sets and check whether we need a runtime pointer bounds
- // check.
- Accesses.buildDependenceSets();
- bool NeedRTCheck = Accesses.isRTCheckNeeded();
-
- // Find pointers with computable bounds. We are going to use this information
- // to place a runtime bound check.
- unsigned NumComparisons = 0;
- bool CanDoRT = false;
- if (NeedRTCheck)
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
- Strides);
-
- DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
- " pointer comparisons.\n");
-
- // If we only have one set of dependences to check pointers among we don't
- // need a runtime check.
- if (NumComparisons == 0 && NeedRTCheck)
- NeedRTCheck = false;
-
- // Check that we did not collect too many pointers or found an unsizeable
- // pointer.
- if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
- PtrRtCheck.reset();
- CanDoRT = false;
- }
-
- if (CanDoRT) {
- DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
- }
-
- if (NeedRTCheck && !CanDoRT) {
- emitAnalysis(Report() << "cannot identify array bounds");
- DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
- "the array bounds.\n");
- PtrRtCheck.reset();
- return false;
- }
-
- PtrRtCheck.Need = NeedRTCheck;
-
- bool CanVecMem = true;
- if (Accesses.isDependencyCheckNeeded()) {
- DEBUG(dbgs() << "LV: Checking memory dependencies\n");
- CanVecMem = DepChecker.areDepsSafe(
- DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
- MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
-
- if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
- DEBUG(dbgs() << "LV: Retrying with memory checks\n");
- NeedRTCheck = true;
-
- // Clear the dependency checks. We assume they are not needed.
- Accesses.resetDepChecks();
-
- PtrRtCheck.reset();
- PtrRtCheck.Need = true;
-
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
- TheLoop, Strides, true);
- // Check that we did not collect too many pointers or found an unsizeable
- // pointer.
- if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
- if (!CanDoRT && NumComparisons > 0)
- emitAnalysis(Report()
- << "cannot check memory dependencies at runtime");
- else
- emitAnalysis(Report()
- << NumComparisons << " exceeds limit of "
- << RuntimeMemoryCheckThreshold
- << " dependent memory operations checked at runtime");
- DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
- PtrRtCheck.reset();
- return false;
- }
-
- CanVecMem = true;
- }
- }
-
- if (!CanVecMem)
- emitAnalysis(Report() << "unsafe dependent memory operations in loop");
-
- DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
- " need a runtime memory check.\n");
-
- return CanVecMem;
-}
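The removed analysis above treats a pointer that is both loaded and stored in the same iteration (the A[i] += x case) as read-write rather than read-only, and it falls back to a runtime bounds check when aliasing cannot be proven statically. A minimal standalone C++ sketch of that situation and of the kind of no-overlap test the vectorizer would have to emit (the function and the check are illustrative, not the pass's actual output):

#include <cstddef>
#include <cstdint>

// 'a' is accessed through a read-write pointer (a[i] += ... reads and writes
// the same address); 'b' is read-only.  If 'a' and 'b' may alias, the loop
// can only be vectorized behind a runtime check that the ranges are disjoint.
void axpy(float *a, const float *b, float x, std::size_t n) {
  std::uintptr_t aBeg = reinterpret_cast<std::uintptr_t>(a);
  std::uintptr_t bBeg = reinterpret_cast<std::uintptr_t>(b);
  std::uintptr_t aEnd = aBeg + n * sizeof(float);
  std::uintptr_t bEnd = bBeg + n * sizeof(float);
  bool noOverlap = aEnd <= bBeg || bEnd <= aBeg;   // runtime bound check
  if (noOverlap) {
    for (std::size_t i = 0; i < n; ++i)            // vectorizable version
      a[i] += x * b[i];
  } else {
    for (std::size_t i = 0; i < n; ++i)            // scalar fallback
      a[i] += x * b[i];
  }
}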
-
-static bool hasMultipleUsesOf(Instruction *I,
- SmallPtrSetImpl<Instruction *> &Insts) {
- unsigned NumUses = 0;
- for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) {
- if (Insts.count(dyn_cast<Instruction>(*Use)))
- ++NumUses;
- if (NumUses > 1)
- return true;
- }
-
- return false;
-}
-
-static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set) {
- for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
- if (!Set.count(dyn_cast<Instruction>(*Use)))
- return false;
- return true;
-}
-
-bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
- ReductionKind Kind) {
- if (Phi->getNumIncomingValues() != 2)
+ LAI = &LAA->getInfo(TheLoop, Strides);
+ auto &OptionalReport = LAI->getReport();
+ if (OptionalReport)
+ emitAnalysis(VectorizationReport(*OptionalReport));
+ if (!LAI->canVectorizeMemory())
return false;
- // Reduction variables are only found in the loop header block.
- if (Phi->getParent() != TheLoop->getHeader())
+ if (LAI->hasStoreToLoopInvariantAddress()) {
+ emitAnalysis(
+ VectorizationReport()
+ << "write to a loop invariant address could not be vectorized");
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
return false;
-
- // Obtain the reduction start value from the value that comes from the loop
- // preheader.
- Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
-
- // ExitInstruction is the single value which is used outside the loop.
- // We only allow for a single reduction value to be used outside the loop.
- // This includes users of the reduction variables (which form a cycle
- // that ends in the phi node).
- Instruction *ExitInstruction = nullptr;
- // Indicates that we found a reduction operation in our scan.
- bool FoundReduxOp = false;
-
- // We start with the PHI node and scan for all of the users of this
- // instruction. All users must be instructions that can be used as reduction
- // variables (such as ADD). We must have a single out-of-block user. The cycle
- // must include the original PHI.
- bool FoundStartPHI = false;
-
- // To recognize min/max patterns formed by a icmp select sequence, we store
- // the number of instruction we saw from the recognized min/max pattern,
- // to make sure we only see exactly the two instructions.
- unsigned NumCmpSelectPatternInst = 0;
- ReductionInstDesc ReduxDesc(false, nullptr);
-
- SmallPtrSet<Instruction *, 8> VisitedInsts;
- SmallVector<Instruction *, 8> Worklist;
- Worklist.push_back(Phi);
- VisitedInsts.insert(Phi);
-
- // A value in the reduction can be used:
- // - By the reduction:
- // - Reduction operation:
- // - One use of reduction value (safe).
- // - Multiple use of reduction value (not safe).
- // - PHI:
- // - All uses of the PHI must be the reduction (safe).
- // - Otherwise, not safe.
- // - By one instruction outside of the loop (safe).
- // - By further instructions outside of the loop (not safe).
- // - By an instruction that is not part of the reduction (not safe).
- // This is either:
- // * An instruction type other than PHI or the reduction operation.
- // * A PHI in the header other than the initial PHI.
- while (!Worklist.empty()) {
- Instruction *Cur = Worklist.back();
- Worklist.pop_back();
-
- // No Users.
- // If the instruction has no users then this is a broken chain and can't be
- // a reduction variable.
- if (Cur->use_empty())
- return false;
-
- bool IsAPhi = isa<PHINode>(Cur);
-
- // A header PHI use other than the original PHI.
- if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
- return false;
-
- // Reductions of instructions such as Div and Sub are only possible if the
- // LHS is the reduction variable.
- if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
- !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
- !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
- return false;
-
- // Any reduction instruction must be of one of the allowed kinds.
- ReduxDesc = isReductionInstr(Cur, Kind, ReduxDesc);
- if (!ReduxDesc.IsReduction)
- return false;
-
- // A reduction operation must only have one use of the reduction value.
- if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
- hasMultipleUsesOf(Cur, VisitedInsts))
- return false;
-
- // All inputs to a PHI node must be a reduction value.
- if(IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
- return false;
-
- if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(Cur) ||
- isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
- if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) ||
- isa<SelectInst>(Cur)))
- ++NumCmpSelectPatternInst;
-
- // Check whether we found a reduction operator.
- FoundReduxOp |= !IsAPhi;
-
- // Process users of current instruction. Push non-PHI nodes after PHI nodes
- // onto the stack. This way we are going to have seen all inputs to PHI
- // nodes once we get to them.
- SmallVector<Instruction *, 8> NonPHIs;
- SmallVector<Instruction *, 8> PHIs;
- for (User *U : Cur->users()) {
- Instruction *UI = cast<Instruction>(U);
-
- // Check if we found the exit user.
- BasicBlock *Parent = UI->getParent();
- if (!TheLoop->contains(Parent)) {
- // Exit if you find multiple outside users or if the header phi node is
- // being used. In this case the user uses the value of the previous
- // iteration, in which case we would lose "VF-1" iterations of the
- // reduction operation if we vectorize.
- if (ExitInstruction != nullptr || Cur == Phi)
- return false;
-
- // The instruction used by an outside user must be the last instruction
- // before we feed back to the reduction phi. Otherwise, we lose VF-1
- // operations on the value.
- if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
- return false;
-
- ExitInstruction = Cur;
- continue;
- }
-
- // Process instructions only once (termination). Each reduction cycle
- // value must only be used once, except by phi nodes and min/max
- // reductions which are represented as a cmp followed by a select.
- ReductionInstDesc IgnoredVal(false, nullptr);
- if (VisitedInsts.insert(UI).second) {
- if (isa<PHINode>(UI))
- PHIs.push_back(UI);
- else
- NonPHIs.push_back(UI);
- } else if (!isa<PHINode>(UI) &&
- ((!isa<FCmpInst>(UI) &&
- !isa<ICmpInst>(UI) &&
- !isa<SelectInst>(UI)) ||
- !isMinMaxSelectCmpPattern(UI, IgnoredVal).IsReduction))
- return false;
-
- // Remember that we completed the cycle.
- if (UI == Phi)
- FoundStartPHI = true;
- }
- Worklist.append(PHIs.begin(), PHIs.end());
- Worklist.append(NonPHIs.begin(), NonPHIs.end());
}
- // This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
- NumCmpSelectPatternInst != 2)
- return false;
-
- if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ if (LAI->getNumRuntimePointerChecks() >
+ VectorizerParams::RuntimeMemoryCheckThreshold) {
+ emitAnalysis(VectorizationReport()
+ << LAI->getNumRuntimePointerChecks() << " exceeds limit of "
+ << VectorizerParams::RuntimeMemoryCheckThreshold
+ << " dependent memory operations checked at runtime");
+ DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
return false;
-
- // We found a reduction var if we have reached the original phi node and we
- // only have a single instruction with out-of-loop users.
-
- // This instruction is allowed to have out-of-loop users.
- AllowedExit.insert(ExitInstruction);
-
- // Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
- ReduxDesc.MinMaxKind);
- Reductions[Phi] = RD;
- // We've ended the cycle. This is a reduction variable if we have an
- // outside user and it has a binary op.
-
- return true;
-}
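The reduction walk removed above only accepts a cycle through the header PHI when exactly one value escapes the loop, and that value must be the last instruction fed back into the PHI. A hedged source-level sketch of the two shapes this distinguishes (the names are made up; the second form is roughly what the extra-outside-user check rejects):

// Accepted: a single reduction cycle with one out-of-loop user.
int sum_ok(const int *a, int n) {
  int s = 0;                  // reduction start value (loop-preheader input)
  for (int i = 0; i < n; ++i)
    s += a[i];                // the only reduction operation in the cycle
  return s;                   // single out-of-loop user of the final value
}

// Roughly the rejected shape: the pre-update value also escapes the loop, so
// vectorizing by VF would lose the last VF-1 updates seen by 'prev'.
int sum_bad(const int *a, int n, int *prev_out) {
  int s = 0, prev = 0;
  for (int i = 0; i < n; ++i) {
    prev = s;                 // uses the header PHI value of the prior iteration
    s += a[i];
  }
  *prev_out = prev;           // second out-of-loop consumer of the cycle
  return s;
}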
-
-/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
-/// pattern corresponding to a min(X, Y) or max(X, Y).
-LoopVectorizationLegality::ReductionInstDesc
-LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
- ReductionInstDesc &Prev) {
-
- assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
- "Expect a select instruction");
- Instruction *Cmp = nullptr;
- SelectInst *Select = nullptr;
-
- // We must handle the select(cmp()) as a single instruction. Advance to the
- // select.
- if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
- if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
- return ReductionInstDesc(false, I);
- return ReductionInstDesc(Select, Prev.MinMaxKind);
- }
-
- // Only handle single use cases for now.
- if (!(Select = dyn_cast<SelectInst>(I)))
- return ReductionInstDesc(false, I);
- if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
- !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
- return ReductionInstDesc(false, I);
- if (!Cmp->hasOneUse())
- return ReductionInstDesc(false, I);
-
- Value *CmpLeft;
- Value *CmpRight;
-
- // Look for a min/max pattern.
- if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_UIntMin);
- else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_UIntMax);
- else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_SIntMax);
- else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_SIntMin);
- else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_FloatMin);
- else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_FloatMax);
- else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_FloatMin);
- else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
- return ReductionInstDesc(Select, MRK_FloatMax);
-
- return ReductionInstDesc(false, I);
-}
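The removed isMinMaxSelectCmpPattern accepts only a compare whose single use is a select over the same two operands, i.e. the two-instruction icmp/fcmp + select form of min(X, Y) or max(X, Y). A small C++ loop that lowers to exactly that pair (illustrative; the IR names in the comment are approximate):

#include <climits>

// The body compiles to roughly:
//   %cmp = icmp slt i32 %x, %m              ; the compare
//   %m.1 = select i1 %cmp, i32 %x, i32 %m   ; its single use, the select
// which is the NumCmpSelectPatternInst == 2 pattern the matcher expects.
int min_reduction(const int *a, int n) {
  int m = INT_MAX;
  for (int i = 0; i < n; ++i)
    m = a[i] < m ? a[i] : m;
  return m;
}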
-
-LoopVectorizationLegality::ReductionInstDesc
-LoopVectorizationLegality::isReductionInstr(Instruction *I,
- ReductionKind Kind,
- ReductionInstDesc &Prev) {
- bool FP = I->getType()->isFloatingPointTy();
- bool FastMath = FP && I->hasUnsafeAlgebra();
- switch (I->getOpcode()) {
- default:
- return ReductionInstDesc(false, I);
- case Instruction::PHI:
- if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
- Kind != RK_FloatMinMax))
- return ReductionInstDesc(false, I);
- return ReductionInstDesc(I, Prev.MinMaxKind);
- case Instruction::Sub:
- case Instruction::Add:
- return ReductionInstDesc(Kind == RK_IntegerAdd, I);
- case Instruction::Mul:
- return ReductionInstDesc(Kind == RK_IntegerMult, I);
- case Instruction::And:
- return ReductionInstDesc(Kind == RK_IntegerAnd, I);
- case Instruction::Or:
- return ReductionInstDesc(Kind == RK_IntegerOr, I);
- case Instruction::Xor:
- return ReductionInstDesc(Kind == RK_IntegerXor, I);
- case Instruction::FMul:
- return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
- case Instruction::FSub:
- case Instruction::FAdd:
- return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
- case Instruction::FCmp:
- case Instruction::ICmp:
- case Instruction::Select:
- if (Kind != RK_IntegerMinMax &&
- (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
- return ReductionInstDesc(false, I);
- return isMinMaxSelectCmpPattern(I, Prev);
}
+ return true;
}
LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
- Type *PhiTy = Phi->getType();
- // We only handle integer and pointer induction variables.
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
- return IK_NoInduction;
-
- // Check that the PHI is consecutive.
- const SCEV *PhiScev = SE->getSCEV(Phi);
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
- if (!AR) {
- DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return IK_NoInduction;
- }
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // Integer inductions need to have a stride of one.
- if (PhiTy->isIntegerTy()) {
- if (Step->isOne())
- return IK_IntInduction;
- if (Step->isAllOnesValue())
- return IK_ReverseIntInduction;
- return IK_NoInduction;
- }
-
- // Calculate the pointer stride and check if it is consecutive.
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
- return IK_NoInduction;
-
- assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
- Type *PointerElementType = PhiTy->getPointerElementType();
- // The pointer stride cannot be determined if the pointer element type is not
- // sized.
- if (!PointerElementType->isSized())
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
+ ConstantInt *&StepValue) {
+ if (!isInductionPHI(Phi, SE, StepValue))
return IK_NoInduction;
- uint64_t Size = DL->getTypeAllocSize(PointerElementType);
- if (C->getValue()->equalsInt(Size))
- return IK_PtrInduction;
- else if (C->getValue()->equalsInt(0 - Size))
- return IK_ReversePtrInduction;
-
- return IK_NoInduction;
+ Type *PhiTy = Phi->getType();
+ // Found an Integer induction variable.
+ if (PhiTy->isIntegerTy())
+ return IK_IntInduction;
+ // Found a Pointer induction variable.
+ return IK_PtrInduction;
}
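After this change the structural checks live in isInductionPHI and only the classification by type remains here: an integer PHI with a usable step is IK_IntInduction, a pointer PHI is IK_PtrInduction. A tiny source-level sketch of the two induction shapes (illustrative):

// 'i' is an integer induction (step 1); 'p' is a pointer induction
// (step sizeof(*p) bytes per iteration).
long sum_both(const int *p, int n) {
  long s = 0;
  for (int i = 0; i < n; ++i, ++p)
    s += *p + i;
  return s;
}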
bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
@@ -5341,11 +3852,7 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
}
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
- assert(TheLoop->contains(BB) && "Unknown block used");
-
- // Blocks that do not dominate the latch need predication.
- BasicBlock* Latch = TheLoop->getLoopLatch();
- return !DT->dominates(BB, Latch);
+ return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
@@ -5421,13 +3928,17 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// Width 1 means no vectorize
VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
- emitAnalysis(Report() << "runtime pointer checks needed. Enable vectorization of this loop with '#pragma clang loop vectorize(enable)' when compiling with -Os");
+ emitAnalysis(VectorizationReport() <<
+ "runtime pointer checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os");
DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
return Factor;
}
- if (!EnableCondStoresVectorization && Legal->NumPredStores) {
- emitAnalysis(Report() << "store that is conditionally executed prevents vectorization");
+ if (!EnableCondStoresVectorization && Legal->getNumPredStores()) {
+ emitAnalysis(VectorizationReport() <<
+ "store that is conditionally executed prevents vectorization");
DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
return Factor;
}
@@ -5462,7 +3973,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
if (OptForSize) {
// If we are unable to calculate the trip count then don't try to vectorize.
if (TC < 2) {
- emitAnalysis(Report() << "unable to calculate the loop count due to complex control flow");
+ emitAnalysis
+ (VectorizationReport() <<
+ "unable to calculate the loop count due to complex control flow");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor;
}
@@ -5476,10 +3989,11 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// If the trip count that we found modulo the vectorization factor is not
// zero then we require a tail.
if (VF < 2) {
- emitAnalysis(Report() << "cannot optimize for size and vectorize at the "
- "same time. Enable vectorization of this loop "
- "with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os");
+ emitAnalysis(VectorizationReport() <<
+ "cannot optimize for size and vectorize at the "
+ "same time. Enable vectorization of this loop "
+ "with '#pragma clang loop vectorize(enable)' "
+ "when compiling with -Os");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor;
}
@@ -5532,6 +4046,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
unsigned LoopVectorizationCostModel::getWidestType() {
unsigned MaxWidth = 8;
+ const DataLayout &DL = TheFunction->getParent()->getDataLayout();
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -5566,7 +4081,7 @@ unsigned LoopVectorizationCostModel::getWidestType() {
continue;
MaxWidth = std::max(MaxWidth,
- (unsigned)DL->getTypeSizeInBits(T->getScalarType()));
+ (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
}
}
@@ -5645,7 +4160,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
std::max(1U, (R.MaxLocalUsers - 1)));
// Clamp the unroll factor ranges to reasonable factors.
- unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor();
+ unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor(VF);
// Check if the user has overridden the unroll max.
if (VF == 1) {
@@ -5692,8 +4207,10 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// Unroll until store/load ports (estimated by max unroll factor) are
// saturated.
- unsigned StoresUF = UF / (Legal->NumStores ? Legal->NumStores : 1);
- unsigned LoadsUF = UF / (Legal->NumLoads ? Legal->NumLoads : 1);
+ unsigned NumStores = Legal->getNumStores();
+ unsigned NumLoads = Legal->getNumLoads();
+ unsigned StoresUF = UF / (NumStores ? NumStores : 1);
+ unsigned LoadsUF = UF / (NumLoads ? NumLoads : 1);
// If we have a scalar reduction (vector reductions are already dealt with
// by this point), we can increase the critical path length if the loop
@@ -5716,6 +4233,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
return SmallUF;
}
+ // Unroll if this is a large loop (small loops are already dealt with by this
+ // point) that could benefit from interleaved unrolling.
+ bool HasReductions = (Legal->getReductionVars()->size() > 0);
+ if (TTI.enableAggressiveInterleaving(HasReductions)) {
+ DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n");
+ return UF;
+ }
+
DEBUG(dbgs() << "LV: Not Unrolling.\n");
return 1;
}
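The saturation heuristic a few hunks above divides the candidate unroll factor by the number of stores and loads so that interleaving does not oversubscribe the memory ports; the new enableAggressiveInterleaving() hook then lets a target ask for the full factor on large loops with reductions. A back-of-the-envelope sketch of the store/load clamping only (the numbers are made up, the combination via std::max is one plausible reading, and the real decision also involves SmallUF and the reduction check):

#include <algorithm>
#include <cstdio>

int main() {
  unsigned UF = 8;                                       // candidate unroll factor
  unsigned NumStores = 3, NumLoads = 2;
  unsigned StoresUF = UF / (NumStores ? NumStores : 1);  // 8 / 3 = 2
  unsigned LoadsUF  = UF / (NumLoads  ? NumLoads  : 1);  // 8 / 2 = 4
  unsigned Ports    = std::max(StoresUF, LoadsUF);       // port-limited factor
  std::printf("port-limited unroll factor: %u (from %u)\n", Ports, UF);
  return 0;
}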
@@ -6053,8 +4578,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Scalarized loads/stores.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
- unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
- unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ValTy);
+ unsigned VectorElementSize = DL.getTypeStoreSize(VectorTy) / VF;
if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
bool IsComplexComputation =
isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
@@ -6081,7 +4607,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Wide load/stores.
unsigned Cost = TTI.getAddressComputationCost(VectorTy);
- Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+ if (Legal->isMaskRequired(I))
+ Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment,
+ AS);
+ else
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
if (Reverse)
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
@@ -6111,14 +4641,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
case Instruction::Call: {
+ bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
- Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
- assert(ID && "Not an intrinsic call!");
- Type *RetTy = ToVectorTy(CI->getType(), VF);
- SmallVector<Type*, 4> Tys;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
- return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+ unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
+ if (getIntrinsicIDForCall(CI, TLI))
+ return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
+ return CallCost;
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar
@@ -6145,24 +4673,19 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
}// end of switch.
}
-Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
- if (Scalar->isVoidTy() || VF == 1)
- return Scalar;
- return VectorType::get(Scalar, VF);
-}
-
char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
@@ -6259,7 +4782,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
ConstantInt::get(Cond[Part]->getType(), 1));
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
LoopVectorBody.push_back(CondBlock);
- VectorLp->addBasicBlockToLoop(CondBlock, LI->getBase());
+ VectorLp->addBasicBlockToLoop(CondBlock, *LI);
// Update Builder with newly created basic block.
Builder.SetInsertPoint(InsertPt);
}
@@ -6285,7 +4808,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
if (IfPredicateStore) {
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
LoopVectorBody.push_back(NewIfBlock);
- VectorLp->addBasicBlockToLoop(NewIfBlock, LI->getBase());
+ VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
Builder.SetInsertPoint(InsertPt);
Instruction *OldBr = IfBlock->getTerminator();
BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
@@ -6310,11 +4833,10 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
return V;
}
-Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
- bool Negate) {
+Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
Type *ITy = Val->getType();
assert(!ITy->isVectorTy() && "Val must be a scalar");
- Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
- return Builder.CreateAdd(Val, C, "induction");
+ Constant *C = ConstantInt::get(ITy, StartIdx);
+ return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
}
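For the unroller (VF == 1) the step vector collapses to a single scalar: part StartIdx of the unrolled body uses the induction value advanced by StartIdx steps, built above as CreateAdd(Val, CreateMul(C, Step)). The same arithmetic in plain C++ (a sketch, not the IRBuilder code):

// Induction value used by unrolled copy number 'StartIdx' of the loop body.
long scalarStepValue(long Val, long StartIdx, long Step) {
  return Val + StartIdx * Step;
}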
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bd8a4b3..504425e 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17,9 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -75,6 +75,15 @@ static const unsigned MinVecRegSize = 128;
static const unsigned RecursionMaxDepth = 12;
+// Limit the number of alias checks. The limit is chosen so that
+// it has no negative effect on the llvm benchmarks.
+static const unsigned AliasedCheckLimit = 10;
+
+// Another limit for the alias checks: The maximum distance between load/store
+// instructions where alias checks are done.
+// This limit is useful for very large basic blocks.
+static const unsigned MaxMemDepDistance = 160;
+
/// \brief Predicate for the element types that the SLP vectorizer supports.
///
 /// The most important things to filter here are types which are invalid in LLVM
@@ -278,104 +287,6 @@ static bool CanReuseExtract(ArrayRef<Value *> VL) {
return true;
}
-static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right) {
-
- SmallVector<Value *, 16> OrigLeft, OrigRight;
-
- bool AllSameOpcodeLeft = true;
- bool AllSameOpcodeRight = true;
- for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- Instruction *I = cast<Instruction>(VL[i]);
- Value *V0 = I->getOperand(0);
- Value *V1 = I->getOperand(1);
-
- OrigLeft.push_back(V0);
- OrigRight.push_back(V1);
-
- Instruction *I0 = dyn_cast<Instruction>(V0);
- Instruction *I1 = dyn_cast<Instruction>(V1);
-
- // Check whether all operands on one side have the same opcode. In this case
- // we want to preserve the original order and not make things worse by
- // reordering.
- AllSameOpcodeLeft = I0;
- AllSameOpcodeRight = I1;
-
- if (i && AllSameOpcodeLeft) {
- if(Instruction *P0 = dyn_cast<Instruction>(OrigLeft[i-1])) {
- if(P0->getOpcode() != I0->getOpcode())
- AllSameOpcodeLeft = false;
- } else
- AllSameOpcodeLeft = false;
- }
- if (i && AllSameOpcodeRight) {
- if(Instruction *P1 = dyn_cast<Instruction>(OrigRight[i-1])) {
- if(P1->getOpcode() != I1->getOpcode())
- AllSameOpcodeRight = false;
- } else
- AllSameOpcodeRight = false;
- }
-
- // Sort two opcodes. In the code below we try to preserve the ability to use
- // broadcast of values instead of individual inserts.
- // vl1 = load
- // vl2 = phi
- // vr1 = load
- // vr2 = vr2
- // = vl1 x vr1
- // = vl2 x vr2
- // If we just sorted according to opcode we would leave the first line
- // intact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
- // = vl1 x vr1
- // = vr2 x vl2
- // Because vr2 and vr1 are from the same load we lose the opportunity of a
- // broadcast for the packed right side in the backend: we have [vr1, vl2]
- // instead of [vr1, vr2=vr1].
- if (I0 && I1) {
- if(!i && I0->getOpcode() > I1->getOpcode()) {
- Left.push_back(I1);
- Right.push_back(I0);
- } else if (i && I0->getOpcode() > I1->getOpcode() && Right[i-1] != I1) {
- // Try not to destroy a broadcast for no apparent benefit.
- Left.push_back(I1);
- Right.push_back(I0);
- } else if (i && I0->getOpcode() == I1->getOpcode() && Right[i-1] == I0) {
- // Try preserve broadcasts.
- Left.push_back(I1);
- Right.push_back(I0);
- } else if (i && I0->getOpcode() == I1->getOpcode() && Left[i-1] == I1) {
- // Try preserve broadcasts.
- Left.push_back(I1);
- Right.push_back(I0);
- } else {
- Left.push_back(I0);
- Right.push_back(I1);
- }
- continue;
- }
- // One opcode, put the instruction on the right.
- if (I0) {
- Left.push_back(V1);
- Right.push_back(I0);
- continue;
- }
- Left.push_back(V0);
- Right.push_back(V1);
- }
-
- bool LeftBroadcast = isSplat(Left);
- bool RightBroadcast = isSplat(Right);
-
- // Don't reorder if the operands were good to begin with.
- if (!(LeftBroadcast || RightBroadcast) &&
- (AllSameOpcodeRight || AllSameOpcodeLeft)) {
- Left = OrigLeft;
- Right = OrigRight;
- }
-}
-
/// \returns True if in-tree use also needs extract. This refers to
/// possible scalar operand in vectorized instruction.
static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
@@ -412,6 +323,17 @@ static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
return AliasAnalysis::Location();
}
+/// \returns True if the instruction is not a volatile or atomic load/store.
+static bool isSimple(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isSimple();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isSimple();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return !MI->isVolatile();
+ return true;
+}
+
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
public:
@@ -420,11 +342,11 @@ public:
typedef SmallPtrSet<Value *, 16> ValueSet;
typedef SmallVector<StoreInst *, 8> StoreList;
- BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
- TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
- LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC)
+ BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
+ TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
+ DominatorTree *Dt, AssumptionCache *AC)
: NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
- SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+ SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
Builder(Se->getContext()) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
}
@@ -461,7 +383,7 @@ public:
}
/// \returns true if the memory operations A and B are consecutive.
- bool isConsecutiveAccess(Value *A, Value *B);
+ bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
@@ -518,6 +440,16 @@ private:
 /// be beneficial even if the tree height is tiny.
bool isFullyVectorizableTinyTree();
+ /// \brief Reorder commutative operands in alt shuffle if they result in
+ /// vectorized code.
+ void reorderAltShuffleOperands(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right);
+ /// \brief Reorder commutative operands to get better probability of
+ /// generating vectorized code.
+ void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right);
struct TreeEntry {
TreeEntry() : Scalars(), VectorizedValue(nullptr),
NeedToGather(0) {}
@@ -594,7 +526,7 @@ private:
}
AliasAnalysis::Location Loc2 = getLocation(Inst2, AA);
bool aliased = true;
- if (Loc1.Ptr && Loc2.Ptr) {
+ if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
// Do the alias check.
aliased = AA->alias(Loc1, Loc2);
}
@@ -945,7 +877,6 @@ private:
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
- const DataLayout *DL;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -1198,8 +1129,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
- if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0])) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+ if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
++NumLoadsWantToChangeOrder;
}
BS.cancelScheduling(VL);
@@ -1251,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::ICmp:
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
- CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
for (unsigned i = 1, e = VL.size(); i < e; ++i) {
CmpInst *Cmp = cast<CmpInst>(VL[i]);
@@ -1368,9 +1300,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
case Instruction::Store: {
+ const DataLayout &DL = F->getParent()->getDataLayout();
 // Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
@@ -1451,6 +1384,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
+
+ // Reorder operands if reordering would enable vectorization.
+ if (isa<BinaryOperator>(VL0)) {
+ ValueList Left, Right;
+ reorderAltShuffleOperands(VL, Left, Right);
+ buildTree_rec(Left, Depth + 1);
+ buildTree_rec(Right, Depth + 1);
+ return;
+ }
+
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -1774,7 +1717,7 @@ int BoUpSLP::getTreeCost() {
// We only vectorize tiny trees if it is fully vectorizable.
if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
- if (!VectorizableTree.size()) {
+ if (VectorizableTree.empty()) {
assert(!ExternalUses.size() && "We should not have any external users");
}
return INT_MAX;
@@ -1847,7 +1790,7 @@ unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
return -1;
}
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
Value *PtrA = getPointerOperand(A);
Value *PtrB = getPointerOperand(B);
unsigned ASA = getAddressSpaceOperand(A);
@@ -1861,13 +1804,13 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
return false;
- unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
+ unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
+ APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
- PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
- PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
APInt OffsetDelta = OffsetB - OffsetA;
@@ -1888,6 +1831,198 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
return X == PtrSCEVB;
}
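With the DataLayout now passed in explicitly, isConsecutiveAccess strips in-bounds constant GEP offsets from both pointers and declares the accesses consecutive when the byte distance between them equals the element's store size, falling back to SCEV when the subtraction alone is not conclusive. The core offset arithmetic, reduced to plain integers (a sketch; the real code works on APInt offsets and pointer SCEVs):

#include <cstdint>

// A is immediately followed by B when the byte distance between their
// (already offset-accumulated) addresses equals the element store size.
bool isConsecutive(std::uint64_t OffsetA, std::uint64_t OffsetB,
                   std::uint64_t ElemStoreSizeInBytes) {
  return OffsetB - OffsetA == ElemStoreSizeInBytes;
}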
+// Reorder commutative operations in alternate shuffle if the resulting vectors
+// are consecutive loads. This would allow us to vectorize the tree.
+// If we have something like-
+// load a[0] - load b[0]
+// load b[1] + load a[1]
+// load a[2] - load b[2]
+// load a[3] + load b[3]
+// Reordering the second load b[1] load a[1] would allow us to vectorize this
+// code.
+void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Push left and right operands of binary operation into Left and Right
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Left.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ Right.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ }
+
+ // Reorder if we have a commutative operation and consecutive access
+ // are on either side of the alternate instructions.
+ for (unsigned j = 0; j < VL.size() - 1; ++j) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
+ Instruction *VL1 = cast<Instruction>(VL[j]);
+ Instruction *VL2 = cast<Instruction>(VL[j + 1]);
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ std::swap(Left[j], Right[j]);
+ continue;
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ // else unchanged
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
+ Instruction *VL1 = cast<Instruction>(VL[j]);
+ Instruction *VL2 = cast<Instruction>(VL[j + 1]);
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+ std::swap(Left[j], Right[j]);
+ continue;
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ // else unchanged
+ }
+ }
+ }
+}
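The loop above swaps an operand pair whenever a load on one side is consecutive with a load on the other side one lane later and the corresponding operation is commutative, so that both operand vectors end up reading adjacent memory. A reduced standalone sketch of that swap pass (integers stand in for the load addresses, and the commutativity and load checks are collapsed into one predicate):

#include <cstddef>
#include <utility>
#include <vector>

// Stand-in for isConsecutiveAccess() on two loads.
static bool consecutive(int a, int b) { return b == a + 1; }

// Swap operands so consecutive "loads" line up on the same side.
void reorderAltOperands(std::vector<int> &Left, std::vector<int> &Right) {
  for (std::size_t j = 0; j + 1 < Left.size(); ++j) {
    if (consecutive(Left[j], Right[j + 1]))
      std::swap(Left[j], Right[j]);          // move Left[j] next to Right[j+1]
    else if (consecutive(Right[j], Left[j + 1]))
      std::swap(Left[j + 1], Right[j + 1]);  // move Left[j+1] next to Right[j]
  }
}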
+
+void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+
+ SmallVector<Value *, 16> OrigLeft, OrigRight;
+
+ bool AllSameOpcodeLeft = true;
+ bool AllSameOpcodeRight = true;
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ Value *VLeft = I->getOperand(0);
+ Value *VRight = I->getOperand(1);
+
+ OrigLeft.push_back(VLeft);
+ OrigRight.push_back(VRight);
+
+ Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+ Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+ // Check whether all operands on one side have the same opcode. In this case
+ // we want to preserve the original order and not make things worse by
+ // reordering.
+ if (i && AllSameOpcodeLeft && ILeft) {
+ if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) {
+ if (PLeft->getOpcode() != ILeft->getOpcode())
+ AllSameOpcodeLeft = false;
+ } else
+ AllSameOpcodeLeft = false;
+ }
+ if (i && AllSameOpcodeRight && IRight) {
+ if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) {
+ if (PRight->getOpcode() != IRight->getOpcode())
+ AllSameOpcodeRight = false;
+ } else
+ AllSameOpcodeRight = false;
+ }
+
+ // Sort two opcodes. In the code below we try to preserve the ability to use
+ // broadcast of values instead of individual inserts.
+ // vl1 = load
+ // vl2 = phi
+ // vr1 = load
+ // vr2 = vr2
+ // = vl1 x vr1
+ // = vl2 x vr2
+ // If we just sorted according to opcode we would leave the first line
+ // intact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
+ // = vl1 x vr1
+ // = vr2 x vl2
+ // Because vr2 and vr1 are from the same load we lose the opportunity of a
+ // broadcast for the packed right side in the backend: we have [vr1, vl2]
+ // instead of [vr1, vr2=vr1].
+ if (ILeft && IRight) {
+ if (!i && ILeft->getOpcode() > IRight->getOpcode()) {
+ Left.push_back(IRight);
+ Right.push_back(ILeft);
+ } else if (i && ILeft->getOpcode() > IRight->getOpcode() &&
+ Right[i - 1] != IRight) {
+ // Try not to destroy a broadcast for no apparent benefit.
+ Left.push_back(IRight);
+ Right.push_back(ILeft);
+ } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
+ Right[i - 1] == ILeft) {
+ // Try preserve broadcasts.
+ Left.push_back(IRight);
+ Right.push_back(ILeft);
+ } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
+ Left[i - 1] == IRight) {
+ // Try preserve broadcasts.
+ Left.push_back(IRight);
+ Right.push_back(ILeft);
+ } else {
+ Left.push_back(ILeft);
+ Right.push_back(IRight);
+ }
+ continue;
+ }
+ // One opcode, put the instruction on the right.
+ if (ILeft) {
+ Left.push_back(VRight);
+ Right.push_back(ILeft);
+ continue;
+ }
+ Left.push_back(VLeft);
+ Right.push_back(VRight);
+ }
+
+ bool LeftBroadcast = isSplat(Left);
+ bool RightBroadcast = isSplat(Right);
+
+ // If operands end up being broadcast return this operand order.
+ if (LeftBroadcast || RightBroadcast)
+ return;
+
+ // Don't reorder if the operands were good to begin with.
+ if (AllSameOpcodeRight || AllSameOpcodeLeft) {
+ Left = OrigLeft;
+ Right = OrigRight;
+ }
+
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
+ // Finally check if we can get longer vectorizable chain by reordering
+ // without breaking the good operand order detected above.
+ // E.g. If we have something like-
+ // load a[0] load b[0]
+ // load b[1] load a[1]
+ // load a[2] load b[2]
+ // load a[3] load b[3]
+ // Reordering the second load b[1] load a[1] would allow us to vectorize
+ // this code and we still retain AllSameOpcode property.
+ // FIXME: This load reordering might break AllSameOpcode in some rare cases
+ // such as-
+ // add a[0],c[0] load b[0]
+ // add a[1],c[2] load b[1]
+ // b[2] load b[2]
+ // add a[3],c[3] load b[3]
+ for (unsigned j = 0; j < VL.size() - 1; ++j) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
+ if (isConsecutiveAccess(L, L1, DL)) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ }
+ }
+ if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
+ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
+ if (isConsecutiveAccess(L, L1, DL)) {
+ std::swap(Left[j + 1], Right[j + 1]);
+ continue;
+ }
+ }
+ }
+ // else unchanged
+ }
+}
+
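reorderInputsAccordingToOpcode returns early whenever either reordered side ends up as a splat (so the backend can emit a single broadcast), restores the original order when all opcodes on a side already matched, and only then tries the consecutive-load swap at the end. A tiny sketch of the splat test that decision leans on (isSplat itself lives elsewhere in this file; this only shows the idea):

#include <cstddef>
#include <vector>

// An operand vector is a splat when every lane is the same value; such a
// vector can be materialized with one broadcast instead of N inserts.
template <typename T>
bool isSplatLike(const std::vector<T> &Ops) {
  if (Ops.empty())
    return false;
  for (std::size_t i = 1; i < Ops.size(); ++i)
    if (Ops[i] != Ops[0])
      return false;
  return true;
}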
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
Instruction *VL0 = cast<Instruction>(VL[0]);
BasicBlock::iterator NextInst = VL0;
@@ -1974,6 +2109,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return Gather(E->Scalars, VecTy);
}
+ const DataLayout &DL = F->getParent()->getDataLayout();
unsigned Opcode = getSameOpcode(E->Scalars);
switch (Opcode) {
@@ -2066,7 +2202,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (Value *V = alreadyVectorized(E->Scalars))
return V;
- CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V;
if (Opcode == Instruction::FCmp)
V = Builder.CreateFCmp(P0, L, R);
@@ -2170,8 +2306,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(ScalarLoadTy);
+ if (!Alignment) {
+ Alignment = DL.getABITypeAlignment(ScalarLoadTy);
+ }
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
++NumVectorInstructions;
@@ -2200,8 +2337,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(
ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
+ if (!Alignment) {
+ Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType());
+ }
S->setAlignment(Alignment);
E->VectorizedValue = S;
++NumVectorInstructions;
@@ -2227,7 +2365,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
OpVecs.push_back(OpVec);
}
- Value *V = Builder.CreateGEP(Op0, OpVecs);
+ Value *V = Builder.CreateGEP(
+ cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -2243,7 +2382,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Intrinsic::ID IID = Intrinsic::not_intrinsic;
Value *ScalarArg = nullptr;
if (CI && (FI = CI->getCalledFunction())) {
- IID = (Intrinsic::ID) FI->getIntrinsicID();
+ IID = FI->getIntrinsicID();
}
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
@@ -2284,10 +2423,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::ShuffleVector: {
ValueList LHSVL, RHSVL;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
- LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
- }
+ assert(isa<BinaryOperator>(VL0) && "Invalid Shuffle Vector Operand");
+ reorderAltShuffleOperands(E->Scalars, LHSVL, RHSVL);
setInsertPointAfterBundle(E->Scalars);
Value *LHS = vectorizeTree(LHSVL);
@@ -2768,23 +2905,57 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
Instruction *SrcInst = BundleMember->Inst;
AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA);
bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
+ unsigned numAliased = 0;
+ unsigned DistToSrc = 1;
while (DepDest) {
assert(isInSchedulingRegion(DepDest));
- if (SrcMayWrite || DepDest->Inst->mayWriteToMemory()) {
- if (SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)) {
- DepDest->MemoryDependencies.push_back(BundleMember);
- BundleMember->Dependencies++;
- ScheduleData *DestBundle = DepDest->FirstInBundle;
- if (!DestBundle->IsScheduled) {
- BundleMember->incrementUnscheduledDeps(1);
- }
- if (!DestBundle->hasValidDependencies()) {
- WorkList.push_back(DestBundle);
- }
+
+ // We have two limits to reduce the complexity:
+ // 1) AliasedCheckLimit: It's a small limit to reduce calls to
+ // SLP->isAliased (which is the expensive part in this loop).
+ // 2) MaxMemDepDistance: It's for very large blocks and it aborts
+ // the whole loop (even if the loop is fast, it's quadratic).
+ // It's important for the loop break condition (see below) to
+ // check this limit even between two read-only instructions.
+ if (DistToSrc >= MaxMemDepDistance ||
+ ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
+ (numAliased >= AliasedCheckLimit ||
+ SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
+
+ // We increment the counter only if the locations are aliased
+ // (instead of counting all alias checks). This gives a better
+ // balance between reduced runtime and accurate dependencies.
+ numAliased++;
+
+ DepDest->MemoryDependencies.push_back(BundleMember);
+ BundleMember->Dependencies++;
+ ScheduleData *DestBundle = DepDest->FirstInBundle;
+ if (!DestBundle->IsScheduled) {
+ BundleMember->incrementUnscheduledDeps(1);
+ }
+ if (!DestBundle->hasValidDependencies()) {
+ WorkList.push_back(DestBundle);
}
}
DepDest = DepDest->NextLoadStore;
+
+ // Example, explaining the loop break condition: Let's assume our
+ // starting instruction is i0 and MaxMemDepDistance = 3.
+ //
+ // +--------v--v--v
+ // i0,i1,i2,i3,i4,i5,i6,i7,i8
+ // +--------^--^--^
+ //
+ // MaxMemDepDistance let us stop alias-checking at i3 and we add
+ // dependencies from i0 to i3,i4,.. (even if they are not aliased).
+ // Previously we already added dependencies from i3 to i6,i7,i8
+ // (because of MaxMemDepDistance). As we added a dependency from
+ // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
+ // and we can abort this loop at i6.
+ if (DistToSrc >= 2 * MaxMemDepDistance)
+ break;
+ DistToSrc++;
}
}
}
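The two new limits bound this otherwise quadratic dependence scan: after AliasedCheckLimit hits (or past MaxMemDepDistance) the code stops paying for precise alias queries and records conservative edges instead, and once the distance reaches 2 * MaxMemDepDistance it stops walking entirely because the remaining dependencies are reachable transitively, as the worked example in the comment shows. A condensed standalone sketch of that bounded walk (read/write filtering omitted; the predicate is a stand-in for SLP->isAliased):

#include <vector>

static const unsigned AliasedCheckLimit = 10;
static const unsigned MaxMemDepDistance = 160;

// Record a dependence edge from 'Src' to each later memory instruction that
// either aliases it or is past the point where we gave up on precise checks.
std::vector<unsigned> boundedDeps(unsigned Src, unsigned NumInsts,
                                  bool (*MayAlias)(unsigned, unsigned)) {
  std::vector<unsigned> Deps;
  unsigned NumAliased = 0, DistToSrc = 1;
  for (unsigned I = Src + 1; I < NumInsts; ++I) {
    if (DistToSrc >= MaxMemDepDistance || NumAliased >= AliasedCheckLimit ||
        MayAlias(Src, I)) {
      ++NumAliased;        // count conservative edges like real aliases
      Deps.push_back(I);   // dependence edge (possibly conservative)
    }
    if (DistToSrc >= 2 * MaxMemDepDistance)
      break;               // the rest is covered by transitive dependencies
    ++DistToSrc;
  }
  return Deps;
}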
@@ -2888,7 +3059,6 @@ struct SLPVectorizer : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -2901,12 +3071,11 @@ struct SLPVectorizer : public FunctionPass {
return false;
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TTI = &getAnalysis<TargetTransformInfo>();
- TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -2918,11 +3087,6 @@ struct SLPVectorizer : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- // Must have DataLayout. We can't require it because some tests run w/o
- // triple.
- if (!DL)
- return false;
-
// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
@@ -2931,15 +3095,13 @@ struct SLPVectorizer : public FunctionPass {
// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.
- BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AC);
+ BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC);
// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.
// Scan the blocks in the function in post order.
- for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
- e = po_end(&F.getEntryBlock()); it != e; ++it) {
- BasicBlock *BB = *it;
+ for (auto BB : post_order(&F.getEntryBlock())) {
// Vectorize trees that end at stores.
if (unsigned count = collectStores(BB, R)) {
(void)count;
@@ -2964,10 +3126,10 @@ struct SLPVectorizer : public FunctionPass {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetTransformInfo>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
@@ -3014,15 +3176,11 @@ private:
/// the WeakVH array.
/// Vectorization of part of the VL array may cause later values in the VL array
/// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL,
- SmallVectorImpl<WeakVH> &VH,
- unsigned SliceBegin,
- unsigned SliceSize) {
- for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
- if (VH[i] != VL[i])
- return true;
-
- return false;
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
+ unsigned SliceBegin, unsigned SliceSize) {
+ VL = VL.slice(SliceBegin, SliceSize);
+ VH = VH.slice(SliceBegin, SliceSize);
+ return !std::equal(VL.begin(), VL.end(), VH.begin());
}
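The rewritten hasValueBeenRAUWed simply compares the requested slice of the value list against the snapshot of weak handles with std::equal, reporting a change as soon as any element differs. The same idea in a standalone form (plain pointers instead of Value*/WeakVH):

#include <algorithm>
#include <vector>

// True if any element of VL[SliceBegin, SliceBegin + SliceSize) no longer
// matches the handle snapshot VH taken before vectorization, i.e. the value
// was replaced or deleted in the meantime.
bool hasSliceChanged(const std::vector<void *> &VL,
                     const std::vector<void *> &VH,
                     unsigned SliceBegin, unsigned SliceSize) {
  return !std::equal(VL.begin() + SliceBegin,
                     VL.begin() + SliceBegin + SliceSize,
                     VH.begin() + SliceBegin);
}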
bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
@@ -3031,7 +3189,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
<< "\n");
Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
- unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+ auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout();
+ unsigned Sz = DL.getTypeSizeInBits(StoreTy);
unsigned VF = MinVecRegSize / Sz;
if (!isPowerOf2_32(Sz) || VF < 2)
@@ -3074,8 +3233,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
int costThreshold, BoUpSLP &R) {
- SetVector<Value *> Heads, Tails;
- SmallDenseMap<Value *, Value *> ConsecutiveChain;
+ SetVector<StoreInst *> Heads, Tails;
+ SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we vectorized so that we don't visit the same store twice.
@@ -3088,8 +3247,8 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
for (unsigned j = 0; j < e; ++j) {
if (i == j)
continue;
-
- if (R.isConsecutiveAccess(Stores[i], Stores[j])) {
+ const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+ if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
Tails.insert(Stores[j]);
Heads.insert(Stores[i]);
ConsecutiveChain[Stores[i]] = Stores[j];
@@ -3098,7 +3257,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
}
// For stores that start but don't end a link in the chain:
- for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
+ for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
it != e; ++it) {
if (Tails.count(*it))
continue;
@@ -3106,7 +3265,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
// We found a store instr that starts a chain. Now follow the chain and try
// to vectorize it.
BoUpSLP::ValueList Operands;
- Value *I = *it;
+ StoreInst *I = *it;
// Collect the chain into a list.
while (Tails.count(I) || Heads.count(I)) {
if (VectorizedStores.count(I))
@@ -3131,6 +3290,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
unsigned count = 0;
StoreRefs.clear();
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
StoreInst *SI = dyn_cast<StoreInst>(it);
if (!SI)
@@ -3176,9 +3336,10 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
return false;
unsigned Opcode0 = I0->getOpcode();
+ const DataLayout &DL = I0->getModule()->getDataLayout();
Type *Ty0 = I0->getType();
- unsigned Sz = DL->getTypeSizeInBits(Ty0);
+ unsigned Sz = DL.getTypeSizeInBits(Ty0);
unsigned VF = MinVecRegSize / Sz;
for (int i = 0, e = VL.size(); i < e; ++i) {
@@ -3380,8 +3541,7 @@ public:
ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
/// \brief Try to find a reduction tree.
- bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
- const DataLayout *DL) {
+ bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
assert((!Phi ||
std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
"Thi phi needs to use the binary operator");
@@ -3406,9 +3566,10 @@ public:
if (!isValidElementType(Ty))
return false;
+ const DataLayout &DL = B->getModule()->getDataLayout();
ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;
- ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+ ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
ReductionRoot = B;
ReductionPHI = Phi;
@@ -3718,8 +3879,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to match and vectorize a horizontal reduction.
HorizontalReduction HorRdx;
- if (ShouldVectorizeHor &&
- HorRdx.matchAssociativeReduction(P, BI, DL) &&
+ if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
HorRdx.tryToReduce(R, TTI)) {
Changed = true;
it = BB->begin();
@@ -3749,7 +3909,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
HorizontalReduction HorRdx;
- if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
+ if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
HorRdx.tryToReduce(R, TTI)) ||
tryToVectorize(BinOp, R))) {
Changed = true;
@@ -3793,6 +3953,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// and the iterator may become invalid value.
it = BB->begin();
e = BB->end();
+ break;
}
}
}
@@ -3849,7 +4010,7 @@ char SLPVectorizer::ID = 0;
static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index d459bcf..6e002fd 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -19,7 +19,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
using namespace llvm;